1   /**
2    * Copyright (c) 2000-2008 Liferay, Inc. All rights reserved.
3    *
4    * Permission is hereby granted, free of charge, to any person obtaining a copy
5    * of this software and associated documentation files (the "Software"), to deal
6    * in the Software without restriction, including without limitation the rights
7    * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8    * copies of the Software, and to permit persons to whom the Software is
9    * furnished to do so, subject to the following conditions:
10   *
11   * The above copyright notice and this permission notice shall be included in
12   * all copies or substantial portions of the Software.
13   *
14   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20   * SOFTWARE.
21   */
22  
23  package com.liferay.portlet.wiki.importers.mediawiki;
24  
25  import com.liferay.documentlibrary.service.DLLocalServiceUtil;
26  import com.liferay.portal.NoSuchUserException;
27  import com.liferay.portal.PortalException;
28  import com.liferay.portal.SystemException;
29  import com.liferay.portal.kernel.util.ArrayUtil;
30  import com.liferay.portal.kernel.util.ObjectValuePair;
31  import com.liferay.portal.kernel.util.ProgressTracker;
32  import com.liferay.portal.kernel.util.ProgressTrackerThreadLocal;
33  import com.liferay.portal.kernel.util.StringPool;
34  import com.liferay.portal.kernel.util.StringUtil;
35  import com.liferay.portal.kernel.util.Validator;
36  import com.liferay.portal.kernel.zip.ZipReader;
37  import com.liferay.portal.model.User;
38  import com.liferay.portal.service.UserLocalServiceUtil;
39  import com.liferay.portal.util.PropsValues;
40  import com.liferay.portlet.tags.NoSuchEntryException;
41  import com.liferay.portlet.tags.model.TagsEntry;
42  import com.liferay.portlet.tags.service.TagsEntryLocalServiceUtil;
43  import com.liferay.portlet.tags.service.TagsPropertyLocalServiceUtil;
44  import com.liferay.portlet.tags.util.TagsUtil;
45  import com.liferay.portlet.wiki.ImportFilesException;
46  import com.liferay.portlet.wiki.NoSuchPageException;
47  import com.liferay.portlet.wiki.importers.WikiImporter;
48  import com.liferay.portlet.wiki.importers.WikiImporterKeys;
49  import com.liferay.portlet.wiki.model.WikiNode;
50  import com.liferay.portlet.wiki.model.WikiPage;
51  import com.liferay.portlet.wiki.model.impl.WikiPageImpl;
52  import com.liferay.portlet.wiki.service.WikiPageLocalServiceUtil;
53  import com.liferay.portlet.wiki.translators.MediaWikiToCreoleTranslator;
54  import com.liferay.util.MapUtil;
55  
56  import java.io.BufferedReader;
57  import java.io.File;
58  import java.io.FileReader;
59  import java.io.IOException;
60  
61  import java.util.ArrayList;
62  import java.util.Collections;
63  import java.util.HashMap;
64  import java.util.Iterator;
65  import java.util.List;
66  import java.util.Map;
67  import java.util.regex.Matcher;
68  import java.util.regex.Pattern;
69  
70  import org.apache.commons.logging.Log;
71  import org.apache.commons.logging.LogFactory;
72  
73  import org.dom4j.Document;
74  import org.dom4j.DocumentException;
75  import org.dom4j.Element;
76  import org.dom4j.io.SAXReader;
77  
78  /**
79   * <a href="MediaWikiImporter.java.html"><b><i>View Source</i></b></a>
80   *
81   * @author Alvaro del Castillo
82   * @author Jorge Ferrer
83   *
84   */
85  public class MediaWikiImporter implements WikiImporter {
86  
87      public static final String SHARED_IMAGES_CONTENT = "See attachments";
88  
89      public static final String SHARED_IMAGES_TITLE = "SharedImages";
90  
91      public void importPages(
92              long userId, WikiNode node, File[] files,
93              Map<String, String[]> options)
94          throws PortalException {
95  
96          if ((files.length < 1) || (files[0] == null) || (!files[0].exists())) {
97              throw new PortalException("The pages file is mandatory");
98          }
99  
100         File pagesFile = files[0];
101         File usersFile = files[1];
102         File imagesFile = files[2];
103 
104         try {
105             SAXReader saxReader = new SAXReader();
106 
107             Document doc = saxReader.read(pagesFile);
108 
109             Map<String, String> usersMap = readUsersFile(usersFile);
110 
111             Element root = doc.getRootElement();
112 
113             List<String> specialNamespaces = readSpecialNamespaces(root);
114 
115             processSpecialPages(userId, node, root, specialNamespaces);
116             processRegularPages(
117                 userId, node, root, specialNamespaces, usersMap, imagesFile,
118                 options);
119             processImages(userId, node, imagesFile);
120 
121             moveFrontPage(userId, node, options);
122         }
123         catch (DocumentException de) {
124             throw new ImportFilesException("Invalid XML file provided");
125         }
126         catch (IOException de) {
127             throw new ImportFilesException("Error reading the files provided");
128         }
129         catch (PortalException e) {
130             throw e;
131         }
132         catch (Exception e) {
133             throw new PortalException(e);
134         }
135     }
136 
137     protected long getUserId(
138             long userId, WikiNode node, String author,
139             Map<String, String> usersMap)
140         throws PortalException, SystemException {
141 
142         User user = null;
143 
144         String emailAddress = usersMap.get(author);
145 
146         try {
147             if (Validator.isNull(emailAddress)) {
148                 user = UserLocalServiceUtil.getUserByScreenName(
149                     node.getCompanyId(), author.toLowerCase());
150             }
151             else {
152                 user = UserLocalServiceUtil.getUserByEmailAddress(
153                     node.getCompanyId(), emailAddress);
154             }
155         }
156         catch (NoSuchUserException nsue) {
157             user = UserLocalServiceUtil.getUserById(userId);
158         }
159 
160         return user.getUserId();
161     }
162 
163     protected void importPage(
164             long userId, String author, WikiNode node, String title,
165             String content, String summary, Map<String, String> usersMap)
166         throws PortalException {
167 
168         try {
169             long authorUserId = getUserId(userId, node, author, usersMap);
170             String parentTitle = readParentTitle(content);
171             String redirectTitle = readRedirectTitle(content);
172             String[] tagsEntries = readTagsEntries(userId, node, content);
173 
174             if (Validator.isNull(redirectTitle)) {
175                 content = _translator.translate(content);
176             }
177             else {
178                 content =
179                     StringPool.DOUBLE_OPEN_BRACKET + redirectTitle +
180                         StringPool.DOUBLE_CLOSE_BRACKET;
181             }
182 
183             WikiPage page = null;
184 
185             try {
186                 page = WikiPageLocalServiceUtil.getPage(
187                     node.getNodeId(), title);
188             }
189             catch (NoSuchPageException nspe) {
190                 page = WikiPageLocalServiceUtil.addPage(
191                     authorUserId, node.getNodeId(), title, WikiPageImpl.NEW,
192                     null, true, null, null);
193             }
194 
195             WikiPageLocalServiceUtil.updatePage(
196                 authorUserId, node.getNodeId(), title, page.getVersion(),
197                 content, summary, true, "creole", parentTitle,
198                 redirectTitle, tagsEntries, null, null);
199         }
200         catch (Exception e) {
201             throw new PortalException("Error importing page " + title, e);
202         }
203     }
204 
205     protected boolean isSpecialMediaWikiPage(
206         String title, List<String> specialNamespaces) {
207 
208         for (String namespace: specialNamespaces) {
209             if (title.startsWith(namespace + StringPool.COLON)) {
210                 return true;
211             }
212         }
213 
214         return false;
215     }
216 
217     protected boolean isValidImage(String[] paths, byte[] bytes) {
218         if (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[0])) {
219             return false;
220         }
221 
222         if ((paths.length > 1) &&
223             (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[1]))) {
224 
225             return false;
226         }
227 
228         String fileName = paths[paths.length - 1];
229 
230         try {
231             DLLocalServiceUtil.validate(fileName, bytes);
232         }
233         catch (PortalException pe) {
234             return false;
235         }
236 
237         return true;
238     }
239 
240     protected void moveFrontPage(
241         long userId, WikiNode node, Map<String, String[]> options) {
242 
243         String frontPageTitle = MapUtil.getString(
244             options, WikiImporterKeys.OPTIONS_FRONT_PAGE);
245 
246         if (Validator.isNotNull(frontPageTitle)) {
247             frontPageTitle = normalizeTitle(frontPageTitle);
248 
249             try {
250                 if (WikiPageLocalServiceUtil.getPagesCount(
251                         node.getNodeId(), frontPageTitle, true) > 0) {
252 
253                     WikiPageLocalServiceUtil.movePage(
254                         userId, node.getNodeId(), frontPageTitle,
255                         WikiPageImpl.FRONT_PAGE, false, null, null);
256 
257                 }
258             }
259             catch (Exception e) {
260                 if (_log.isWarnEnabled()) {
261                     StringBuilder sb = new StringBuilder();
262 
263                     sb.append("Could not move ");
264                     sb.append(WikiPageImpl.FRONT_PAGE);
265                     sb.append(" to the title provided: ");
266                     sb.append(frontPageTitle);
267 
268                     _log.warn(sb.toString(), e);
269                 }
270             }
271 
272         }
273 
274     }
275 
276     protected String normalize(String categoryName, int length) {
277         categoryName = TagsUtil.toWord(categoryName.trim());
278 
279         return StringUtil.shorten(categoryName, length);
280     }
281 
282     protected String normalizeDescription(String description) {
283         description = description.replaceAll(
284             _categoriesPattern.pattern(), StringPool.BLANK);
285 
286         return normalize(description, 300);
287     }
288 
289     protected String normalizeTitle(String title) {
290         title = title.replaceAll(
291             PropsValues.WIKI_PAGE_TITLES_REMOVE_REGEXP, StringPool.BLANK);
292 
293         return StringUtil.shorten(title, 75);
294     }
295 
296     private void processImages(long userId, WikiNode node, File imagesFile)
297         throws Exception {
298 
299         if ((imagesFile == null) || (!imagesFile.exists())) {
300             return;
301         }
302 
303         ProgressTracker progressTracker =
304             ProgressTrackerThreadLocal.getProgressTracker();
305 
306         int count = 0;
307 
308         ZipReader zipReader = new ZipReader(imagesFile);
309 
310         Map<String, byte[]> entries = zipReader.getEntries();
311 
312         int total = entries.size();
313 
314         if (total > 0) {
315             try {
316                 WikiPageLocalServiceUtil.getPage(
317                     node.getNodeId(), SHARED_IMAGES_TITLE);
318             }
319             catch (NoSuchPageException nspe) {
320                 WikiPageLocalServiceUtil.addPage(
321                     userId, node.getNodeId(), SHARED_IMAGES_TITLE,
322                     SHARED_IMAGES_CONTENT, null, true, null, null);
323             }
324         }
325 
326         List<ObjectValuePair<String, byte[]>> attachments =
327             new ArrayList<ObjectValuePair<String, byte[]>>();
328 
329         Iterator<Map.Entry<String, byte[]>> itr = entries.entrySet().iterator();
330 
331         int percentage = 50;
332 
333         for (int i = 0; itr.hasNext(); i++) {
334             Map.Entry<String, byte[]> entry = itr.next();
335 
336             String key = entry.getKey();
337             byte[] value = entry.getValue();
338 
339             if (key.endsWith(StringPool.SLASH)) {
340                 if (_log.isInfoEnabled()) {
341                     _log.info("Ignoring " + key);
342                 }
343 
344                 continue;
345             }
346 
347             String[] paths = StringUtil.split(key, StringPool.SLASH);
348 
349             if (!isValidImage(paths, value)) {
350                 if (_log.isInfoEnabled()) {
351                     _log.info("Ignoring " + key);
352                 }
353 
354                 continue;
355             }
356 
357             String fileName = paths[paths.length - 1].toLowerCase();
358 
359             attachments.add(
360                 new ObjectValuePair<String, byte[]>(fileName, value));
361 
362             count++;
363 
364             if ((i % 5) == 0) {
365                 WikiPageLocalServiceUtil.addPageAttachments(
366                     node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
367 
368                 attachments.clear();
369 
370                 percentage = Math.min(50 + (i * 50) / total, 99);
371 
372                 progressTracker.updateProgress(percentage);
373             }
374         }
375 
376         if (!attachments.isEmpty()) {
377             WikiPageLocalServiceUtil.addPageAttachments(
378                 node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
379         }
380 
381         if (_log.isInfoEnabled()) {
382             _log.info("Imported " + count + " images into " + node.getName());
383         }
384     }
385 
386     protected void processRegularPages(
387         long userId, WikiNode node, Element root,
388         List<String> specialNamespaces, Map<String, String> usersMap,
389         File imagesFile, Map<String, String[]> options) {
390 
391         boolean importLatestVersion = MapUtil.getBoolean(
392             options, WikiImporterKeys.OPTIONS_IMPORT_LATEST_VERSION);
393 
394         ProgressTracker progressTracker =
395             ProgressTrackerThreadLocal.getProgressTracker();
396 
397         int count = 0;
398 
399         List<Element> pages = root.elements("page");
400 
401         int total = pages.size();
402 
403         Iterator<Element> itr = root.elements("page").iterator();
404 
405         int percentage = 10;
406         int maxPercentage = 50;
407 
408         if ((imagesFile == null) || (!imagesFile.exists())) {
409             maxPercentage = 99;
410         }
411 
412         int percentageRange = maxPercentage - percentage;
413 
414         for (int i = 0; itr.hasNext(); i++) {
415             Element pageEl = itr.next();
416 
417             String title = pageEl.elementText("title");
418 
419             title = normalizeTitle(title);
420 
421             percentage = Math.min(
422                 10 + (i * percentageRange) / total, maxPercentage);
423 
424             progressTracker.updateProgress(percentage);
425 
426             if (isSpecialMediaWikiPage(title, specialNamespaces)) {
427                 continue;
428             }
429 
430             List<Element> revisionEls = pageEl.elements("revision");
431 
432             if (importLatestVersion) {
433                 Element lastRevisionEl = revisionEls.get(
434                     revisionEls.size() - 1);
435 
436                 revisionEls = new ArrayList<Element>();
437 
438                 revisionEls.add(lastRevisionEl);
439             }
440 
441             for (Element curRevisionEl : revisionEls) {
442                 String author = curRevisionEl.element(
443                     "contributor").elementText("username");
444                 String content = curRevisionEl.elementText("text");
445                 String summary = curRevisionEl.elementText("comment");
446 
447                 try {
448                     importPage(
449                         userId, author, node, title, content, summary,
450                         usersMap);
451                 }
452                 catch (Exception e) {
453                     if (_log.isWarnEnabled()) {
454                         StringBuilder sb = new StringBuilder();
455 
456                         sb.append("Page with title ");
457                         sb.append(title);
458                         sb.append(" could not be imported");
459 
460                         _log.warn(sb.toString(), e);
461                     }
462                 }
463             }
464 
465             count++;
466         }
467 
468         if (_log.isInfoEnabled()) {
469             _log.info("Imported " + count + " pages into " + node.getName());
470         }
471     }
472 
473     protected void processSpecialPages(
474             long userId, WikiNode node, Element root,
475             List<String> specialNamespaces)
476         throws PortalException {
477 
478         ProgressTracker progressTracker =
479             ProgressTrackerThreadLocal.getProgressTracker();
480 
481         List<Element> pages = root.elements("page");
482 
483         int total = pages.size();
484 
485         Iterator<Element> itr = pages.iterator();
486 
487         for (int i = 0; itr.hasNext(); i++) {
488             Element page = itr.next();
489 
490             String title = page.elementText("title");
491 
492             if (!title.startsWith("Category:")) {
493                 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
494                     root.remove(page);
495                 }
496 
497                 continue;
498             }
499 
500             String categoryName = title.substring("Category:".length());
501 
502             categoryName = normalize(categoryName, 75);
503 
504             String description = page.element("revision").elementText("text");
505 
506             description = normalizeDescription(description);
507 
508             try {
509                 TagsEntry tagsEntry = null;
510 
511                 try {
512                     tagsEntry = TagsEntryLocalServiceUtil.getEntry(
513                         node.getCompanyId(), categoryName);
514                 }
515                 catch (NoSuchEntryException nsee) {
516                     tagsEntry = TagsEntryLocalServiceUtil.addEntry(
517                         userId, categoryName);
518                 }
519 
520                 if (Validator.isNotNull(description)) {
521                     TagsPropertyLocalServiceUtil.addProperty(
522                         userId, tagsEntry.getEntryId(), "description",
523                         description);
524                 }
525             }
526             catch (SystemException se) {
527                  _log.error(se, se);
528             }
529 
530             if ((i % 5) == 0) {
531                 progressTracker.updateProgress((i * 10) / total);
532             }
533         }
534     }
535 
536     protected String readParentTitle(String content) {
537         Matcher matcher = _parentPattern.matcher(content);
538 
539         String redirectTitle = StringPool.BLANK;
540 
541         if (matcher.find()) {
542             redirectTitle = matcher.group(1);
543 
544             redirectTitle = normalizeTitle(redirectTitle);
545 
546             redirectTitle += " (disambiguation)";
547         }
548 
549         return redirectTitle;
550     }
551 
552     protected String readRedirectTitle(String content) {
553         Matcher matcher = _redirectPattern.matcher(content);
554 
555         String redirectTitle = StringPool.BLANK;
556 
557         if (matcher.find()) {
558             redirectTitle = matcher.group(1);
559 
560             redirectTitle = normalizeTitle(redirectTitle);
561         }
562 
563         return redirectTitle;
564     }
565 
566     protected List<String> readSpecialNamespaces(Element root)
567         throws ImportFilesException {
568 
569         List<String> namespaces = new ArrayList<String>();
570 
571         Element siteinfoEl = root.element("siteinfo");
572 
573         if (siteinfoEl == null) {
574             throw new ImportFilesException("Invalid pages XML file");
575         }
576 
577         Iterator<Element> itr = siteinfoEl.element(
578             "namespaces").elements("namespace").iterator();
579 
580         while (itr.hasNext()) {
581             Element namespace = itr.next();
582 
583             if (!namespace.attribute("key").equals("0")) {
584                 namespaces.add(namespace.getText());
585             }
586         }
587 
588         return namespaces;
589     }
590 
591     protected String[] readTagsEntries(
592             long userId, WikiNode node, String content)
593         throws PortalException, SystemException {
594 
595         Matcher matcher = _categoriesPattern.matcher(content);
596 
597         List<String> tagsEntries = new ArrayList<String>();
598 
599         while (matcher.find()) {
600             String categoryName = matcher.group(1);
601 
602             categoryName = normalize(categoryName, 75);
603 
604             TagsEntry tagsEntry = null;
605 
606             try {
607                 tagsEntry = TagsEntryLocalServiceUtil.getEntry(
608                     node.getCompanyId(), categoryName);
609             }
610             catch (NoSuchEntryException nsee) {
611                 tagsEntry = TagsEntryLocalServiceUtil.addEntry(
612                     userId, categoryName);
613             }
614 
615             tagsEntries.add(tagsEntry.getName());
616         }
617 
618         if (content.indexOf(_WORK_IN_PROGRESS) != -1) {
619             tagsEntries.add(_WORK_IN_PROGRESS_TAG);
620         }
621 
622         return tagsEntries.toArray(new String[tagsEntries.size()]);
623     }
624 
625     protected Map<String, String> readUsersFile(File usersFile)
626         throws IOException {
627 
628         if ((usersFile == null) || (!usersFile.exists())) {
629             return Collections.EMPTY_MAP;
630         }
631 
632         Map<String, String> usersMap = new HashMap<String, String>();
633 
634         BufferedReader reader = new BufferedReader(new FileReader(usersFile));
635 
636         String line = reader.readLine();
637 
638         while (line != null) {
639             String[] array = StringUtil.split(line);
640 
641             if ((array.length == 2) && (Validator.isNotNull(array[0])) &&
642                 (Validator.isNotNull(array[1]))) {
643 
644                 usersMap.put(array[0], array[1]);
645             }
646             else {
647                 if (_log.isInfoEnabled()) {
648                     _log.info(
649                         "Ignoring line " + line +
650                             " because it does not contain exactly 2 columns");
651                 }
652             }
653 
654             line = reader.readLine();
655         }
656 
657         return usersMap;
658     }
659 
660     private static final String[] _SPECIAL_MEDIA_WIKI_DIRS = new String[]{
661         "thumb", "temp", "archive"};
662 
663     private static final String _WORK_IN_PROGRESS = "{{Work in progress}}";
664 
665     private static final String _WORK_IN_PROGRESS_TAG = "work in progress";
666 
667     private static Log _log = LogFactory.getLog(MediaWikiImporter.class);
668 
669     private static Pattern _categoriesPattern = Pattern.compile(
670         "\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*");
671 
672     private static Pattern _parentPattern = Pattern.compile(
673         "\\{{2}OtherTopics\\|([^\\}]*)\\}{2}");
674 
675     private static Pattern _redirectPattern = Pattern.compile(
676         "#REDIRECT \\[\\[([^\\]]*)\\]\\]");
677 
678     private MediaWikiToCreoleTranslator _translator =
679         new MediaWikiToCreoleTranslator();
680 
681 }