1   /**
2    * Copyright (c) 2000-2009 Liferay, Inc. All rights reserved.
3    *
4    * Permission is hereby granted, free of charge, to any person obtaining a copy
5    * of this software and associated documentation files (the "Software"), to deal
6    * in the Software without restriction, including without limitation the rights
7    * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8    * copies of the Software, and to permit persons to whom the Software is
9    * furnished to do so, subject to the following conditions:
10   *
11   * The above copyright notice and this permission notice shall be included in
12   * all copies or substantial portions of the Software.
13   *
14   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20   * SOFTWARE.
21   */
22  
23  package com.liferay.portlet.wiki.importers.mediawiki;
24  
25  import com.liferay.documentlibrary.service.DLLocalServiceUtil;
26  import com.liferay.portal.NoSuchUserException;
27  import com.liferay.portal.PortalException;
28  import com.liferay.portal.SystemException;
29  import com.liferay.portal.kernel.log.Log;
30  import com.liferay.portal.kernel.log.LogFactoryUtil;
31  import com.liferay.portal.kernel.util.ArrayUtil;
32  import com.liferay.portal.kernel.util.MapUtil;
33  import com.liferay.portal.kernel.util.ObjectValuePair;
34  import com.liferay.portal.kernel.util.ProgressTracker;
35  import com.liferay.portal.kernel.util.ProgressTrackerThreadLocal;
36  import com.liferay.portal.kernel.util.StringPool;
37  import com.liferay.portal.kernel.util.StringUtil;
38  import com.liferay.portal.kernel.util.Validator;
39  import com.liferay.portal.kernel.xml.Document;
40  import com.liferay.portal.kernel.xml.DocumentException;
41  import com.liferay.portal.kernel.xml.Element;
42  import com.liferay.portal.kernel.xml.SAXReaderUtil;
43  import com.liferay.portal.kernel.zip.ZipReader;
44  import com.liferay.portal.model.User;
45  import com.liferay.portal.service.UserLocalServiceUtil;
46  import com.liferay.portal.util.PropsValues;
47  import com.liferay.portlet.tags.NoSuchEntryException;
48  import com.liferay.portlet.tags.model.TagsEntry;
49  import com.liferay.portlet.tags.service.TagsEntryLocalServiceUtil;
50  import com.liferay.portlet.tags.service.TagsPropertyLocalServiceUtil;
51  import com.liferay.portlet.tags.util.TagsUtil;
52  import com.liferay.portlet.wiki.ImportFilesException;
53  import com.liferay.portlet.wiki.NoSuchPageException;
54  import com.liferay.portlet.wiki.importers.WikiImporter;
55  import com.liferay.portlet.wiki.importers.WikiImporterKeys;
56  import com.liferay.portlet.wiki.model.WikiNode;
57  import com.liferay.portlet.wiki.model.WikiPage;
58  import com.liferay.portlet.wiki.model.impl.WikiPageImpl;
59  import com.liferay.portlet.wiki.service.WikiPageLocalServiceUtil;
60  import com.liferay.portlet.wiki.translators.MediaWikiToCreoleTranslator;
61  
62  import java.io.BufferedReader;
63  import java.io.File;
64  import java.io.FileReader;
65  import java.io.IOException;
66  
67  import java.util.ArrayList;
68  import java.util.Collections;
69  import java.util.HashMap;
70  import java.util.Iterator;
71  import java.util.List;
72  import java.util.Map;
73  import java.util.regex.Matcher;
74  import java.util.regex.Pattern;
75  
76  /**
77   * <a href="MediaWikiImporter.java.html"><b><i>View Source</i></b></a>
78   *
79   * @author Alvaro del Castillo
80   * @author Jorge Ferrer
81   */
82  public class MediaWikiImporter implements WikiImporter {
83  
    /**
     * Content given to the shared images page when this importer has to
     * create it.
     */
    public static final String SHARED_IMAGES_CONTENT = "See attachments";

    /**
     * Title of the wiki page that imported images are attached to.
     */
    public static final String SHARED_IMAGES_TITLE = "SharedImages";
87  
88      public void importPages(
89              long userId, WikiNode node, File[] files,
90              Map<String, String[]> options)
91          throws PortalException {
92  
93          if ((files.length < 1) || (files[0] == null) || (!files[0].exists())) {
94              throw new PortalException("The pages file is mandatory");
95          }
96  
97          File pagesFile = files[0];
98          File usersFile = files[1];
99          File imagesFile = files[2];
100 
101         try {
102             Document doc = SAXReaderUtil.read(pagesFile);
103 
104             Map<String, String> usersMap = readUsersFile(usersFile);
105 
106             Element root = doc.getRootElement();
107 
108             List<String> specialNamespaces = readSpecialNamespaces(root);
109 
110             processSpecialPages(userId, node, root, specialNamespaces);
111             processRegularPages(
112                 userId, node, root, specialNamespaces, usersMap, imagesFile,
113                 options);
114             processImages(userId, node, imagesFile);
115 
116             moveFrontPage(userId, node, options);
117         }
118         catch (DocumentException de) {
119             throw new ImportFilesException("Invalid XML file provided");
120         }
121         catch (IOException de) {
122             throw new ImportFilesException("Error reading the files provided");
123         }
124         catch (PortalException e) {
125             throw e;
126         }
127         catch (Exception e) {
128             throw new PortalException(e);
129         }
130     }
131 
132     protected long getUserId(
133             long userId, WikiNode node, String author,
134             Map<String, String> usersMap)
135         throws PortalException, SystemException {
136 
137         User user = null;
138 
139         String emailAddress = usersMap.get(author);
140 
141         try {
142             if (Validator.isNull(emailAddress)) {
143                 user = UserLocalServiceUtil.getUserByScreenName(
144                     node.getCompanyId(), author.toLowerCase());
145             }
146             else {
147                 user = UserLocalServiceUtil.getUserByEmailAddress(
148                     node.getCompanyId(), emailAddress);
149             }
150         }
151         catch (NoSuchUserException nsue) {
152             user = UserLocalServiceUtil.getUserById(userId);
153         }
154 
155         return user.getUserId();
156     }
157 
158     protected void importPage(
159             long userId, String author, WikiNode node, String title,
160             String content, String summary, Map<String, String> usersMap)
161         throws PortalException {
162 
163         try {
164             long authorUserId = getUserId(userId, node, author, usersMap);
165             String parentTitle = readParentTitle(content);
166             String redirectTitle = readRedirectTitle(content);
167             String[] tagsEntries = readTagsEntries(userId, node, content);
168 
169             if (Validator.isNull(redirectTitle)) {
170                 content = _translator.translate(content);
171             }
172             else {
173                 content =
174                     StringPool.DOUBLE_OPEN_BRACKET + redirectTitle +
175                         StringPool.DOUBLE_CLOSE_BRACKET;
176             }
177 
178             WikiPage page = null;
179 
180             try {
181                 page = WikiPageLocalServiceUtil.getPage(
182                     node.getNodeId(), title);
183             }
184             catch (NoSuchPageException nspe) {
185                 page = WikiPageLocalServiceUtil.addPage(
186                     authorUserId, node.getNodeId(), title, WikiPageImpl.NEW,
187                     null, true, null, null);
188             }
189 
190             WikiPageLocalServiceUtil.updatePage(
191                 authorUserId, node.getNodeId(), title, page.getVersion(),
192                 content, summary, true, "creole", parentTitle,
193                 redirectTitle, tagsEntries, null, null);
194         }
195         catch (Exception e) {
196             throw new PortalException("Error importing page " + title, e);
197         }
198     }
199 
200     protected boolean isSpecialMediaWikiPage(
201         String title, List<String> specialNamespaces) {
202 
203         for (String namespace: specialNamespaces) {
204             if (title.startsWith(namespace + StringPool.COLON)) {
205                 return true;
206             }
207         }
208 
209         return false;
210     }
211 
212     protected boolean isValidImage(String[] paths, byte[] bytes) {
213         if (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[0])) {
214             return false;
215         }
216 
217         if ((paths.length > 1) &&
218             (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[1]))) {
219 
220             return false;
221         }
222 
223         String fileName = paths[paths.length - 1];
224 
225         try {
226             DLLocalServiceUtil.validate(fileName, bytes);
227         }
228         catch (PortalException pe) {
229             return false;
230         }
231         catch (SystemException se) {
232             return false;
233         }
234 
235         return true;
236     }
237 
238     protected void moveFrontPage(
239         long userId, WikiNode node, Map<String, String[]> options) {
240 
241         String frontPageTitle = MapUtil.getString(
242             options, WikiImporterKeys.OPTIONS_FRONT_PAGE);
243 
244         if (Validator.isNotNull(frontPageTitle)) {
245             frontPageTitle = normalizeTitle(frontPageTitle);
246 
247             try {
248                 if (WikiPageLocalServiceUtil.getPagesCount(
249                         node.getNodeId(), frontPageTitle, true) > 0) {
250 
251                     WikiPageLocalServiceUtil.movePage(
252                         userId, node.getNodeId(), frontPageTitle,
253                         WikiPageImpl.FRONT_PAGE, false, null, null);
254 
255                 }
256             }
257             catch (Exception e) {
258                 if (_log.isWarnEnabled()) {
259                     StringBuilder sb = new StringBuilder();
260 
261                     sb.append("Could not move ");
262                     sb.append(WikiPageImpl.FRONT_PAGE);
263                     sb.append(" to the title provided: ");
264                     sb.append(frontPageTitle);
265 
266                     _log.warn(sb.toString(), e);
267                 }
268             }
269 
270         }
271 
272     }
273 
274     protected String normalize(String categoryName, int length) {
275         categoryName = TagsUtil.toWord(categoryName.trim());
276 
277         return StringUtil.shorten(categoryName, length);
278     }
279 
280     protected String normalizeDescription(String description) {
281         description = description.replaceAll(
282             _categoriesPattern.pattern(), StringPool.BLANK);
283 
284         return normalize(description, 300);
285     }
286 
287     protected String normalizeTitle(String title) {
288         title = title.replaceAll(
289             PropsValues.WIKI_PAGE_TITLES_REMOVE_REGEXP, StringPool.BLANK);
290 
291         return StringUtil.shorten(title, 75);
292     }
293 
294     private void processImages(long userId, WikiNode node, File imagesFile)
295         throws Exception {
296 
297         if ((imagesFile == null) || (!imagesFile.exists())) {
298             return;
299         }
300 
301         ProgressTracker progressTracker =
302             ProgressTrackerThreadLocal.getProgressTracker();
303 
304         int count = 0;
305 
306         ZipReader zipReader = new ZipReader(imagesFile);
307 
308         Map<String, byte[]> entries = zipReader.getEntries();
309 
310         int total = entries.size();
311 
312         if (total > 0) {
313             try {
314                 WikiPageLocalServiceUtil.getPage(
315                     node.getNodeId(), SHARED_IMAGES_TITLE);
316             }
317             catch (NoSuchPageException nspe) {
318                 WikiPageLocalServiceUtil.addPage(
319                     userId, node.getNodeId(), SHARED_IMAGES_TITLE,
320                     SHARED_IMAGES_CONTENT, null, true, null, null);
321             }
322         }
323 
324         List<ObjectValuePair<String, byte[]>> attachments =
325             new ArrayList<ObjectValuePair<String, byte[]>>();
326 
327         Iterator<Map.Entry<String, byte[]>> itr = entries.entrySet().iterator();
328 
329         int percentage = 50;
330 
331         for (int i = 0; itr.hasNext(); i++) {
332             Map.Entry<String, byte[]> entry = itr.next();
333 
334             String key = entry.getKey();
335             byte[] value = entry.getValue();
336 
337             if (key.endsWith(StringPool.SLASH)) {
338                 if (_log.isInfoEnabled()) {
339                     _log.info("Ignoring " + key);
340                 }
341 
342                 continue;
343             }
344 
345             String[] paths = StringUtil.split(key, StringPool.SLASH);
346 
347             if (!isValidImage(paths, value)) {
348                 if (_log.isInfoEnabled()) {
349                     _log.info("Ignoring " + key);
350                 }
351 
352                 continue;
353             }
354 
355             String fileName = paths[paths.length - 1].toLowerCase();
356 
357             attachments.add(
358                 new ObjectValuePair<String, byte[]>(fileName, value));
359 
360             count++;
361 
362             if ((i % 5) == 0) {
363                 WikiPageLocalServiceUtil.addPageAttachments(
364                     node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
365 
366                 attachments.clear();
367 
368                 percentage = Math.min(50 + (i * 50) / total, 99);
369 
370                 progressTracker.updateProgress(percentage);
371             }
372         }
373 
374         if (!attachments.isEmpty()) {
375             WikiPageLocalServiceUtil.addPageAttachments(
376                 node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
377         }
378 
379         if (_log.isInfoEnabled()) {
380             _log.info("Imported " + count + " images into " + node.getName());
381         }
382     }
383 
384     protected void processRegularPages(
385         long userId, WikiNode node, Element root,
386         List<String> specialNamespaces, Map<String, String> usersMap,
387         File imagesFile, Map<String, String[]> options) {
388 
389         boolean importLatestVersion = MapUtil.getBoolean(
390             options, WikiImporterKeys.OPTIONS_IMPORT_LATEST_VERSION);
391 
392         ProgressTracker progressTracker =
393             ProgressTrackerThreadLocal.getProgressTracker();
394 
395         int count = 0;
396 
397         List<Element> pages = root.elements("page");
398 
399         int total = pages.size();
400 
401         Iterator<Element> itr = root.elements("page").iterator();
402 
403         int percentage = 10;
404         int maxPercentage = 50;
405 
406         if ((imagesFile == null) || (!imagesFile.exists())) {
407             maxPercentage = 99;
408         }
409 
410         int percentageRange = maxPercentage - percentage;
411 
412         for (int i = 0; itr.hasNext(); i++) {
413             Element pageEl = itr.next();
414 
415             String title = pageEl.elementText("title");
416 
417             title = normalizeTitle(title);
418 
419             percentage = Math.min(
420                 10 + (i * percentageRange) / total, maxPercentage);
421 
422             progressTracker.updateProgress(percentage);
423 
424             if (isSpecialMediaWikiPage(title, specialNamespaces)) {
425                 continue;
426             }
427 
428             List<Element> revisionEls = pageEl.elements("revision");
429 
430             if (importLatestVersion) {
431                 Element lastRevisionEl = revisionEls.get(
432                     revisionEls.size() - 1);
433 
434                 revisionEls = new ArrayList<Element>();
435 
436                 revisionEls.add(lastRevisionEl);
437             }
438 
439             for (Element curRevisionEl : revisionEls) {
440                 String author = curRevisionEl.element(
441                     "contributor").elementText("username");
442                 String content = curRevisionEl.elementText("text");
443                 String summary = curRevisionEl.elementText("comment");
444 
445                 try {
446                     importPage(
447                         userId, author, node, title, content, summary,
448                         usersMap);
449                 }
450                 catch (Exception e) {
451                     if (_log.isWarnEnabled()) {
452                         StringBuilder sb = new StringBuilder();
453 
454                         sb.append("Page with title ");
455                         sb.append(title);
456                         sb.append(" could not be imported");
457 
458                         _log.warn(sb.toString(), e);
459                     }
460                 }
461             }
462 
463             count++;
464         }
465 
466         if (_log.isInfoEnabled()) {
467             _log.info("Imported " + count + " pages into " + node.getName());
468         }
469     }
470 
471     protected void processSpecialPages(
472             long userId, WikiNode node, Element root,
473             List<String> specialNamespaces)
474         throws PortalException {
475 
476         ProgressTracker progressTracker =
477             ProgressTrackerThreadLocal.getProgressTracker();
478 
479         List<Element> pages = root.elements("page");
480 
481         int total = pages.size();
482 
483         Iterator<Element> itr = pages.iterator();
484 
485         for (int i = 0; itr.hasNext(); i++) {
486             Element page = itr.next();
487 
488             String title = page.elementText("title");
489 
490             if (!title.startsWith("Category:")) {
491                 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
492                     root.remove(page);
493                 }
494 
495                 continue;
496             }
497 
498             String categoryName = title.substring("Category:".length());
499 
500             categoryName = normalize(categoryName, 75);
501 
502             String description = page.element("revision").elementText("text");
503 
504             description = normalizeDescription(description);
505 
506             try {
507                 TagsEntry tagsEntry = null;
508 
509                 try {
510                     tagsEntry = TagsEntryLocalServiceUtil.getEntry(
511                         node.getCompanyId(), categoryName);
512                 }
513                 catch (NoSuchEntryException nsee) {
514                     tagsEntry = TagsEntryLocalServiceUtil.addEntry(
515                         userId, categoryName);
516                 }
517 
518                 if (Validator.isNotNull(description)) {
519                     TagsPropertyLocalServiceUtil.addProperty(
520                         userId, tagsEntry.getEntryId(), "description",
521                         description);
522                 }
523             }
524             catch (SystemException se) {
525                  _log.error(se, se);
526             }
527 
528             if ((i % 5) == 0) {
529                 progressTracker.updateProgress((i * 10) / total);
530             }
531         }
532     }
533 
534     protected String readParentTitle(String content) {
535         Matcher matcher = _parentPattern.matcher(content);
536 
537         String redirectTitle = StringPool.BLANK;
538 
539         if (matcher.find()) {
540             redirectTitle = matcher.group(1);
541 
542             redirectTitle = normalizeTitle(redirectTitle);
543 
544             redirectTitle += " (disambiguation)";
545         }
546 
547         return redirectTitle;
548     }
549 
550     protected String readRedirectTitle(String content) {
551         Matcher matcher = _redirectPattern.matcher(content);
552 
553         String redirectTitle = StringPool.BLANK;
554 
555         if (matcher.find()) {
556             redirectTitle = matcher.group(1);
557 
558             redirectTitle = normalizeTitle(redirectTitle);
559         }
560 
561         return redirectTitle;
562     }
563 
564     protected List<String> readSpecialNamespaces(Element root)
565         throws ImportFilesException {
566 
567         List<String> namespaces = new ArrayList<String>();
568 
569         Element siteinfoEl = root.element("siteinfo");
570 
571         if (siteinfoEl == null) {
572             throw new ImportFilesException("Invalid pages XML file");
573         }
574 
575         Iterator<Element> itr = siteinfoEl.element(
576             "namespaces").elements("namespace").iterator();
577 
578         while (itr.hasNext()) {
579             Element namespace = itr.next();
580 
581             if (!namespace.attribute("key").getData().equals("0")) {
582                 namespaces.add(namespace.getText());
583             }
584         }
585 
586         return namespaces;
587     }
588 
589     protected String[] readTagsEntries(
590             long userId, WikiNode node, String content)
591         throws PortalException, SystemException {
592 
593         Matcher matcher = _categoriesPattern.matcher(content);
594 
595         List<String> tagsEntries = new ArrayList<String>();
596 
597         while (matcher.find()) {
598             String categoryName = matcher.group(1);
599 
600             categoryName = normalize(categoryName, 75);
601 
602             TagsEntry tagsEntry = null;
603 
604             try {
605                 tagsEntry = TagsEntryLocalServiceUtil.getEntry(
606                     node.getCompanyId(), categoryName);
607             }
608             catch (NoSuchEntryException nsee) {
609                 tagsEntry = TagsEntryLocalServiceUtil.addEntry(
610                     userId, categoryName);
611             }
612 
613             tagsEntries.add(tagsEntry.getName());
614         }
615 
616         if (content.indexOf(_WORK_IN_PROGRESS) != -1) {
617             tagsEntries.add(_WORK_IN_PROGRESS_TAG);
618         }
619 
620         return tagsEntries.toArray(new String[tagsEntries.size()]);
621     }
622 
623     protected Map<String, String> readUsersFile(File usersFile)
624         throws IOException {
625 
626         if ((usersFile == null) || (!usersFile.exists())) {
627             return Collections.EMPTY_MAP;
628         }
629 
630         Map<String, String> usersMap = new HashMap<String, String>();
631 
632         BufferedReader reader = new BufferedReader(new FileReader(usersFile));
633 
634         String line = reader.readLine();
635 
636         while (line != null) {
637             String[] array = StringUtil.split(line);
638 
639             if ((array.length == 2) && (Validator.isNotNull(array[0])) &&
640                 (Validator.isNotNull(array[1]))) {
641 
642                 usersMap.put(array[0], array[1]);
643             }
644             else {
645                 if (_log.isInfoEnabled()) {
646                     _log.info(
647                         "Ignoring line " + line +
648                             " because it does not contain exactly 2 columns");
649                 }
650             }
651 
652             line = reader.readLine();
653         }
654 
655         return usersMap;
656     }
657 
658     private static final String[] _SPECIAL_MEDIA_WIKI_DIRS = {
659         "thumb", "temp", "archive"
660     };
661 
662     private static final String _WORK_IN_PROGRESS = "{{Work in progress}}";
663 
664     private static final String _WORK_IN_PROGRESS_TAG = "work in progress";
665 
666     private static Log _log = LogFactoryUtil.getLog(MediaWikiImporter.class);
667 
668     private static Pattern _categoriesPattern = Pattern.compile(
669         "\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*");
670     private static Pattern _parentPattern = Pattern.compile(
671         "\\{{2}OtherTopics\\|([^\\}]*)\\}{2}");
672     private static Pattern _redirectPattern = Pattern.compile(
673         "#REDIRECT \\[\\[([^\\]]*)\\]\\]");
674 
675     private MediaWikiToCreoleTranslator _translator =
676         new MediaWikiToCreoleTranslator();
677 
678 }