1   /**
2    * Copyright (c) 2000-2010 Liferay, Inc. All rights reserved.
3    *
4    * The contents of this file are subject to the terms of the Liferay Enterprise
5    * Subscription License ("License"). You may not use this file except in
6    * compliance with the License. You can obtain a copy of the License by
7    * contacting Liferay, Inc. See the License for the specific language governing
8    * permissions and limitations under the License, including but not limited to
9    * distribution rights of the Software.
10   *
11   *
12   *
13   */
14  
15  package com.liferay.portlet.wiki.importers.mediawiki;
16  
17  import com.liferay.documentlibrary.service.DLLocalServiceUtil;
18  import com.liferay.portal.NoSuchUserException;
19  import com.liferay.portal.PortalException;
20  import com.liferay.portal.SystemException;
21  import com.liferay.portal.kernel.io.unsync.UnsyncBufferedReader;
22  import com.liferay.portal.kernel.log.Log;
23  import com.liferay.portal.kernel.log.LogFactoryUtil;
24  import com.liferay.portal.kernel.util.ArrayUtil;
25  import com.liferay.portal.kernel.util.MapUtil;
26  import com.liferay.portal.kernel.util.ObjectValuePair;
27  import com.liferay.portal.kernel.util.ProgressTracker;
28  import com.liferay.portal.kernel.util.ProgressTrackerThreadLocal;
29  import com.liferay.portal.kernel.util.StringBundler;
30  import com.liferay.portal.kernel.util.StringPool;
31  import com.liferay.portal.kernel.util.StringUtil;
32  import com.liferay.portal.kernel.util.Validator;
33  import com.liferay.portal.kernel.xml.Document;
34  import com.liferay.portal.kernel.xml.DocumentException;
35  import com.liferay.portal.kernel.xml.Element;
36  import com.liferay.portal.kernel.xml.SAXReaderUtil;
37  import com.liferay.portal.kernel.zip.ZipReader;
38  import com.liferay.portal.kernel.zip.ZipReaderFactoryUtil;
39  import com.liferay.portal.model.User;
40  import com.liferay.portal.service.ServiceContext;
41  import com.liferay.portal.service.UserLocalServiceUtil;
42  import com.liferay.portal.util.PropsValues;
43  import com.liferay.portlet.tags.NoSuchEntryException;
44  import com.liferay.portlet.tags.model.TagsEntry;
45  import com.liferay.portlet.tags.service.TagsEntryLocalServiceUtil;
46  import com.liferay.portlet.tags.service.TagsPropertyLocalServiceUtil;
47  import com.liferay.portlet.tags.util.TagsUtil;
48  import com.liferay.portlet.wiki.ImportFilesException;
49  import com.liferay.portlet.wiki.NoSuchPageException;
50  import com.liferay.portlet.wiki.importers.WikiImporter;
51  import com.liferay.portlet.wiki.importers.WikiImporterKeys;
52  import com.liferay.portlet.wiki.model.WikiNode;
53  import com.liferay.portlet.wiki.model.WikiPage;
54  import com.liferay.portlet.wiki.model.impl.WikiPageImpl;
55  import com.liferay.portlet.wiki.service.WikiPageLocalServiceUtil;
56  import com.liferay.portlet.wiki.translators.MediaWikiToCreoleTranslator;
57  
58  import java.io.File;
59  import java.io.FileReader;
60  import java.io.IOException;
61  
62  import java.util.ArrayList;
63  import java.util.Collections;
64  import java.util.HashMap;
65  import java.util.Iterator;
66  import java.util.List;
67  import java.util.Map;
68  import java.util.regex.Matcher;
69  import java.util.regex.Pattern;
70  
71  /**
72   * <a href="MediaWikiImporter.java.html"><b><i>View Source</i></b></a>
73   *
74   * @author Alvaro del Castillo
75   * @author Jorge Ferrer
76   */
77  public class MediaWikiImporter implements WikiImporter {
78  
    /**
     * Content passed to <code>WikiPageLocalServiceUtil.addPage</code> when
     * <code>processImages</code> has to create the shared images page.
     */
    public static final String SHARED_IMAGES_CONTENT = "See attachments";

    /**
     * Title of the wiki page that <code>processImages</code> attaches the
     * imported images to.
     */
    public static final String SHARED_IMAGES_TITLE = "SharedImages";
82  
83      public void importPages(
84              long userId, WikiNode node, File[] files,
85              Map<String, String[]> options)
86          throws PortalException {
87  
88          if ((files.length < 1) || (files[0] == null) || (!files[0].exists())) {
89              throw new PortalException("The pages file is mandatory");
90          }
91  
92          File pagesFile = files[0];
93          File usersFile = files[1];
94          File imagesFile = files[2];
95  
96          try {
97              Document doc = SAXReaderUtil.read(pagesFile);
98  
99              Map<String, String> usersMap = readUsersFile(usersFile);
100 
101             Element root = doc.getRootElement();
102 
103             List<String> specialNamespaces = readSpecialNamespaces(root);
104 
105             processSpecialPages(userId, node, root, specialNamespaces);
106             processRegularPages(
107                 userId, node, root, specialNamespaces, usersMap, imagesFile,
108                 options);
109             processImages(userId, node, imagesFile);
110 
111             moveFrontPage(userId, node, options);
112         }
113         catch (DocumentException de) {
114             throw new ImportFilesException("Invalid XML file provided");
115         }
116         catch (IOException de) {
117             throw new ImportFilesException("Error reading the files provided");
118         }
119         catch (PortalException e) {
120             throw e;
121         }
122         catch (Exception e) {
123             throw new PortalException(e);
124         }
125     }
126 
127     protected long getUserId(
128             long userId, WikiNode node, String author,
129             Map<String, String> usersMap)
130         throws PortalException, SystemException {
131 
132         User user = null;
133 
134         String emailAddress = usersMap.get(author);
135 
136         try {
137             if (Validator.isNull(emailAddress)) {
138                 user = UserLocalServiceUtil.getUserByScreenName(
139                     node.getCompanyId(), author.toLowerCase());
140             }
141             else {
142                 user = UserLocalServiceUtil.getUserByEmailAddress(
143                     node.getCompanyId(), emailAddress);
144             }
145         }
146         catch (NoSuchUserException nsue) {
147             user = UserLocalServiceUtil.getUserById(userId);
148         }
149 
150         return user.getUserId();
151     }
152 
153     protected void importPage(
154             long userId, String author, WikiNode node, String title,
155             String content, String summary, Map<String, String> usersMap)
156         throws PortalException {
157 
158         try {
159             long authorUserId = getUserId(userId, node, author, usersMap);
160             String parentTitle = readParentTitle(content);
161             String redirectTitle = readRedirectTitle(content);
162 
163             ServiceContext serviceContext = new ServiceContext();
164 
165             serviceContext.setAddCommunityPermissions(true);
166             serviceContext.setAddGuestPermissions(true);
167             serviceContext.setTagsEntries(
168                 readTagsEntries(userId, node, content));
169 
170             if (Validator.isNull(redirectTitle)) {
171                 content = _translator.translate(content);
172             }
173             else {
174                 content =
175                     StringPool.DOUBLE_OPEN_BRACKET + redirectTitle +
176                         StringPool.DOUBLE_CLOSE_BRACKET;
177             }
178 
179             WikiPage page = null;
180 
181             try {
182                 page = WikiPageLocalServiceUtil.getPage(
183                     node.getNodeId(), title);
184             }
185             catch (NoSuchPageException nspe) {
186                 page = WikiPageLocalServiceUtil.addPage(
187                     authorUserId, node.getNodeId(), title, WikiPageImpl.NEW,
188                     null, true, serviceContext);
189             }
190 
191             WikiPageLocalServiceUtil.updatePage(
192                 authorUserId, node.getNodeId(), title, page.getVersion(),
193                 content, summary, true, "creole", parentTitle, redirectTitle,
194                 serviceContext);
195         }
196         catch (Exception e) {
197             throw new PortalException("Error importing page " + title, e);
198         }
199     }
200 
201     protected boolean isSpecialMediaWikiPage(
202         String title, List<String> specialNamespaces) {
203 
204         for (String namespace: specialNamespaces) {
205             if (title.startsWith(namespace + StringPool.COLON)) {
206                 return true;
207             }
208         }
209 
210         return false;
211     }
212 
213     protected boolean isValidImage(String[] paths, byte[] bytes) {
214         if (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[0])) {
215             return false;
216         }
217 
218         if ((paths.length > 1) &&
219             (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[1]))) {
220 
221             return false;
222         }
223 
224         String fileName = paths[paths.length - 1];
225 
226         try {
227             DLLocalServiceUtil.validate(fileName, bytes);
228         }
229         catch (PortalException pe) {
230             return false;
231         }
232         catch (SystemException se) {
233             return false;
234         }
235 
236         return true;
237     }
238 
239     protected void moveFrontPage(
240         long userId, WikiNode node, Map<String, String[]> options) {
241 
242         String frontPageTitle = MapUtil.getString(
243             options, WikiImporterKeys.OPTIONS_FRONT_PAGE);
244 
245         if (Validator.isNotNull(frontPageTitle)) {
246             frontPageTitle = normalizeTitle(frontPageTitle);
247 
248             try {
249                 if (WikiPageLocalServiceUtil.getPagesCount(
250                         node.getNodeId(), frontPageTitle, true) > 0) {
251 
252                     ServiceContext serviceContext = new ServiceContext();
253 
254                     serviceContext.setAddCommunityPermissions(true);
255                     serviceContext.setAddGuestPermissions(true);
256 
257                     WikiPageLocalServiceUtil.movePage(
258                         userId, node.getNodeId(), frontPageTitle,
259                         WikiPageImpl.FRONT_PAGE, false, serviceContext);
260 
261                 }
262             }
263             catch (Exception e) {
264                 if (_log.isWarnEnabled()) {
265                     StringBundler sb = new StringBundler(4);
266 
267                     sb.append("Could not move ");
268                     sb.append(WikiPageImpl.FRONT_PAGE);
269                     sb.append(" to the title provided: ");
270                     sb.append(frontPageTitle);
271 
272                     _log.warn(sb.toString(), e);
273                 }
274             }
275 
276         }
277 
278     }
279 
280     protected String normalize(String categoryName, int length) {
281         categoryName = TagsUtil.toWord(categoryName.trim());
282 
283         return StringUtil.shorten(categoryName, length);
284     }
285 
286     protected String normalizeDescription(String description) {
287         description = description.replaceAll(
288             _categoriesPattern.pattern(), StringPool.BLANK);
289 
290         return normalize(description, 300);
291     }
292 
293     protected String normalizeTitle(String title) {
294         title = title.replaceAll(
295             PropsValues.WIKI_PAGE_TITLES_REMOVE_REGEXP, StringPool.BLANK);
296 
297         return StringUtil.shorten(title, 75);
298     }
299 
300     protected void processImages(long userId, WikiNode node, File imagesFile)
301         throws Exception {
302 
303         if ((imagesFile == null) || (!imagesFile.exists())) {
304             return;
305         }
306 
307         ProgressTracker progressTracker =
308             ProgressTrackerThreadLocal.getProgressTracker();
309 
310         int count = 0;
311 
312         ZipReader zipReader = ZipReaderFactoryUtil.getZipReader(imagesFile);
313 
314         List<String> entries = zipReader.getEntries();
315 
316         int total = entries.size();
317 
318         if (total > 0) {
319             try {
320                 WikiPageLocalServiceUtil.getPage(
321                     node.getNodeId(), SHARED_IMAGES_TITLE);
322             }
323             catch (NoSuchPageException nspe) {
324                 ServiceContext serviceContext = new ServiceContext();
325 
326                 serviceContext.setAddCommunityPermissions(true);
327                 serviceContext.setAddGuestPermissions(true);
328 
329                 WikiPageLocalServiceUtil.addPage(
330                     userId, node.getNodeId(), SHARED_IMAGES_TITLE,
331                     SHARED_IMAGES_CONTENT, null, true, serviceContext);
332             }
333         }
334 
335         List<ObjectValuePair<String, byte[]>> attachments =
336             new ArrayList<ObjectValuePair<String, byte[]>>();
337 
338         int percentage = 50;
339 
340         for (int i = 0; i < entries.size(); i++) {
341             String entry = entries.get(i);
342 
343             String key = entry;
344             byte[] value = zipReader.getEntryAsByteArray(entry);
345 
346             if (key.endsWith(StringPool.SLASH)) {
347                 if (_log.isInfoEnabled()) {
348                     _log.info("Ignoring " + key);
349                 }
350 
351                 continue;
352             }
353 
354             String[] paths = StringUtil.split(key, StringPool.SLASH);
355 
356             if (!isValidImage(paths, value)) {
357                 if (_log.isInfoEnabled()) {
358                     _log.info("Ignoring " + key);
359                 }
360 
361                 continue;
362             }
363 
364             String fileName = paths[paths.length - 1].toLowerCase();
365 
366             attachments.add(
367                 new ObjectValuePair<String, byte[]>(fileName, value));
368 
369             count++;
370 
371             if ((i % 5) == 0) {
372                 WikiPageLocalServiceUtil.addPageAttachments(
373                     node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
374 
375                 attachments.clear();
376 
377                 percentage = Math.min(50 + (i * 50) / total, 99);
378 
379                 progressTracker.updateProgress(percentage);
380             }
381         }
382 
383         if (!attachments.isEmpty()) {
384             WikiPageLocalServiceUtil.addPageAttachments(
385                 node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
386         }
387 
388         zipReader.close();
389 
390         if (_log.isInfoEnabled()) {
391             _log.info("Imported " + count + " images into " + node.getName());
392         }
393     }
394 
395     protected void processRegularPages(
396         long userId, WikiNode node, Element root,
397         List<String> specialNamespaces, Map<String, String> usersMap,
398         File imagesFile, Map<String, String[]> options) {
399 
400         boolean importLatestVersion = MapUtil.getBoolean(
401             options, WikiImporterKeys.OPTIONS_IMPORT_LATEST_VERSION);
402 
403         ProgressTracker progressTracker =
404             ProgressTrackerThreadLocal.getProgressTracker();
405 
406         int count = 0;
407 
408         List<Element> pages = root.elements("page");
409 
410         int total = pages.size();
411 
412         Iterator<Element> itr = root.elements("page").iterator();
413 
414         int percentage = 10;
415         int maxPercentage = 50;
416 
417         if ((imagesFile == null) || (!imagesFile.exists())) {
418             maxPercentage = 99;
419         }
420 
421         int percentageRange = maxPercentage - percentage;
422 
423         for (int i = 0; itr.hasNext(); i++) {
424             Element pageEl = itr.next();
425 
426             String title = pageEl.elementText("title");
427 
428             title = normalizeTitle(title);
429 
430             percentage = Math.min(
431                 10 + (i * percentageRange) / total, maxPercentage);
432 
433             progressTracker.updateProgress(percentage);
434 
435             if (isSpecialMediaWikiPage(title, specialNamespaces)) {
436                 continue;
437             }
438 
439             List<Element> revisionEls = pageEl.elements("revision");
440 
441             if (importLatestVersion) {
442                 Element lastRevisionEl = revisionEls.get(
443                     revisionEls.size() - 1);
444 
445                 revisionEls = new ArrayList<Element>();
446 
447                 revisionEls.add(lastRevisionEl);
448             }
449 
450             for (Element curRevisionEl : revisionEls) {
451                 String author = curRevisionEl.element(
452                     "contributor").elementText("username");
453                 String content = curRevisionEl.elementText("text");
454                 String summary = curRevisionEl.elementText("comment");
455 
456                 try {
457                     importPage(
458                         userId, author, node, title, content, summary,
459                         usersMap);
460                 }
461                 catch (Exception e) {
462                     if (_log.isWarnEnabled()) {
463                         StringBundler sb = new StringBundler(3);
464 
465                         sb.append("Page with title ");
466                         sb.append(title);
467                         sb.append(" could not be imported");
468 
469                         _log.warn(sb.toString(), e);
470                     }
471                 }
472             }
473 
474             count++;
475         }
476 
477         if (_log.isInfoEnabled()) {
478             _log.info("Imported " + count + " pages into " + node.getName());
479         }
480     }
481 
482     protected void processSpecialPages(
483             long userId, WikiNode node, Element root,
484             List<String> specialNamespaces)
485         throws PortalException {
486 
487         ProgressTracker progressTracker =
488             ProgressTrackerThreadLocal.getProgressTracker();
489 
490         List<Element> pages = root.elements("page");
491 
492         int total = pages.size();
493 
494         Iterator<Element> itr = pages.iterator();
495 
496         for (int i = 0; itr.hasNext(); i++) {
497             Element page = itr.next();
498 
499             String title = page.elementText("title");
500 
501             if (!title.startsWith("Category:")) {
502                 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
503                     root.remove(page);
504                 }
505 
506                 continue;
507             }
508 
509             String categoryName = title.substring("Category:".length());
510 
511             categoryName = normalize(categoryName, 75);
512 
513             String description = page.element("revision").elementText("text");
514 
515             description = normalizeDescription(description);
516 
517             try {
518                 TagsEntry tagsEntry = null;
519 
520                 try {
521                     tagsEntry = TagsEntryLocalServiceUtil.getEntry(
522                         node.getCompanyId(), categoryName);
523                 }
524                 catch (NoSuchEntryException nsee) {
525                     ServiceContext serviceContext = new ServiceContext();
526 
527                     serviceContext.setAddCommunityPermissions(true);
528                     serviceContext.setAddGuestPermissions(true);
529                     serviceContext.setScopeGroupId(node.getGroupId());
530 
531                     tagsEntry = TagsEntryLocalServiceUtil.addEntry(
532                         userId, null, categoryName, null, null, serviceContext);
533                 }
534 
535                 if (Validator.isNotNull(description)) {
536                     TagsPropertyLocalServiceUtil.addProperty(
537                         userId, tagsEntry.getEntryId(), "description",
538                         description);
539                 }
540             }
541             catch (SystemException se) {
542                  _log.error(se, se);
543             }
544 
545             if ((i % 5) == 0) {
546                 progressTracker.updateProgress((i * 10) / total);
547             }
548         }
549     }
550 
551     protected String readParentTitle(String content) {
552         Matcher matcher = _parentPattern.matcher(content);
553 
554         String redirectTitle = StringPool.BLANK;
555 
556         if (matcher.find()) {
557             redirectTitle = matcher.group(1);
558 
559             redirectTitle = normalizeTitle(redirectTitle);
560 
561             redirectTitle += " (disambiguation)";
562         }
563 
564         return redirectTitle;
565     }
566 
567     protected String readRedirectTitle(String content) {
568         Matcher matcher = _redirectPattern.matcher(content);
569 
570         String redirectTitle = StringPool.BLANK;
571 
572         if (matcher.find()) {
573             redirectTitle = matcher.group(1);
574 
575             redirectTitle = normalizeTitle(redirectTitle);
576         }
577 
578         return redirectTitle;
579     }
580 
581     protected List<String> readSpecialNamespaces(Element root)
582         throws ImportFilesException {
583 
584         List<String> namespaces = new ArrayList<String>();
585 
586         Element siteinfoEl = root.element("siteinfo");
587 
588         if (siteinfoEl == null) {
589             throw new ImportFilesException("Invalid pages XML file");
590         }
591 
592         Iterator<Element> itr = siteinfoEl.element(
593             "namespaces").elements("namespace").iterator();
594 
595         while (itr.hasNext()) {
596             Element namespace = itr.next();
597 
598             if (!namespace.attribute("key").getData().equals("0")) {
599                 namespaces.add(namespace.getText());
600             }
601         }
602 
603         return namespaces;
604     }
605 
606     protected String[] readTagsEntries(
607             long userId, WikiNode node, String content)
608         throws PortalException, SystemException {
609 
610         Matcher matcher = _categoriesPattern.matcher(content);
611 
612         List<String> tagsEntries = new ArrayList<String>();
613 
614         while (matcher.find()) {
615             String categoryName = matcher.group(1);
616 
617             categoryName = normalize(categoryName, 75);
618 
619             TagsEntry tagsEntry = null;
620 
621             try {
622                 tagsEntry = TagsEntryLocalServiceUtil.getEntry(
623                     node.getGroupId(), categoryName);
624             }
625             catch (NoSuchEntryException nsee) {
626                 ServiceContext serviceContext = new ServiceContext();
627 
628                 serviceContext.setAddCommunityPermissions(true);
629                 serviceContext.setAddGuestPermissions(true);
630                 serviceContext.setScopeGroupId(node.getGroupId());
631 
632                 tagsEntry = TagsEntryLocalServiceUtil.addEntry(
633                     userId, null, categoryName, null, null, serviceContext);
634             }
635 
636             tagsEntries.add(tagsEntry.getName());
637         }
638 
639         if (content.indexOf(_WORK_IN_PROGRESS) != -1) {
640             tagsEntries.add(_WORK_IN_PROGRESS_TAG);
641         }
642 
643         return tagsEntries.toArray(new String[tagsEntries.size()]);
644     }
645 
646     protected Map<String, String> readUsersFile(File usersFile)
647         throws IOException {
648 
649         if ((usersFile == null) || (!usersFile.exists())) {
650             return Collections.EMPTY_MAP;
651         }
652 
653         Map<String, String> usersMap = new HashMap<String, String>();
654 
655         UnsyncBufferedReader unsyncBufferedReader =
656             new UnsyncBufferedReader(new FileReader(usersFile));
657 
658         String line = unsyncBufferedReader.readLine();
659 
660         while (line != null) {
661             String[] array = StringUtil.split(line);
662 
663             if ((array.length == 2) && (Validator.isNotNull(array[0])) &&
664                 (Validator.isNotNull(array[1]))) {
665 
666                 usersMap.put(array[0], array[1]);
667             }
668             else {
669                 if (_log.isInfoEnabled()) {
670                     _log.info(
671                         "Ignoring line " + line +
672                             " because it does not contain exactly 2 columns");
673                 }
674             }
675 
676             line = unsyncBufferedReader.readLine();
677         }
678 
679         return usersMap;
680     }
681 
682     private static final String[] _SPECIAL_MEDIA_WIKI_DIRS = {
683         "thumb", "temp", "archive"
684     };
685 
686     private static final String _WORK_IN_PROGRESS = "{{Work in progress}}";
687 
688     private static final String _WORK_IN_PROGRESS_TAG = "work in progress";
689 
690     private static Log _log = LogFactoryUtil.getLog(MediaWikiImporter.class);
691 
692     private static Pattern _categoriesPattern = Pattern.compile(
693         "\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*");
694     private static Pattern _parentPattern = Pattern.compile(
695         "\\{{2}OtherTopics\\|([^\\}]*)\\}{2}");
696     private static Pattern _redirectPattern = Pattern.compile(
697         "#REDIRECT \\[\\[([^\\]]*)\\]\\]");
698 
699     private MediaWikiToCreoleTranslator _translator =
700         new MediaWikiToCreoleTranslator();
701 
702 }