1   /**
2    * Copyright (c) 2000-2009 Liferay, Inc. All rights reserved.
3    *
4    * Permission is hereby granted, free of charge, to any person obtaining a copy
5    * of this software and associated documentation files (the "Software"), to deal
6    * in the Software without restriction, including without limitation the rights
7    * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8    * copies of the Software, and to permit persons to whom the Software is
9    * furnished to do so, subject to the following conditions:
10   *
11   * The above copyright notice and this permission notice shall be included in
12   * all copies or substantial portions of the Software.
13   *
14   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20   * SOFTWARE.
21   */
22  
23  package com.liferay.portlet.wiki.importers.mediawiki;
24  
25  import com.liferay.documentlibrary.service.DLLocalServiceUtil;
26  import com.liferay.portal.NoSuchUserException;
27  import com.liferay.portal.PortalException;
28  import com.liferay.portal.SystemException;
29  import com.liferay.portal.kernel.log.Log;
30  import com.liferay.portal.kernel.log.LogFactoryUtil;
31  import com.liferay.portal.kernel.util.ArrayUtil;
32  import com.liferay.portal.kernel.util.MapUtil;
33  import com.liferay.portal.kernel.util.ObjectValuePair;
34  import com.liferay.portal.kernel.util.ProgressTracker;
35  import com.liferay.portal.kernel.util.ProgressTrackerThreadLocal;
36  import com.liferay.portal.kernel.util.StringPool;
37  import com.liferay.portal.kernel.util.StringUtil;
38  import com.liferay.portal.kernel.util.Validator;
39  import com.liferay.portal.kernel.xml.Document;
40  import com.liferay.portal.kernel.xml.DocumentException;
41  import com.liferay.portal.kernel.xml.Element;
42  import com.liferay.portal.kernel.xml.SAXReaderUtil;
43  import com.liferay.portal.kernel.zip.ZipReader;
44  import com.liferay.portal.model.User;
45  import com.liferay.portal.service.UserLocalServiceUtil;
46  import com.liferay.portal.util.PropsValues;
47  import com.liferay.portlet.tags.NoSuchEntryException;
48  import com.liferay.portlet.tags.model.TagsEntry;
49  import com.liferay.portlet.tags.service.TagsEntryLocalServiceUtil;
50  import com.liferay.portlet.tags.service.TagsPropertyLocalServiceUtil;
51  import com.liferay.portlet.tags.util.TagsUtil;
52  import com.liferay.portlet.wiki.ImportFilesException;
53  import com.liferay.portlet.wiki.NoSuchPageException;
54  import com.liferay.portlet.wiki.importers.WikiImporter;
55  import com.liferay.portlet.wiki.importers.WikiImporterKeys;
56  import com.liferay.portlet.wiki.model.WikiNode;
57  import com.liferay.portlet.wiki.model.WikiPage;
58  import com.liferay.portlet.wiki.model.impl.WikiPageImpl;
59  import com.liferay.portlet.wiki.service.WikiPageLocalServiceUtil;
60  import com.liferay.portlet.wiki.translators.MediaWikiToCreoleTranslator;
61  
62  import java.io.BufferedReader;
63  import java.io.File;
64  import java.io.FileReader;
65  import java.io.IOException;
66  
67  import java.util.ArrayList;
68  import java.util.Collections;
69  import java.util.HashMap;
70  import java.util.Iterator;
71  import java.util.List;
72  import java.util.Map;
73  import java.util.regex.Matcher;
74  import java.util.regex.Pattern;
75  
76  /**
77   * <a href="MediaWikiImporter.java.html"><b><i>View Source</i></b></a>
78   *
79   * @author Alvaro del Castillo
80   * @author Jorge Ferrer
81   *
82   */
83  public class MediaWikiImporter implements WikiImporter {
84  
    /**
     * Content given to the shared images page when this importer has to
     * create it.
     */
    public static final String SHARED_IMAGES_CONTENT = "See attachments";

    /**
     * Title of the wiki page that imported images are attached to.
     */
    public static final String SHARED_IMAGES_TITLE = "SharedImages";
88  
89      public void importPages(
90              long userId, WikiNode node, File[] files,
91              Map<String, String[]> options)
92          throws PortalException {
93  
94          if ((files.length < 1) || (files[0] == null) || (!files[0].exists())) {
95              throw new PortalException("The pages file is mandatory");
96          }
97  
98          File pagesFile = files[0];
99          File usersFile = files[1];
100         File imagesFile = files[2];
101 
102         try {
103             Document doc = SAXReaderUtil.read(pagesFile);
104 
105             Map<String, String> usersMap = readUsersFile(usersFile);
106 
107             Element root = doc.getRootElement();
108 
109             List<String> specialNamespaces = readSpecialNamespaces(root);
110 
111             processSpecialPages(userId, node, root, specialNamespaces);
112             processRegularPages(
113                 userId, node, root, specialNamespaces, usersMap, imagesFile,
114                 options);
115             processImages(userId, node, imagesFile);
116 
117             moveFrontPage(userId, node, options);
118         }
119         catch (DocumentException de) {
120             throw new ImportFilesException("Invalid XML file provided");
121         }
122         catch (IOException de) {
123             throw new ImportFilesException("Error reading the files provided");
124         }
125         catch (PortalException e) {
126             throw e;
127         }
128         catch (Exception e) {
129             throw new PortalException(e);
130         }
131     }
132 
133     protected long getUserId(
134             long userId, WikiNode node, String author,
135             Map<String, String> usersMap)
136         throws PortalException, SystemException {
137 
138         User user = null;
139 
140         String emailAddress = usersMap.get(author);
141 
142         try {
143             if (Validator.isNull(emailAddress)) {
144                 user = UserLocalServiceUtil.getUserByScreenName(
145                     node.getCompanyId(), author.toLowerCase());
146             }
147             else {
148                 user = UserLocalServiceUtil.getUserByEmailAddress(
149                     node.getCompanyId(), emailAddress);
150             }
151         }
152         catch (NoSuchUserException nsue) {
153             user = UserLocalServiceUtil.getUserById(userId);
154         }
155 
156         return user.getUserId();
157     }
158 
159     protected void importPage(
160             long userId, String author, WikiNode node, String title,
161             String content, String summary, Map<String, String> usersMap)
162         throws PortalException {
163 
164         try {
165             long authorUserId = getUserId(userId, node, author, usersMap);
166             String parentTitle = readParentTitle(content);
167             String redirectTitle = readRedirectTitle(content);
168             String[] tagsEntries = readTagsEntries(userId, node, content);
169 
170             if (Validator.isNull(redirectTitle)) {
171                 content = _translator.translate(content);
172             }
173             else {
174                 content =
175                     StringPool.DOUBLE_OPEN_BRACKET + redirectTitle +
176                         StringPool.DOUBLE_CLOSE_BRACKET;
177             }
178 
179             WikiPage page = null;
180 
181             try {
182                 page = WikiPageLocalServiceUtil.getPage(
183                     node.getNodeId(), title);
184             }
185             catch (NoSuchPageException nspe) {
186                 page = WikiPageLocalServiceUtil.addPage(
187                     authorUserId, node.getNodeId(), title, WikiPageImpl.NEW,
188                     null, true, null, null);
189             }
190 
191             WikiPageLocalServiceUtil.updatePage(
192                 authorUserId, node.getNodeId(), title, page.getVersion(),
193                 content, summary, true, "creole", parentTitle,
194                 redirectTitle, tagsEntries, null, null);
195         }
196         catch (Exception e) {
197             throw new PortalException("Error importing page " + title, e);
198         }
199     }
200 
201     protected boolean isSpecialMediaWikiPage(
202         String title, List<String> specialNamespaces) {
203 
204         for (String namespace: specialNamespaces) {
205             if (title.startsWith(namespace + StringPool.COLON)) {
206                 return true;
207             }
208         }
209 
210         return false;
211     }
212 
213     protected boolean isValidImage(String[] paths, byte[] bytes) {
214         if (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[0])) {
215             return false;
216         }
217 
218         if ((paths.length > 1) &&
219             (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[1]))) {
220 
221             return false;
222         }
223 
224         String fileName = paths[paths.length - 1];
225 
226         try {
227             DLLocalServiceUtil.validate(fileName, bytes);
228         }
229         catch (PortalException pe) {
230             return false;
231         }
232         catch (SystemException se) {
233             return false;
234         }
235 
236         return true;
237     }
238 
239     protected void moveFrontPage(
240         long userId, WikiNode node, Map<String, String[]> options) {
241 
242         String frontPageTitle = MapUtil.getString(
243             options, WikiImporterKeys.OPTIONS_FRONT_PAGE);
244 
245         if (Validator.isNotNull(frontPageTitle)) {
246             frontPageTitle = normalizeTitle(frontPageTitle);
247 
248             try {
249                 if (WikiPageLocalServiceUtil.getPagesCount(
250                         node.getNodeId(), frontPageTitle, true) > 0) {
251 
252                     WikiPageLocalServiceUtil.movePage(
253                         userId, node.getNodeId(), frontPageTitle,
254                         WikiPageImpl.FRONT_PAGE, false, null, null);
255 
256                 }
257             }
258             catch (Exception e) {
259                 if (_log.isWarnEnabled()) {
260                     StringBuilder sb = new StringBuilder();
261 
262                     sb.append("Could not move ");
263                     sb.append(WikiPageImpl.FRONT_PAGE);
264                     sb.append(" to the title provided: ");
265                     sb.append(frontPageTitle);
266 
267                     _log.warn(sb.toString(), e);
268                 }
269             }
270 
271         }
272 
273     }
274 
275     protected String normalize(String categoryName, int length) {
276         categoryName = TagsUtil.toWord(categoryName.trim());
277 
278         return StringUtil.shorten(categoryName, length);
279     }
280 
281     protected String normalizeDescription(String description) {
282         description = description.replaceAll(
283             _categoriesPattern.pattern(), StringPool.BLANK);
284 
285         return normalize(description, 300);
286     }
287 
288     protected String normalizeTitle(String title) {
289         title = title.replaceAll(
290             PropsValues.WIKI_PAGE_TITLES_REMOVE_REGEXP, StringPool.BLANK);
291 
292         return StringUtil.shorten(title, 75);
293     }
294 
295     private void processImages(long userId, WikiNode node, File imagesFile)
296         throws Exception {
297 
298         if ((imagesFile == null) || (!imagesFile.exists())) {
299             return;
300         }
301 
302         ProgressTracker progressTracker =
303             ProgressTrackerThreadLocal.getProgressTracker();
304 
305         int count = 0;
306 
307         ZipReader zipReader = new ZipReader(imagesFile);
308 
309         Map<String, byte[]> entries = zipReader.getEntries();
310 
311         int total = entries.size();
312 
313         if (total > 0) {
314             try {
315                 WikiPageLocalServiceUtil.getPage(
316                     node.getNodeId(), SHARED_IMAGES_TITLE);
317             }
318             catch (NoSuchPageException nspe) {
319                 WikiPageLocalServiceUtil.addPage(
320                     userId, node.getNodeId(), SHARED_IMAGES_TITLE,
321                     SHARED_IMAGES_CONTENT, null, true, null, null);
322             }
323         }
324 
325         List<ObjectValuePair<String, byte[]>> attachments =
326             new ArrayList<ObjectValuePair<String, byte[]>>();
327 
328         Iterator<Map.Entry<String, byte[]>> itr = entries.entrySet().iterator();
329 
330         int percentage = 50;
331 
332         for (int i = 0; itr.hasNext(); i++) {
333             Map.Entry<String, byte[]> entry = itr.next();
334 
335             String key = entry.getKey();
336             byte[] value = entry.getValue();
337 
338             if (key.endsWith(StringPool.SLASH)) {
339                 if (_log.isInfoEnabled()) {
340                     _log.info("Ignoring " + key);
341                 }
342 
343                 continue;
344             }
345 
346             String[] paths = StringUtil.split(key, StringPool.SLASH);
347 
348             if (!isValidImage(paths, value)) {
349                 if (_log.isInfoEnabled()) {
350                     _log.info("Ignoring " + key);
351                 }
352 
353                 continue;
354             }
355 
356             String fileName = paths[paths.length - 1].toLowerCase();
357 
358             attachments.add(
359                 new ObjectValuePair<String, byte[]>(fileName, value));
360 
361             count++;
362 
363             if ((i % 5) == 0) {
364                 WikiPageLocalServiceUtil.addPageAttachments(
365                     node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
366 
367                 attachments.clear();
368 
369                 percentage = Math.min(50 + (i * 50) / total, 99);
370 
371                 progressTracker.updateProgress(percentage);
372             }
373         }
374 
375         if (!attachments.isEmpty()) {
376             WikiPageLocalServiceUtil.addPageAttachments(
377                 node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
378         }
379 
380         if (_log.isInfoEnabled()) {
381             _log.info("Imported " + count + " images into " + node.getName());
382         }
383     }
384 
385     protected void processRegularPages(
386         long userId, WikiNode node, Element root,
387         List<String> specialNamespaces, Map<String, String> usersMap,
388         File imagesFile, Map<String, String[]> options) {
389 
390         boolean importLatestVersion = MapUtil.getBoolean(
391             options, WikiImporterKeys.OPTIONS_IMPORT_LATEST_VERSION);
392 
393         ProgressTracker progressTracker =
394             ProgressTrackerThreadLocal.getProgressTracker();
395 
396         int count = 0;
397 
398         List<Element> pages = root.elements("page");
399 
400         int total = pages.size();
401 
402         Iterator<Element> itr = root.elements("page").iterator();
403 
404         int percentage = 10;
405         int maxPercentage = 50;
406 
407         if ((imagesFile == null) || (!imagesFile.exists())) {
408             maxPercentage = 99;
409         }
410 
411         int percentageRange = maxPercentage - percentage;
412 
413         for (int i = 0; itr.hasNext(); i++) {
414             Element pageEl = itr.next();
415 
416             String title = pageEl.elementText("title");
417 
418             title = normalizeTitle(title);
419 
420             percentage = Math.min(
421                 10 + (i * percentageRange) / total, maxPercentage);
422 
423             progressTracker.updateProgress(percentage);
424 
425             if (isSpecialMediaWikiPage(title, specialNamespaces)) {
426                 continue;
427             }
428 
429             List<Element> revisionEls = pageEl.elements("revision");
430 
431             if (importLatestVersion) {
432                 Element lastRevisionEl = revisionEls.get(
433                     revisionEls.size() - 1);
434 
435                 revisionEls = new ArrayList<Element>();
436 
437                 revisionEls.add(lastRevisionEl);
438             }
439 
440             for (Element curRevisionEl : revisionEls) {
441                 String author = curRevisionEl.element(
442                     "contributor").elementText("username");
443                 String content = curRevisionEl.elementText("text");
444                 String summary = curRevisionEl.elementText("comment");
445 
446                 try {
447                     importPage(
448                         userId, author, node, title, content, summary,
449                         usersMap);
450                 }
451                 catch (Exception e) {
452                     if (_log.isWarnEnabled()) {
453                         StringBuilder sb = new StringBuilder();
454 
455                         sb.append("Page with title ");
456                         sb.append(title);
457                         sb.append(" could not be imported");
458 
459                         _log.warn(sb.toString(), e);
460                     }
461                 }
462             }
463 
464             count++;
465         }
466 
467         if (_log.isInfoEnabled()) {
468             _log.info("Imported " + count + " pages into " + node.getName());
469         }
470     }
471 
472     protected void processSpecialPages(
473             long userId, WikiNode node, Element root,
474             List<String> specialNamespaces)
475         throws PortalException {
476 
477         ProgressTracker progressTracker =
478             ProgressTrackerThreadLocal.getProgressTracker();
479 
480         List<Element> pages = root.elements("page");
481 
482         int total = pages.size();
483 
484         Iterator<Element> itr = pages.iterator();
485 
486         for (int i = 0; itr.hasNext(); i++) {
487             Element page = itr.next();
488 
489             String title = page.elementText("title");
490 
491             if (!title.startsWith("Category:")) {
492                 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
493                     root.remove(page);
494                 }
495 
496                 continue;
497             }
498 
499             String categoryName = title.substring("Category:".length());
500 
501             categoryName = normalize(categoryName, 75);
502 
503             String description = page.element("revision").elementText("text");
504 
505             description = normalizeDescription(description);
506 
507             try {
508                 TagsEntry tagsEntry = null;
509 
510                 try {
511                     tagsEntry = TagsEntryLocalServiceUtil.getEntry(
512                         node.getCompanyId(), categoryName);
513                 }
514                 catch (NoSuchEntryException nsee) {
515                     tagsEntry = TagsEntryLocalServiceUtil.addEntry(
516                         userId, categoryName);
517                 }
518 
519                 if (Validator.isNotNull(description)) {
520                     TagsPropertyLocalServiceUtil.addProperty(
521                         userId, tagsEntry.getEntryId(), "description",
522                         description);
523                 }
524             }
525             catch (SystemException se) {
526                  _log.error(se, se);
527             }
528 
529             if ((i % 5) == 0) {
530                 progressTracker.updateProgress((i * 10) / total);
531             }
532         }
533     }
534 
535     protected String readParentTitle(String content) {
536         Matcher matcher = _parentPattern.matcher(content);
537 
538         String redirectTitle = StringPool.BLANK;
539 
540         if (matcher.find()) {
541             redirectTitle = matcher.group(1);
542 
543             redirectTitle = normalizeTitle(redirectTitle);
544 
545             redirectTitle += " (disambiguation)";
546         }
547 
548         return redirectTitle;
549     }
550 
551     protected String readRedirectTitle(String content) {
552         Matcher matcher = _redirectPattern.matcher(content);
553 
554         String redirectTitle = StringPool.BLANK;
555 
556         if (matcher.find()) {
557             redirectTitle = matcher.group(1);
558 
559             redirectTitle = normalizeTitle(redirectTitle);
560         }
561 
562         return redirectTitle;
563     }
564 
565     protected List<String> readSpecialNamespaces(Element root)
566         throws ImportFilesException {
567 
568         List<String> namespaces = new ArrayList<String>();
569 
570         Element siteinfoEl = root.element("siteinfo");
571 
572         if (siteinfoEl == null) {
573             throw new ImportFilesException("Invalid pages XML file");
574         }
575 
576         Iterator<Element> itr = siteinfoEl.element(
577             "namespaces").elements("namespace").iterator();
578 
579         while (itr.hasNext()) {
580             Element namespace = itr.next();
581 
582             if (!namespace.attribute("key").getData().equals("0")) {
583                 namespaces.add(namespace.getText());
584             }
585         }
586 
587         return namespaces;
588     }
589 
590     protected String[] readTagsEntries(
591             long userId, WikiNode node, String content)
592         throws PortalException, SystemException {
593 
594         Matcher matcher = _categoriesPattern.matcher(content);
595 
596         List<String> tagsEntries = new ArrayList<String>();
597 
598         while (matcher.find()) {
599             String categoryName = matcher.group(1);
600 
601             categoryName = normalize(categoryName, 75);
602 
603             TagsEntry tagsEntry = null;
604 
605             try {
606                 tagsEntry = TagsEntryLocalServiceUtil.getEntry(
607                     node.getCompanyId(), categoryName);
608             }
609             catch (NoSuchEntryException nsee) {
610                 tagsEntry = TagsEntryLocalServiceUtil.addEntry(
611                     userId, categoryName);
612             }
613 
614             tagsEntries.add(tagsEntry.getName());
615         }
616 
617         if (content.indexOf(_WORK_IN_PROGRESS) != -1) {
618             tagsEntries.add(_WORK_IN_PROGRESS_TAG);
619         }
620 
621         return tagsEntries.toArray(new String[tagsEntries.size()]);
622     }
623 
624     protected Map<String, String> readUsersFile(File usersFile)
625         throws IOException {
626 
627         if ((usersFile == null) || (!usersFile.exists())) {
628             return Collections.EMPTY_MAP;
629         }
630 
631         Map<String, String> usersMap = new HashMap<String, String>();
632 
633         BufferedReader reader = new BufferedReader(new FileReader(usersFile));
634 
635         String line = reader.readLine();
636 
637         while (line != null) {
638             String[] array = StringUtil.split(line);
639 
640             if ((array.length == 2) && (Validator.isNotNull(array[0])) &&
641                 (Validator.isNotNull(array[1]))) {
642 
643                 usersMap.put(array[0], array[1]);
644             }
645             else {
646                 if (_log.isInfoEnabled()) {
647                     _log.info(
648                         "Ignoring line " + line +
649                             " because it does not contain exactly 2 columns");
650                 }
651             }
652 
653             line = reader.readLine();
654         }
655 
656         return usersMap;
657     }
658 
659     private static final String[] _SPECIAL_MEDIA_WIKI_DIRS = new String[]{
660         "thumb", "temp", "archive"};
661 
662     private static final String _WORK_IN_PROGRESS = "{{Work in progress}}";
663 
664     private static final String _WORK_IN_PROGRESS_TAG = "work in progress";
665 
666     private static Log _log = LogFactoryUtil.getLog(MediaWikiImporter.class);
667 
668     private static Pattern _categoriesPattern = Pattern.compile(
669         "\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*");
670     private static Pattern _parentPattern = Pattern.compile(
671         "\\{{2}OtherTopics\\|([^\\}]*)\\}{2}");
672     private static Pattern _redirectPattern = Pattern.compile(
673         "#REDIRECT \\[\\[([^\\]]*)\\]\\]");
674 
675     private MediaWikiToCreoleTranslator _translator =
676         new MediaWikiToCreoleTranslator();
677 
678 }