1   /**
2    * Copyright (c) 2000-2009 Liferay, Inc. All rights reserved.
3    *
4    * Permission is hereby granted, free of charge, to any person obtaining a copy
5    * of this software and associated documentation files (the "Software"), to deal
6    * in the Software without restriction, including without limitation the rights
7    * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8    * copies of the Software, and to permit persons to whom the Software is
9    * furnished to do so, subject to the following conditions:
10   *
11   * The above copyright notice and this permission notice shall be included in
12   * all copies or substantial portions of the Software.
13   *
14   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20   * SOFTWARE.
21   */
22  
23  package com.liferay.portlet.wiki.importers.mediawiki;
24  
25  import com.liferay.documentlibrary.service.DLLocalServiceUtil;
26  import com.liferay.portal.NoSuchUserException;
27  import com.liferay.portal.PortalException;
28  import com.liferay.portal.SystemException;
29  import com.liferay.portal.kernel.log.Log;
30  import com.liferay.portal.kernel.log.LogFactoryUtil;
31  import com.liferay.portal.kernel.util.ArrayUtil;
32  import com.liferay.portal.kernel.util.MapUtil;
33  import com.liferay.portal.kernel.util.ObjectValuePair;
34  import com.liferay.portal.kernel.util.ProgressTracker;
35  import com.liferay.portal.kernel.util.ProgressTrackerThreadLocal;
36  import com.liferay.portal.kernel.util.StringPool;
37  import com.liferay.portal.kernel.util.StringUtil;
38  import com.liferay.portal.kernel.util.Validator;
39  import com.liferay.portal.kernel.xml.Document;
40  import com.liferay.portal.kernel.xml.DocumentException;
41  import com.liferay.portal.kernel.xml.Element;
42  import com.liferay.portal.kernel.xml.SAXReaderUtil;
43  import com.liferay.portal.kernel.zip.ZipReader;
44  import com.liferay.portal.model.User;
45  import com.liferay.portal.service.ServiceContext;
46  import com.liferay.portal.service.UserLocalServiceUtil;
47  import com.liferay.portal.util.PropsValues;
48  import com.liferay.portlet.tags.NoSuchEntryException;
49  import com.liferay.portlet.tags.model.TagsEntry;
50  import com.liferay.portlet.tags.service.TagsEntryLocalServiceUtil;
51  import com.liferay.portlet.tags.service.TagsPropertyLocalServiceUtil;
52  import com.liferay.portlet.tags.util.TagsUtil;
53  import com.liferay.portlet.wiki.ImportFilesException;
54  import com.liferay.portlet.wiki.NoSuchPageException;
55  import com.liferay.portlet.wiki.importers.WikiImporter;
56  import com.liferay.portlet.wiki.importers.WikiImporterKeys;
57  import com.liferay.portlet.wiki.model.WikiNode;
58  import com.liferay.portlet.wiki.model.WikiPage;
59  import com.liferay.portlet.wiki.model.impl.WikiPageImpl;
60  import com.liferay.portlet.wiki.service.WikiPageLocalServiceUtil;
61  import com.liferay.portlet.wiki.translators.MediaWikiToCreoleTranslator;
62  
63  import java.io.BufferedReader;
64  import java.io.File;
65  import java.io.FileReader;
66  import java.io.IOException;
67  
68  import java.util.ArrayList;
69  import java.util.Collections;
70  import java.util.HashMap;
71  import java.util.Iterator;
72  import java.util.List;
73  import java.util.Map;
74  import java.util.regex.Matcher;
75  import java.util.regex.Pattern;
76  
77  /**
78   * <a href="MediaWikiImporter.java.html"><b><i>View Source</i></b></a>
79   *
80   * @author Alvaro del Castillo
81   * @author Jorge Ferrer
82   *
83   */
84  public class MediaWikiImporter implements WikiImporter {
85  
    /**
     * Text passed to <code>WikiPageLocalServiceUtil.addPage</code> when the
     * shared images page is created by <code>processImages</code>
     * (presumably the page's initial content; confirm against the service
     * signature).
     */
    public static final String SHARED_IMAGES_CONTENT = "See attachments";

    /**
     * Title of the wiki page that <code>processImages</code> looks up or
     * creates and to which every imported image is added as an attachment.
     */
    public static final String SHARED_IMAGES_TITLE = "SharedImages";
89  
90      public void importPages(
91              long userId, WikiNode node, File[] files,
92              Map<String, String[]> options)
93          throws PortalException {
94  
95          if ((files.length < 1) || (files[0] == null) || (!files[0].exists())) {
96              throw new PortalException("The pages file is mandatory");
97          }
98  
99          File pagesFile = files[0];
100         File usersFile = files[1];
101         File imagesFile = files[2];
102 
103         try {
104             Document doc = SAXReaderUtil.read(pagesFile);
105 
106             Map<String, String> usersMap = readUsersFile(usersFile);
107 
108             Element root = doc.getRootElement();
109 
110             List<String> specialNamespaces = readSpecialNamespaces(root);
111 
112             processSpecialPages(userId, node, root, specialNamespaces);
113             processRegularPages(
114                 userId, node, root, specialNamespaces, usersMap, imagesFile,
115                 options);
116             processImages(userId, node, imagesFile);
117 
118             moveFrontPage(userId, node, options);
119         }
120         catch (DocumentException de) {
121             throw new ImportFilesException("Invalid XML file provided");
122         }
123         catch (IOException de) {
124             throw new ImportFilesException("Error reading the files provided");
125         }
126         catch (PortalException e) {
127             throw e;
128         }
129         catch (Exception e) {
130             throw new PortalException(e);
131         }
132     }
133 
134     protected long getUserId(
135             long userId, WikiNode node, String author,
136             Map<String, String> usersMap)
137         throws PortalException, SystemException {
138 
139         User user = null;
140 
141         String emailAddress = usersMap.get(author);
142 
143         try {
144             if (Validator.isNull(emailAddress)) {
145                 user = UserLocalServiceUtil.getUserByScreenName(
146                     node.getCompanyId(), author.toLowerCase());
147             }
148             else {
149                 user = UserLocalServiceUtil.getUserByEmailAddress(
150                     node.getCompanyId(), emailAddress);
151             }
152         }
153         catch (NoSuchUserException nsue) {
154             user = UserLocalServiceUtil.getUserById(userId);
155         }
156 
157         return user.getUserId();
158     }
159 
160     protected void importPage(
161             long userId, String author, WikiNode node, String title,
162             String content, String summary, Map<String, String> usersMap)
163         throws PortalException {
164 
165         try {
166             long authorUserId = getUserId(userId, node, author, usersMap);
167             String parentTitle = readParentTitle(content);
168             String redirectTitle = readRedirectTitle(content);
169 
170             ServiceContext serviceContext = new ServiceContext();
171 
172             serviceContext.setTagsEntries(
173                 readTagsEntries(userId, node, content));
174 
175             if (Validator.isNull(redirectTitle)) {
176                 content = _translator.translate(content);
177             }
178             else {
179                 content =
180                     StringPool.DOUBLE_OPEN_BRACKET + redirectTitle +
181                         StringPool.DOUBLE_CLOSE_BRACKET;
182             }
183 
184             WikiPage page = null;
185 
186             try {
187                 page = WikiPageLocalServiceUtil.getPage(
188                     node.getNodeId(), title);
189             }
190             catch (NoSuchPageException nspe) {
191                 page = WikiPageLocalServiceUtil.addPage(
192                     authorUserId, node.getNodeId(), title, WikiPageImpl.NEW,
193                     null, true, serviceContext);
194             }
195 
196             WikiPageLocalServiceUtil.updatePage(
197                 authorUserId, node.getNodeId(), title, page.getVersion(),
198                 content, summary, true, "creole", parentTitle, redirectTitle,
199                 serviceContext);
200         }
201         catch (Exception e) {
202             throw new PortalException("Error importing page " + title, e);
203         }
204     }
205 
206     protected boolean isSpecialMediaWikiPage(
207         String title, List<String> specialNamespaces) {
208 
209         for (String namespace: specialNamespaces) {
210             if (title.startsWith(namespace + StringPool.COLON)) {
211                 return true;
212             }
213         }
214 
215         return false;
216     }
217 
218     protected boolean isValidImage(String[] paths, byte[] bytes) {
219         if (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[0])) {
220             return false;
221         }
222 
223         if ((paths.length > 1) &&
224             (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[1]))) {
225 
226             return false;
227         }
228 
229         String fileName = paths[paths.length - 1];
230 
231         try {
232             DLLocalServiceUtil.validate(fileName, bytes);
233         }
234         catch (PortalException pe) {
235             return false;
236         }
237         catch (SystemException se) {
238             return false;
239         }
240 
241         return true;
242     }
243 
244     protected void moveFrontPage(
245         long userId, WikiNode node, Map<String, String[]> options) {
246 
247         String frontPageTitle = MapUtil.getString(
248             options, WikiImporterKeys.OPTIONS_FRONT_PAGE);
249 
250         if (Validator.isNotNull(frontPageTitle)) {
251             frontPageTitle = normalizeTitle(frontPageTitle);
252 
253             try {
254                 if (WikiPageLocalServiceUtil.getPagesCount(
255                         node.getNodeId(), frontPageTitle, true) > 0) {
256 
257                     ServiceContext serviceContext = new ServiceContext();
258 
259                     WikiPageLocalServiceUtil.movePage(
260                         userId, node.getNodeId(), frontPageTitle,
261                         WikiPageImpl.FRONT_PAGE, false, serviceContext);
262 
263                 }
264             }
265             catch (Exception e) {
266                 if (_log.isWarnEnabled()) {
267                     StringBuilder sb = new StringBuilder();
268 
269                     sb.append("Could not move ");
270                     sb.append(WikiPageImpl.FRONT_PAGE);
271                     sb.append(" to the title provided: ");
272                     sb.append(frontPageTitle);
273 
274                     _log.warn(sb.toString(), e);
275                 }
276             }
277 
278         }
279 
280     }
281 
282     protected String normalize(String categoryName, int length) {
283         categoryName = TagsUtil.toWord(categoryName.trim());
284 
285         return StringUtil.shorten(categoryName, length);
286     }
287 
288     protected String normalizeDescription(String description) {
289         description = description.replaceAll(
290             _categoriesPattern.pattern(), StringPool.BLANK);
291 
292         return normalize(description, 300);
293     }
294 
295     protected String normalizeTitle(String title) {
296         title = title.replaceAll(
297             PropsValues.WIKI_PAGE_TITLES_REMOVE_REGEXP, StringPool.BLANK);
298 
299         return StringUtil.shorten(title, 75);
300     }
301 
302     private void processImages(long userId, WikiNode node, File imagesFile)
303         throws Exception {
304 
305         if ((imagesFile == null) || (!imagesFile.exists())) {
306             return;
307         }
308 
309         ProgressTracker progressTracker =
310             ProgressTrackerThreadLocal.getProgressTracker();
311 
312         int count = 0;
313 
314         ZipReader zipReader = new ZipReader(imagesFile);
315 
316         Map<String, byte[]> entries = zipReader.getEntries();
317 
318         int total = entries.size();
319 
320         if (total > 0) {
321             try {
322                 WikiPageLocalServiceUtil.getPage(
323                     node.getNodeId(), SHARED_IMAGES_TITLE);
324             }
325             catch (NoSuchPageException nspe) {
326                 ServiceContext serviceContext = new ServiceContext();
327 
328                 WikiPageLocalServiceUtil.addPage(
329                     userId, node.getNodeId(), SHARED_IMAGES_TITLE,
330                     SHARED_IMAGES_CONTENT, null, true, serviceContext);
331             }
332         }
333 
334         List<ObjectValuePair<String, byte[]>> attachments =
335             new ArrayList<ObjectValuePair<String, byte[]>>();
336 
337         Iterator<Map.Entry<String, byte[]>> itr = entries.entrySet().iterator();
338 
339         int percentage = 50;
340 
341         for (int i = 0; itr.hasNext(); i++) {
342             Map.Entry<String, byte[]> entry = itr.next();
343 
344             String key = entry.getKey();
345             byte[] value = entry.getValue();
346 
347             if (key.endsWith(StringPool.SLASH)) {
348                 if (_log.isInfoEnabled()) {
349                     _log.info("Ignoring " + key);
350                 }
351 
352                 continue;
353             }
354 
355             String[] paths = StringUtil.split(key, StringPool.SLASH);
356 
357             if (!isValidImage(paths, value)) {
358                 if (_log.isInfoEnabled()) {
359                     _log.info("Ignoring " + key);
360                 }
361 
362                 continue;
363             }
364 
365             String fileName = paths[paths.length - 1].toLowerCase();
366 
367             attachments.add(
368                 new ObjectValuePair<String, byte[]>(fileName, value));
369 
370             count++;
371 
372             if ((i % 5) == 0) {
373                 WikiPageLocalServiceUtil.addPageAttachments(
374                     node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
375 
376                 attachments.clear();
377 
378                 percentage = Math.min(50 + (i * 50) / total, 99);
379 
380                 progressTracker.updateProgress(percentage);
381             }
382         }
383 
384         if (!attachments.isEmpty()) {
385             WikiPageLocalServiceUtil.addPageAttachments(
386                 node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
387         }
388 
389         if (_log.isInfoEnabled()) {
390             _log.info("Imported " + count + " images into " + node.getName());
391         }
392     }
393 
394     protected void processRegularPages(
395         long userId, WikiNode node, Element root,
396         List<String> specialNamespaces, Map<String, String> usersMap,
397         File imagesFile, Map<String, String[]> options) {
398 
399         boolean importLatestVersion = MapUtil.getBoolean(
400             options, WikiImporterKeys.OPTIONS_IMPORT_LATEST_VERSION);
401 
402         ProgressTracker progressTracker =
403             ProgressTrackerThreadLocal.getProgressTracker();
404 
405         int count = 0;
406 
407         List<Element> pages = root.elements("page");
408 
409         int total = pages.size();
410 
411         Iterator<Element> itr = root.elements("page").iterator();
412 
413         int percentage = 10;
414         int maxPercentage = 50;
415 
416         if ((imagesFile == null) || (!imagesFile.exists())) {
417             maxPercentage = 99;
418         }
419 
420         int percentageRange = maxPercentage - percentage;
421 
422         for (int i = 0; itr.hasNext(); i++) {
423             Element pageEl = itr.next();
424 
425             String title = pageEl.elementText("title");
426 
427             title = normalizeTitle(title);
428 
429             percentage = Math.min(
430                 10 + (i * percentageRange) / total, maxPercentage);
431 
432             progressTracker.updateProgress(percentage);
433 
434             if (isSpecialMediaWikiPage(title, specialNamespaces)) {
435                 continue;
436             }
437 
438             List<Element> revisionEls = pageEl.elements("revision");
439 
440             if (importLatestVersion) {
441                 Element lastRevisionEl = revisionEls.get(
442                     revisionEls.size() - 1);
443 
444                 revisionEls = new ArrayList<Element>();
445 
446                 revisionEls.add(lastRevisionEl);
447             }
448 
449             for (Element curRevisionEl : revisionEls) {
450                 String author = curRevisionEl.element(
451                     "contributor").elementText("username");
452                 String content = curRevisionEl.elementText("text");
453                 String summary = curRevisionEl.elementText("comment");
454 
455                 try {
456                     importPage(
457                         userId, author, node, title, content, summary,
458                         usersMap);
459                 }
460                 catch (Exception e) {
461                     if (_log.isWarnEnabled()) {
462                         StringBuilder sb = new StringBuilder();
463 
464                         sb.append("Page with title ");
465                         sb.append(title);
466                         sb.append(" could not be imported");
467 
468                         _log.warn(sb.toString(), e);
469                     }
470                 }
471             }
472 
473             count++;
474         }
475 
476         if (_log.isInfoEnabled()) {
477             _log.info("Imported " + count + " pages into " + node.getName());
478         }
479     }
480 
481     protected void processSpecialPages(
482             long userId, WikiNode node, Element root,
483             List<String> specialNamespaces)
484         throws PortalException {
485 
486         ProgressTracker progressTracker =
487             ProgressTrackerThreadLocal.getProgressTracker();
488 
489         List<Element> pages = root.elements("page");
490 
491         int total = pages.size();
492 
493         Iterator<Element> itr = pages.iterator();
494 
495         for (int i = 0; itr.hasNext(); i++) {
496             Element page = itr.next();
497 
498             String title = page.elementText("title");
499 
500             if (!title.startsWith("Category:")) {
501                 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
502                     root.remove(page);
503                 }
504 
505                 continue;
506             }
507 
508             String categoryName = title.substring("Category:".length());
509 
510             categoryName = normalize(categoryName, 75);
511 
512             String description = page.element("revision").elementText("text");
513 
514             description = normalizeDescription(description);
515 
516             try {
517                 TagsEntry tagsEntry = null;
518 
519                 try {
520                     tagsEntry = TagsEntryLocalServiceUtil.getEntry(
521                         node.getCompanyId(), categoryName);
522                 }
523                 catch (NoSuchEntryException nsee) {
524                     ServiceContext serviceContext = new ServiceContext();
525 
526                     serviceContext.setAddCommunityPermissions(true);
527                     serviceContext.setAddGuestPermissions(true);
528                     serviceContext.setScopeGroupId(node.getGroupId());
529 
530                     tagsEntry = TagsEntryLocalServiceUtil.addEntry(
531                         userId, null, categoryName, null, null, serviceContext);
532                 }
533 
534                 if (Validator.isNotNull(description)) {
535                     TagsPropertyLocalServiceUtil.addProperty(
536                         userId, tagsEntry.getEntryId(), "description",
537                         description);
538                 }
539             }
540             catch (SystemException se) {
541                  _log.error(se, se);
542             }
543 
544             if ((i % 5) == 0) {
545                 progressTracker.updateProgress((i * 10) / total);
546             }
547         }
548     }
549 
550     protected String readParentTitle(String content) {
551         Matcher matcher = _parentPattern.matcher(content);
552 
553         String redirectTitle = StringPool.BLANK;
554 
555         if (matcher.find()) {
556             redirectTitle = matcher.group(1);
557 
558             redirectTitle = normalizeTitle(redirectTitle);
559 
560             redirectTitle += " (disambiguation)";
561         }
562 
563         return redirectTitle;
564     }
565 
566     protected String readRedirectTitle(String content) {
567         Matcher matcher = _redirectPattern.matcher(content);
568 
569         String redirectTitle = StringPool.BLANK;
570 
571         if (matcher.find()) {
572             redirectTitle = matcher.group(1);
573 
574             redirectTitle = normalizeTitle(redirectTitle);
575         }
576 
577         return redirectTitle;
578     }
579 
580     protected List<String> readSpecialNamespaces(Element root)
581         throws ImportFilesException {
582 
583         List<String> namespaces = new ArrayList<String>();
584 
585         Element siteinfoEl = root.element("siteinfo");
586 
587         if (siteinfoEl == null) {
588             throw new ImportFilesException("Invalid pages XML file");
589         }
590 
591         Iterator<Element> itr = siteinfoEl.element(
592             "namespaces").elements("namespace").iterator();
593 
594         while (itr.hasNext()) {
595             Element namespace = itr.next();
596 
597             if (!namespace.attribute("key").getData().equals("0")) {
598                 namespaces.add(namespace.getText());
599             }
600         }
601 
602         return namespaces;
603     }
604 
605     protected String[] readTagsEntries(
606             long userId, WikiNode node, String content)
607         throws PortalException, SystemException {
608 
609         Matcher matcher = _categoriesPattern.matcher(content);
610 
611         List<String> tagsEntries = new ArrayList<String>();
612 
613         while (matcher.find()) {
614             String categoryName = matcher.group(1);
615 
616             categoryName = normalize(categoryName, 75);
617 
618             TagsEntry tagsEntry = null;
619 
620             try {
621                 tagsEntry = TagsEntryLocalServiceUtil.getEntry(
622                     node.getCompanyId(), categoryName);
623             }
624             catch (NoSuchEntryException nsee) {
625                 ServiceContext serviceContext = new ServiceContext();
626 
627                 serviceContext.setAddCommunityPermissions(true);
628                 serviceContext.setAddGuestPermissions(true);
629                 serviceContext.setScopeGroupId(node.getGroupId());
630 
631                 tagsEntry = TagsEntryLocalServiceUtil.addEntry(
632                     userId, null, categoryName, null, null, serviceContext);
633             }
634 
635             tagsEntries.add(tagsEntry.getName());
636         }
637 
638         if (content.indexOf(_WORK_IN_PROGRESS) != -1) {
639             tagsEntries.add(_WORK_IN_PROGRESS_TAG);
640         }
641 
642         return tagsEntries.toArray(new String[tagsEntries.size()]);
643     }
644 
645     protected Map<String, String> readUsersFile(File usersFile)
646         throws IOException {
647 
648         if ((usersFile == null) || (!usersFile.exists())) {
649             return Collections.EMPTY_MAP;
650         }
651 
652         Map<String, String> usersMap = new HashMap<String, String>();
653 
654         BufferedReader reader = new BufferedReader(new FileReader(usersFile));
655 
656         String line = reader.readLine();
657 
658         while (line != null) {
659             String[] array = StringUtil.split(line);
660 
661             if ((array.length == 2) && (Validator.isNotNull(array[0])) &&
662                 (Validator.isNotNull(array[1]))) {
663 
664                 usersMap.put(array[0], array[1]);
665             }
666             else {
667                 if (_log.isInfoEnabled()) {
668                     _log.info(
669                         "Ignoring line " + line +
670                             " because it does not contain exactly 2 columns");
671                 }
672             }
673 
674             line = reader.readLine();
675         }
676 
677         return usersMap;
678     }
679 
680     private static final String[] _SPECIAL_MEDIA_WIKI_DIRS = new String[]{
681         "thumb", "temp", "archive"};
682 
683     private static final String _WORK_IN_PROGRESS = "{{Work in progress}}";
684 
685     private static final String _WORK_IN_PROGRESS_TAG = "work in progress";
686 
687     private static Log _log = LogFactoryUtil.getLog(MediaWikiImporter.class);
688 
689     private static Pattern _categoriesPattern = Pattern.compile(
690         "\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*");
691     private static Pattern _parentPattern = Pattern.compile(
692         "\\{{2}OtherTopics\\|([^\\}]*)\\}{2}");
693     private static Pattern _redirectPattern = Pattern.compile(
694         "#REDIRECT \\[\\[([^\\]]*)\\]\\]");
695 
696     private MediaWikiToCreoleTranslator _translator =
697         new MediaWikiToCreoleTranslator();
698 
699 }