1
14
15 package com.liferay.portlet.wiki.importers.mediawiki;
16
17 import com.liferay.documentlibrary.service.DLLocalServiceUtil;
18 import com.liferay.portal.NoSuchUserException;
19 import com.liferay.portal.PortalException;
20 import com.liferay.portal.SystemException;
21 import com.liferay.portal.kernel.io.unsync.UnsyncBufferedReader;
22 import com.liferay.portal.kernel.log.Log;
23 import com.liferay.portal.kernel.log.LogFactoryUtil;
24 import com.liferay.portal.kernel.util.ArrayUtil;
25 import com.liferay.portal.kernel.util.MapUtil;
26 import com.liferay.portal.kernel.util.ObjectValuePair;
27 import com.liferay.portal.kernel.util.ProgressTracker;
28 import com.liferay.portal.kernel.util.ProgressTrackerThreadLocal;
29 import com.liferay.portal.kernel.util.StringBundler;
30 import com.liferay.portal.kernel.util.StringPool;
31 import com.liferay.portal.kernel.util.StringUtil;
32 import com.liferay.portal.kernel.util.Validator;
33 import com.liferay.portal.kernel.xml.Document;
34 import com.liferay.portal.kernel.xml.DocumentException;
35 import com.liferay.portal.kernel.xml.Element;
36 import com.liferay.portal.kernel.xml.SAXReaderUtil;
37 import com.liferay.portal.kernel.zip.ZipReader;
38 import com.liferay.portal.kernel.zip.ZipReaderFactoryUtil;
39 import com.liferay.portal.model.User;
40 import com.liferay.portal.service.ServiceContext;
41 import com.liferay.portal.service.UserLocalServiceUtil;
42 import com.liferay.portal.util.PropsValues;
43 import com.liferay.portlet.tags.NoSuchEntryException;
44 import com.liferay.portlet.tags.model.TagsEntry;
45 import com.liferay.portlet.tags.service.TagsEntryLocalServiceUtil;
46 import com.liferay.portlet.tags.service.TagsPropertyLocalServiceUtil;
47 import com.liferay.portlet.tags.util.TagsUtil;
48 import com.liferay.portlet.wiki.ImportFilesException;
49 import com.liferay.portlet.wiki.NoSuchPageException;
50 import com.liferay.portlet.wiki.importers.WikiImporter;
51 import com.liferay.portlet.wiki.importers.WikiImporterKeys;
52 import com.liferay.portlet.wiki.model.WikiNode;
53 import com.liferay.portlet.wiki.model.WikiPage;
54 import com.liferay.portlet.wiki.model.impl.WikiPageImpl;
55 import com.liferay.portlet.wiki.service.WikiPageLocalServiceUtil;
56 import com.liferay.portlet.wiki.translators.MediaWikiToCreoleTranslator;
57
58 import java.io.File;
59 import java.io.FileReader;
60 import java.io.IOException;
61
62 import java.util.ArrayList;
63 import java.util.Collections;
64 import java.util.HashMap;
65 import java.util.Iterator;
66 import java.util.List;
67 import java.util.Map;
68 import java.util.regex.Matcher;
69 import java.util.regex.Pattern;
70
71
77 public class MediaWikiImporter implements WikiImporter {
78
79 public static final String SHARED_IMAGES_CONTENT = "See attachments";
80
81 public static final String SHARED_IMAGES_TITLE = "SharedImages";
82
83 public void importPages(
84 long userId, WikiNode node, File[] files,
85 Map<String, String[]> options)
86 throws PortalException {
87
88 if ((files.length < 1) || (files[0] == null) || (!files[0].exists())) {
89 throw new PortalException("The pages file is mandatory");
90 }
91
92 File pagesFile = files[0];
93 File usersFile = files[1];
94 File imagesFile = files[2];
95
96 try {
97 Document doc = SAXReaderUtil.read(pagesFile);
98
99 Map<String, String> usersMap = readUsersFile(usersFile);
100
101 Element root = doc.getRootElement();
102
103 List<String> specialNamespaces = readSpecialNamespaces(root);
104
105 processSpecialPages(userId, node, root, specialNamespaces);
106 processRegularPages(
107 userId, node, root, specialNamespaces, usersMap, imagesFile,
108 options);
109 processImages(userId, node, imagesFile);
110
111 moveFrontPage(userId, node, options);
112 }
113 catch (DocumentException de) {
114 throw new ImportFilesException("Invalid XML file provided");
115 }
116 catch (IOException de) {
117 throw new ImportFilesException("Error reading the files provided");
118 }
119 catch (PortalException e) {
120 throw e;
121 }
122 catch (Exception e) {
123 throw new PortalException(e);
124 }
125 }
126
127 protected long getUserId(
128 long userId, WikiNode node, String author,
129 Map<String, String> usersMap)
130 throws PortalException, SystemException {
131
132 User user = null;
133
134 String emailAddress = usersMap.get(author);
135
136 try {
137 if (Validator.isNull(emailAddress)) {
138 user = UserLocalServiceUtil.getUserByScreenName(
139 node.getCompanyId(), author.toLowerCase());
140 }
141 else {
142 user = UserLocalServiceUtil.getUserByEmailAddress(
143 node.getCompanyId(), emailAddress);
144 }
145 }
146 catch (NoSuchUserException nsue) {
147 user = UserLocalServiceUtil.getUserById(userId);
148 }
149
150 return user.getUserId();
151 }
152
153 protected void importPage(
154 long userId, String author, WikiNode node, String title,
155 String content, String summary, Map<String, String> usersMap)
156 throws PortalException {
157
158 try {
159 long authorUserId = getUserId(userId, node, author, usersMap);
160 String parentTitle = readParentTitle(content);
161 String redirectTitle = readRedirectTitle(content);
162
163 ServiceContext serviceContext = new ServiceContext();
164
165 serviceContext.setAddCommunityPermissions(true);
166 serviceContext.setAddGuestPermissions(true);
167 serviceContext.setTagsEntries(
168 readTagsEntries(userId, node, content));
169
170 if (Validator.isNull(redirectTitle)) {
171 content = _translator.translate(content);
172 }
173 else {
174 content =
175 StringPool.DOUBLE_OPEN_BRACKET + redirectTitle +
176 StringPool.DOUBLE_CLOSE_BRACKET;
177 }
178
179 WikiPage page = null;
180
181 try {
182 page = WikiPageLocalServiceUtil.getPage(
183 node.getNodeId(), title);
184 }
185 catch (NoSuchPageException nspe) {
186 page = WikiPageLocalServiceUtil.addPage(
187 authorUserId, node.getNodeId(), title, WikiPageImpl.NEW,
188 null, true, serviceContext);
189 }
190
191 WikiPageLocalServiceUtil.updatePage(
192 authorUserId, node.getNodeId(), title, page.getVersion(),
193 content, summary, true, "creole", parentTitle, redirectTitle,
194 serviceContext);
195 }
196 catch (Exception e) {
197 throw new PortalException("Error importing page " + title, e);
198 }
199 }
200
201 protected boolean isSpecialMediaWikiPage(
202 String title, List<String> specialNamespaces) {
203
204 for (String namespace: specialNamespaces) {
205 if (title.startsWith(namespace + StringPool.COLON)) {
206 return true;
207 }
208 }
209
210 return false;
211 }
212
213 protected boolean isValidImage(String[] paths, byte[] bytes) {
214 if (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[0])) {
215 return false;
216 }
217
218 if ((paths.length > 1) &&
219 (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[1]))) {
220
221 return false;
222 }
223
224 String fileName = paths[paths.length - 1];
225
226 try {
227 DLLocalServiceUtil.validate(fileName, bytes);
228 }
229 catch (PortalException pe) {
230 return false;
231 }
232 catch (SystemException se) {
233 return false;
234 }
235
236 return true;
237 }
238
239 protected void moveFrontPage(
240 long userId, WikiNode node, Map<String, String[]> options) {
241
242 String frontPageTitle = MapUtil.getString(
243 options, WikiImporterKeys.OPTIONS_FRONT_PAGE);
244
245 if (Validator.isNotNull(frontPageTitle)) {
246 frontPageTitle = normalizeTitle(frontPageTitle);
247
248 try {
249 if (WikiPageLocalServiceUtil.getPagesCount(
250 node.getNodeId(), frontPageTitle, true) > 0) {
251
252 ServiceContext serviceContext = new ServiceContext();
253
254 serviceContext.setAddCommunityPermissions(true);
255 serviceContext.setAddGuestPermissions(true);
256
257 WikiPageLocalServiceUtil.movePage(
258 userId, node.getNodeId(), frontPageTitle,
259 WikiPageImpl.FRONT_PAGE, false, serviceContext);
260
261 }
262 }
263 catch (Exception e) {
264 if (_log.isWarnEnabled()) {
265 StringBundler sb = new StringBundler(4);
266
267 sb.append("Could not move ");
268 sb.append(WikiPageImpl.FRONT_PAGE);
269 sb.append(" to the title provided: ");
270 sb.append(frontPageTitle);
271
272 _log.warn(sb.toString(), e);
273 }
274 }
275
276 }
277
278 }
279
280 protected String normalize(String categoryName, int length) {
281 categoryName = TagsUtil.toWord(categoryName.trim());
282
283 return StringUtil.shorten(categoryName, length);
284 }
285
286 protected String normalizeDescription(String description) {
287 description = description.replaceAll(
288 _categoriesPattern.pattern(), StringPool.BLANK);
289
290 return normalize(description, 300);
291 }
292
293 protected String normalizeTitle(String title) {
294 title = title.replaceAll(
295 PropsValues.WIKI_PAGE_TITLES_REMOVE_REGEXP, StringPool.BLANK);
296
297 return StringUtil.shorten(title, 75);
298 }
299
300 protected void processImages(long userId, WikiNode node, File imagesFile)
301 throws Exception {
302
303 if ((imagesFile == null) || (!imagesFile.exists())) {
304 return;
305 }
306
307 ProgressTracker progressTracker =
308 ProgressTrackerThreadLocal.getProgressTracker();
309
310 int count = 0;
311
312 ZipReader zipReader = ZipReaderFactoryUtil.getZipReader(imagesFile);
313
314 List<String> entries = zipReader.getEntries();
315
316 int total = entries.size();
317
318 if (total > 0) {
319 try {
320 WikiPageLocalServiceUtil.getPage(
321 node.getNodeId(), SHARED_IMAGES_TITLE);
322 }
323 catch (NoSuchPageException nspe) {
324 ServiceContext serviceContext = new ServiceContext();
325
326 serviceContext.setAddCommunityPermissions(true);
327 serviceContext.setAddGuestPermissions(true);
328
329 WikiPageLocalServiceUtil.addPage(
330 userId, node.getNodeId(), SHARED_IMAGES_TITLE,
331 SHARED_IMAGES_CONTENT, null, true, serviceContext);
332 }
333 }
334
335 List<ObjectValuePair<String, byte[]>> attachments =
336 new ArrayList<ObjectValuePair<String, byte[]>>();
337
338 int percentage = 50;
339
340 for (int i = 0; i < entries.size(); i++) {
341 String entry = entries.get(i);
342
343 String key = entry;
344 byte[] value = zipReader.getEntryAsByteArray(entry);
345
346 if (key.endsWith(StringPool.SLASH)) {
347 if (_log.isInfoEnabled()) {
348 _log.info("Ignoring " + key);
349 }
350
351 continue;
352 }
353
354 String[] paths = StringUtil.split(key, StringPool.SLASH);
355
356 if (!isValidImage(paths, value)) {
357 if (_log.isInfoEnabled()) {
358 _log.info("Ignoring " + key);
359 }
360
361 continue;
362 }
363
364 String fileName = paths[paths.length - 1].toLowerCase();
365
366 attachments.add(
367 new ObjectValuePair<String, byte[]>(fileName, value));
368
369 count++;
370
371 if ((i % 5) == 0) {
372 WikiPageLocalServiceUtil.addPageAttachments(
373 node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
374
375 attachments.clear();
376
377 percentage = Math.min(50 + (i * 50) / total, 99);
378
379 progressTracker.updateProgress(percentage);
380 }
381 }
382
383 if (!attachments.isEmpty()) {
384 WikiPageLocalServiceUtil.addPageAttachments(
385 node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
386 }
387
388 zipReader.close();
389
390 if (_log.isInfoEnabled()) {
391 _log.info("Imported " + count + " images into " + node.getName());
392 }
393 }
394
395 protected void processRegularPages(
396 long userId, WikiNode node, Element root,
397 List<String> specialNamespaces, Map<String, String> usersMap,
398 File imagesFile, Map<String, String[]> options) {
399
400 boolean importLatestVersion = MapUtil.getBoolean(
401 options, WikiImporterKeys.OPTIONS_IMPORT_LATEST_VERSION);
402
403 ProgressTracker progressTracker =
404 ProgressTrackerThreadLocal.getProgressTracker();
405
406 int count = 0;
407
408 List<Element> pages = root.elements("page");
409
410 int total = pages.size();
411
412 Iterator<Element> itr = root.elements("page").iterator();
413
414 int percentage = 10;
415 int maxPercentage = 50;
416
417 if ((imagesFile == null) || (!imagesFile.exists())) {
418 maxPercentage = 99;
419 }
420
421 int percentageRange = maxPercentage - percentage;
422
423 for (int i = 0; itr.hasNext(); i++) {
424 Element pageEl = itr.next();
425
426 String title = pageEl.elementText("title");
427
428 title = normalizeTitle(title);
429
430 percentage = Math.min(
431 10 + (i * percentageRange) / total, maxPercentage);
432
433 progressTracker.updateProgress(percentage);
434
435 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
436 continue;
437 }
438
439 List<Element> revisionEls = pageEl.elements("revision");
440
441 if (importLatestVersion) {
442 Element lastRevisionEl = revisionEls.get(
443 revisionEls.size() - 1);
444
445 revisionEls = new ArrayList<Element>();
446
447 revisionEls.add(lastRevisionEl);
448 }
449
450 for (Element curRevisionEl : revisionEls) {
451 String author = curRevisionEl.element(
452 "contributor").elementText("username");
453 String content = curRevisionEl.elementText("text");
454 String summary = curRevisionEl.elementText("comment");
455
456 try {
457 importPage(
458 userId, author, node, title, content, summary,
459 usersMap);
460 }
461 catch (Exception e) {
462 if (_log.isWarnEnabled()) {
463 StringBundler sb = new StringBundler(3);
464
465 sb.append("Page with title ");
466 sb.append(title);
467 sb.append(" could not be imported");
468
469 _log.warn(sb.toString(), e);
470 }
471 }
472 }
473
474 count++;
475 }
476
477 if (_log.isInfoEnabled()) {
478 _log.info("Imported " + count + " pages into " + node.getName());
479 }
480 }
481
482 protected void processSpecialPages(
483 long userId, WikiNode node, Element root,
484 List<String> specialNamespaces)
485 throws PortalException {
486
487 ProgressTracker progressTracker =
488 ProgressTrackerThreadLocal.getProgressTracker();
489
490 List<Element> pages = root.elements("page");
491
492 int total = pages.size();
493
494 Iterator<Element> itr = pages.iterator();
495
496 for (int i = 0; itr.hasNext(); i++) {
497 Element page = itr.next();
498
499 String title = page.elementText("title");
500
501 if (!title.startsWith("Category:")) {
502 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
503 root.remove(page);
504 }
505
506 continue;
507 }
508
509 String categoryName = title.substring("Category:".length());
510
511 categoryName = normalize(categoryName, 75);
512
513 String description = page.element("revision").elementText("text");
514
515 description = normalizeDescription(description);
516
517 try {
518 TagsEntry tagsEntry = null;
519
520 try {
521 tagsEntry = TagsEntryLocalServiceUtil.getEntry(
522 node.getCompanyId(), categoryName);
523 }
524 catch (NoSuchEntryException nsee) {
525 ServiceContext serviceContext = new ServiceContext();
526
527 serviceContext.setAddCommunityPermissions(true);
528 serviceContext.setAddGuestPermissions(true);
529 serviceContext.setScopeGroupId(node.getGroupId());
530
531 tagsEntry = TagsEntryLocalServiceUtil.addEntry(
532 userId, null, categoryName, null, null, serviceContext);
533 }
534
535 if (Validator.isNotNull(description)) {
536 TagsPropertyLocalServiceUtil.addProperty(
537 userId, tagsEntry.getEntryId(), "description",
538 description);
539 }
540 }
541 catch (SystemException se) {
542 _log.error(se, se);
543 }
544
545 if ((i % 5) == 0) {
546 progressTracker.updateProgress((i * 10) / total);
547 }
548 }
549 }
550
551 protected String readParentTitle(String content) {
552 Matcher matcher = _parentPattern.matcher(content);
553
554 String redirectTitle = StringPool.BLANK;
555
556 if (matcher.find()) {
557 redirectTitle = matcher.group(1);
558
559 redirectTitle = normalizeTitle(redirectTitle);
560
561 redirectTitle += " (disambiguation)";
562 }
563
564 return redirectTitle;
565 }
566
567 protected String readRedirectTitle(String content) {
568 Matcher matcher = _redirectPattern.matcher(content);
569
570 String redirectTitle = StringPool.BLANK;
571
572 if (matcher.find()) {
573 redirectTitle = matcher.group(1);
574
575 redirectTitle = normalizeTitle(redirectTitle);
576 }
577
578 return redirectTitle;
579 }
580
581 protected List<String> readSpecialNamespaces(Element root)
582 throws ImportFilesException {
583
584 List<String> namespaces = new ArrayList<String>();
585
586 Element siteinfoEl = root.element("siteinfo");
587
588 if (siteinfoEl == null) {
589 throw new ImportFilesException("Invalid pages XML file");
590 }
591
592 Iterator<Element> itr = siteinfoEl.element(
593 "namespaces").elements("namespace").iterator();
594
595 while (itr.hasNext()) {
596 Element namespace = itr.next();
597
598 if (!namespace.attribute("key").getData().equals("0")) {
599 namespaces.add(namespace.getText());
600 }
601 }
602
603 return namespaces;
604 }
605
606 protected String[] readTagsEntries(
607 long userId, WikiNode node, String content)
608 throws PortalException, SystemException {
609
610 Matcher matcher = _categoriesPattern.matcher(content);
611
612 List<String> tagsEntries = new ArrayList<String>();
613
614 while (matcher.find()) {
615 String categoryName = matcher.group(1);
616
617 categoryName = normalize(categoryName, 75);
618
619 TagsEntry tagsEntry = null;
620
621 try {
622 tagsEntry = TagsEntryLocalServiceUtil.getEntry(
623 node.getGroupId(), categoryName);
624 }
625 catch (NoSuchEntryException nsee) {
626 ServiceContext serviceContext = new ServiceContext();
627
628 serviceContext.setAddCommunityPermissions(true);
629 serviceContext.setAddGuestPermissions(true);
630 serviceContext.setScopeGroupId(node.getGroupId());
631
632 tagsEntry = TagsEntryLocalServiceUtil.addEntry(
633 userId, null, categoryName, null, null, serviceContext);
634 }
635
636 tagsEntries.add(tagsEntry.getName());
637 }
638
639 if (content.indexOf(_WORK_IN_PROGRESS) != -1) {
640 tagsEntries.add(_WORK_IN_PROGRESS_TAG);
641 }
642
643 return tagsEntries.toArray(new String[tagsEntries.size()]);
644 }
645
646 protected Map<String, String> readUsersFile(File usersFile)
647 throws IOException {
648
649 if ((usersFile == null) || (!usersFile.exists())) {
650 return Collections.EMPTY_MAP;
651 }
652
653 Map<String, String> usersMap = new HashMap<String, String>();
654
655 UnsyncBufferedReader unsyncBufferedReader =
656 new UnsyncBufferedReader(new FileReader(usersFile));
657
658 String line = unsyncBufferedReader.readLine();
659
660 while (line != null) {
661 String[] array = StringUtil.split(line);
662
663 if ((array.length == 2) && (Validator.isNotNull(array[0])) &&
664 (Validator.isNotNull(array[1]))) {
665
666 usersMap.put(array[0], array[1]);
667 }
668 else {
669 if (_log.isInfoEnabled()) {
670 _log.info(
671 "Ignoring line " + line +
672 " because it does not contain exactly 2 columns");
673 }
674 }
675
676 line = unsyncBufferedReader.readLine();
677 }
678
679 return usersMap;
680 }
681
682 private static final String[] _SPECIAL_MEDIA_WIKI_DIRS = {
683 "thumb", "temp", "archive"
684 };
685
686 private static final String _WORK_IN_PROGRESS = "{{Work in progress}}";
687
688 private static final String _WORK_IN_PROGRESS_TAG = "work in progress";
689
690 private static Log _log = LogFactoryUtil.getLog(MediaWikiImporter.class);
691
692 private static Pattern _categoriesPattern = Pattern.compile(
693 "\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*");
694 private static Pattern _parentPattern = Pattern.compile(
695 "\\{{2}OtherTopics\\|([^\\}]*)\\}{2}");
696 private static Pattern _redirectPattern = Pattern.compile(
697 "#REDIRECT \\[\\[([^\\]]*)\\]\\]");
698
699 private MediaWikiToCreoleTranslator _translator =
700 new MediaWikiToCreoleTranslator();
701
702 }