1
22
23 package com.liferay.portlet.wiki.importers.mediawiki;
24
25 import com.liferay.documentlibrary.service.DLLocalServiceUtil;
26 import com.liferay.portal.NoSuchUserException;
27 import com.liferay.portal.PortalException;
28 import com.liferay.portal.SystemException;
29 import com.liferay.portal.kernel.log.Log;
30 import com.liferay.portal.kernel.log.LogFactoryUtil;
31 import com.liferay.portal.kernel.util.ArrayUtil;
32 import com.liferay.portal.kernel.util.MapUtil;
33 import com.liferay.portal.kernel.util.ObjectValuePair;
34 import com.liferay.portal.kernel.util.ProgressTracker;
35 import com.liferay.portal.kernel.util.ProgressTrackerThreadLocal;
36 import com.liferay.portal.kernel.util.StringPool;
37 import com.liferay.portal.kernel.util.StringUtil;
38 import com.liferay.portal.kernel.util.Validator;
39 import com.liferay.portal.kernel.xml.Document;
40 import com.liferay.portal.kernel.xml.DocumentException;
41 import com.liferay.portal.kernel.xml.Element;
42 import com.liferay.portal.kernel.xml.SAXReaderUtil;
43 import com.liferay.portal.kernel.zip.ZipReader;
44 import com.liferay.portal.model.User;
45 import com.liferay.portal.service.ServiceContext;
46 import com.liferay.portal.service.UserLocalServiceUtil;
47 import com.liferay.portal.util.PropsValues;
48 import com.liferay.portlet.tags.NoSuchEntryException;
49 import com.liferay.portlet.tags.model.TagsEntry;
50 import com.liferay.portlet.tags.service.TagsEntryLocalServiceUtil;
51 import com.liferay.portlet.tags.service.TagsPropertyLocalServiceUtil;
52 import com.liferay.portlet.tags.util.TagsUtil;
53 import com.liferay.portlet.wiki.ImportFilesException;
54 import com.liferay.portlet.wiki.NoSuchPageException;
55 import com.liferay.portlet.wiki.importers.WikiImporter;
56 import com.liferay.portlet.wiki.importers.WikiImporterKeys;
57 import com.liferay.portlet.wiki.model.WikiNode;
58 import com.liferay.portlet.wiki.model.WikiPage;
59 import com.liferay.portlet.wiki.model.impl.WikiPageImpl;
60 import com.liferay.portlet.wiki.service.WikiPageLocalServiceUtil;
61 import com.liferay.portlet.wiki.translators.MediaWikiToCreoleTranslator;
62
63 import java.io.BufferedReader;
64 import java.io.File;
65 import java.io.FileReader;
66 import java.io.IOException;
67
68 import java.util.ArrayList;
69 import java.util.Collections;
70 import java.util.HashMap;
71 import java.util.Iterator;
72 import java.util.List;
73 import java.util.Map;
74 import java.util.regex.Matcher;
75 import java.util.regex.Pattern;
76
77
84 public class MediaWikiImporter implements WikiImporter {
85
86 public static final String SHARED_IMAGES_CONTENT = "See attachments";
87
88 public static final String SHARED_IMAGES_TITLE = "SharedImages";
89
90 public void importPages(
91 long userId, WikiNode node, File[] files,
92 Map<String, String[]> options)
93 throws PortalException {
94
95 if ((files.length < 1) || (files[0] == null) || (!files[0].exists())) {
96 throw new PortalException("The pages file is mandatory");
97 }
98
99 File pagesFile = files[0];
100 File usersFile = files[1];
101 File imagesFile = files[2];
102
103 try {
104 Document doc = SAXReaderUtil.read(pagesFile);
105
106 Map<String, String> usersMap = readUsersFile(usersFile);
107
108 Element root = doc.getRootElement();
109
110 List<String> specialNamespaces = readSpecialNamespaces(root);
111
112 processSpecialPages(userId, node, root, specialNamespaces);
113 processRegularPages(
114 userId, node, root, specialNamespaces, usersMap, imagesFile,
115 options);
116 processImages(userId, node, imagesFile);
117
118 moveFrontPage(userId, node, options);
119 }
120 catch (DocumentException de) {
121 throw new ImportFilesException("Invalid XML file provided");
122 }
123 catch (IOException de) {
124 throw new ImportFilesException("Error reading the files provided");
125 }
126 catch (PortalException e) {
127 throw e;
128 }
129 catch (Exception e) {
130 throw new PortalException(e);
131 }
132 }
133
134 protected long getUserId(
135 long userId, WikiNode node, String author,
136 Map<String, String> usersMap)
137 throws PortalException, SystemException {
138
139 User user = null;
140
141 String emailAddress = usersMap.get(author);
142
143 try {
144 if (Validator.isNull(emailAddress)) {
145 user = UserLocalServiceUtil.getUserByScreenName(
146 node.getCompanyId(), author.toLowerCase());
147 }
148 else {
149 user = UserLocalServiceUtil.getUserByEmailAddress(
150 node.getCompanyId(), emailAddress);
151 }
152 }
153 catch (NoSuchUserException nsue) {
154 user = UserLocalServiceUtil.getUserById(userId);
155 }
156
157 return user.getUserId();
158 }
159
160 protected void importPage(
161 long userId, String author, WikiNode node, String title,
162 String content, String summary, Map<String, String> usersMap)
163 throws PortalException {
164
165 try {
166 long authorUserId = getUserId(userId, node, author, usersMap);
167 String parentTitle = readParentTitle(content);
168 String redirectTitle = readRedirectTitle(content);
169
170 ServiceContext serviceContext = new ServiceContext();
171
172 serviceContext.setTagsEntries(
173 readTagsEntries(userId, node, content));
174
175 if (Validator.isNull(redirectTitle)) {
176 content = _translator.translate(content);
177 }
178 else {
179 content =
180 StringPool.DOUBLE_OPEN_BRACKET + redirectTitle +
181 StringPool.DOUBLE_CLOSE_BRACKET;
182 }
183
184 WikiPage page = null;
185
186 try {
187 page = WikiPageLocalServiceUtil.getPage(
188 node.getNodeId(), title);
189 }
190 catch (NoSuchPageException nspe) {
191 page = WikiPageLocalServiceUtil.addPage(
192 authorUserId, node.getNodeId(), title, WikiPageImpl.NEW,
193 null, true, serviceContext);
194 }
195
196 WikiPageLocalServiceUtil.updatePage(
197 authorUserId, node.getNodeId(), title, page.getVersion(),
198 content, summary, true, "creole", parentTitle, redirectTitle,
199 serviceContext);
200 }
201 catch (Exception e) {
202 throw new PortalException("Error importing page " + title, e);
203 }
204 }
205
206 protected boolean isSpecialMediaWikiPage(
207 String title, List<String> specialNamespaces) {
208
209 for (String namespace: specialNamespaces) {
210 if (title.startsWith(namespace + StringPool.COLON)) {
211 return true;
212 }
213 }
214
215 return false;
216 }
217
218 protected boolean isValidImage(String[] paths, byte[] bytes) {
219 if (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[0])) {
220 return false;
221 }
222
223 if ((paths.length > 1) &&
224 (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[1]))) {
225
226 return false;
227 }
228
229 String fileName = paths[paths.length - 1];
230
231 try {
232 DLLocalServiceUtil.validate(fileName, bytes);
233 }
234 catch (PortalException pe) {
235 return false;
236 }
237 catch (SystemException se) {
238 return false;
239 }
240
241 return true;
242 }
243
244 protected void moveFrontPage(
245 long userId, WikiNode node, Map<String, String[]> options) {
246
247 String frontPageTitle = MapUtil.getString(
248 options, WikiImporterKeys.OPTIONS_FRONT_PAGE);
249
250 if (Validator.isNotNull(frontPageTitle)) {
251 frontPageTitle = normalizeTitle(frontPageTitle);
252
253 try {
254 if (WikiPageLocalServiceUtil.getPagesCount(
255 node.getNodeId(), frontPageTitle, true) > 0) {
256
257 ServiceContext serviceContext = new ServiceContext();
258
259 WikiPageLocalServiceUtil.movePage(
260 userId, node.getNodeId(), frontPageTitle,
261 WikiPageImpl.FRONT_PAGE, false, serviceContext);
262
263 }
264 }
265 catch (Exception e) {
266 if (_log.isWarnEnabled()) {
267 StringBuilder sb = new StringBuilder();
268
269 sb.append("Could not move ");
270 sb.append(WikiPageImpl.FRONT_PAGE);
271 sb.append(" to the title provided: ");
272 sb.append(frontPageTitle);
273
274 _log.warn(sb.toString(), e);
275 }
276 }
277
278 }
279
280 }
281
282 protected String normalize(String categoryName, int length) {
283 categoryName = TagsUtil.toWord(categoryName.trim());
284
285 return StringUtil.shorten(categoryName, length);
286 }
287
288 protected String normalizeDescription(String description) {
289 description = description.replaceAll(
290 _categoriesPattern.pattern(), StringPool.BLANK);
291
292 return normalize(description, 300);
293 }
294
295 protected String normalizeTitle(String title) {
296 title = title.replaceAll(
297 PropsValues.WIKI_PAGE_TITLES_REMOVE_REGEXP, StringPool.BLANK);
298
299 return StringUtil.shorten(title, 75);
300 }
301
302 private void processImages(long userId, WikiNode node, File imagesFile)
303 throws Exception {
304
305 if ((imagesFile == null) || (!imagesFile.exists())) {
306 return;
307 }
308
309 ProgressTracker progressTracker =
310 ProgressTrackerThreadLocal.getProgressTracker();
311
312 int count = 0;
313
314 ZipReader zipReader = new ZipReader(imagesFile);
315
316 Map<String, byte[]> entries = zipReader.getEntries();
317
318 int total = entries.size();
319
320 if (total > 0) {
321 try {
322 WikiPageLocalServiceUtil.getPage(
323 node.getNodeId(), SHARED_IMAGES_TITLE);
324 }
325 catch (NoSuchPageException nspe) {
326 ServiceContext serviceContext = new ServiceContext();
327
328 WikiPageLocalServiceUtil.addPage(
329 userId, node.getNodeId(), SHARED_IMAGES_TITLE,
330 SHARED_IMAGES_CONTENT, null, true, serviceContext);
331 }
332 }
333
334 List<ObjectValuePair<String, byte[]>> attachments =
335 new ArrayList<ObjectValuePair<String, byte[]>>();
336
337 Iterator<Map.Entry<String, byte[]>> itr = entries.entrySet().iterator();
338
339 int percentage = 50;
340
341 for (int i = 0; itr.hasNext(); i++) {
342 Map.Entry<String, byte[]> entry = itr.next();
343
344 String key = entry.getKey();
345 byte[] value = entry.getValue();
346
347 if (key.endsWith(StringPool.SLASH)) {
348 if (_log.isInfoEnabled()) {
349 _log.info("Ignoring " + key);
350 }
351
352 continue;
353 }
354
355 String[] paths = StringUtil.split(key, StringPool.SLASH);
356
357 if (!isValidImage(paths, value)) {
358 if (_log.isInfoEnabled()) {
359 _log.info("Ignoring " + key);
360 }
361
362 continue;
363 }
364
365 String fileName = paths[paths.length - 1].toLowerCase();
366
367 attachments.add(
368 new ObjectValuePair<String, byte[]>(fileName, value));
369
370 count++;
371
372 if ((i % 5) == 0) {
373 WikiPageLocalServiceUtil.addPageAttachments(
374 node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
375
376 attachments.clear();
377
378 percentage = Math.min(50 + (i * 50) / total, 99);
379
380 progressTracker.updateProgress(percentage);
381 }
382 }
383
384 if (!attachments.isEmpty()) {
385 WikiPageLocalServiceUtil.addPageAttachments(
386 node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
387 }
388
389 if (_log.isInfoEnabled()) {
390 _log.info("Imported " + count + " images into " + node.getName());
391 }
392 }
393
394 protected void processRegularPages(
395 long userId, WikiNode node, Element root,
396 List<String> specialNamespaces, Map<String, String> usersMap,
397 File imagesFile, Map<String, String[]> options) {
398
399 boolean importLatestVersion = MapUtil.getBoolean(
400 options, WikiImporterKeys.OPTIONS_IMPORT_LATEST_VERSION);
401
402 ProgressTracker progressTracker =
403 ProgressTrackerThreadLocal.getProgressTracker();
404
405 int count = 0;
406
407 List<Element> pages = root.elements("page");
408
409 int total = pages.size();
410
411 Iterator<Element> itr = root.elements("page").iterator();
412
413 int percentage = 10;
414 int maxPercentage = 50;
415
416 if ((imagesFile == null) || (!imagesFile.exists())) {
417 maxPercentage = 99;
418 }
419
420 int percentageRange = maxPercentage - percentage;
421
422 for (int i = 0; itr.hasNext(); i++) {
423 Element pageEl = itr.next();
424
425 String title = pageEl.elementText("title");
426
427 title = normalizeTitle(title);
428
429 percentage = Math.min(
430 10 + (i * percentageRange) / total, maxPercentage);
431
432 progressTracker.updateProgress(percentage);
433
434 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
435 continue;
436 }
437
438 List<Element> revisionEls = pageEl.elements("revision");
439
440 if (importLatestVersion) {
441 Element lastRevisionEl = revisionEls.get(
442 revisionEls.size() - 1);
443
444 revisionEls = new ArrayList<Element>();
445
446 revisionEls.add(lastRevisionEl);
447 }
448
449 for (Element curRevisionEl : revisionEls) {
450 String author = curRevisionEl.element(
451 "contributor").elementText("username");
452 String content = curRevisionEl.elementText("text");
453 String summary = curRevisionEl.elementText("comment");
454
455 try {
456 importPage(
457 userId, author, node, title, content, summary,
458 usersMap);
459 }
460 catch (Exception e) {
461 if (_log.isWarnEnabled()) {
462 StringBuilder sb = new StringBuilder();
463
464 sb.append("Page with title ");
465 sb.append(title);
466 sb.append(" could not be imported");
467
468 _log.warn(sb.toString(), e);
469 }
470 }
471 }
472
473 count++;
474 }
475
476 if (_log.isInfoEnabled()) {
477 _log.info("Imported " + count + " pages into " + node.getName());
478 }
479 }
480
481 protected void processSpecialPages(
482 long userId, WikiNode node, Element root,
483 List<String> specialNamespaces)
484 throws PortalException {
485
486 ProgressTracker progressTracker =
487 ProgressTrackerThreadLocal.getProgressTracker();
488
489 List<Element> pages = root.elements("page");
490
491 int total = pages.size();
492
493 Iterator<Element> itr = pages.iterator();
494
495 for (int i = 0; itr.hasNext(); i++) {
496 Element page = itr.next();
497
498 String title = page.elementText("title");
499
500 if (!title.startsWith("Category:")) {
501 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
502 root.remove(page);
503 }
504
505 continue;
506 }
507
508 String categoryName = title.substring("Category:".length());
509
510 categoryName = normalize(categoryName, 75);
511
512 String description = page.element("revision").elementText("text");
513
514 description = normalizeDescription(description);
515
516 try {
517 TagsEntry tagsEntry = null;
518
519 try {
520 tagsEntry = TagsEntryLocalServiceUtil.getEntry(
521 node.getCompanyId(), categoryName);
522 }
523 catch (NoSuchEntryException nsee) {
524 ServiceContext serviceContext = new ServiceContext();
525
526 serviceContext.setAddCommunityPermissions(true);
527 serviceContext.setAddGuestPermissions(true);
528 serviceContext.setScopeGroupId(node.getGroupId());
529
530 tagsEntry = TagsEntryLocalServiceUtil.addEntry(
531 userId, null, categoryName, null, null, serviceContext);
532 }
533
534 if (Validator.isNotNull(description)) {
535 TagsPropertyLocalServiceUtil.addProperty(
536 userId, tagsEntry.getEntryId(), "description",
537 description);
538 }
539 }
540 catch (SystemException se) {
541 _log.error(se, se);
542 }
543
544 if ((i % 5) == 0) {
545 progressTracker.updateProgress((i * 10) / total);
546 }
547 }
548 }
549
550 protected String readParentTitle(String content) {
551 Matcher matcher = _parentPattern.matcher(content);
552
553 String redirectTitle = StringPool.BLANK;
554
555 if (matcher.find()) {
556 redirectTitle = matcher.group(1);
557
558 redirectTitle = normalizeTitle(redirectTitle);
559
560 redirectTitle += " (disambiguation)";
561 }
562
563 return redirectTitle;
564 }
565
566 protected String readRedirectTitle(String content) {
567 Matcher matcher = _redirectPattern.matcher(content);
568
569 String redirectTitle = StringPool.BLANK;
570
571 if (matcher.find()) {
572 redirectTitle = matcher.group(1);
573
574 redirectTitle = normalizeTitle(redirectTitle);
575 }
576
577 return redirectTitle;
578 }
579
580 protected List<String> readSpecialNamespaces(Element root)
581 throws ImportFilesException {
582
583 List<String> namespaces = new ArrayList<String>();
584
585 Element siteinfoEl = root.element("siteinfo");
586
587 if (siteinfoEl == null) {
588 throw new ImportFilesException("Invalid pages XML file");
589 }
590
591 Iterator<Element> itr = siteinfoEl.element(
592 "namespaces").elements("namespace").iterator();
593
594 while (itr.hasNext()) {
595 Element namespace = itr.next();
596
597 if (!namespace.attribute("key").getData().equals("0")) {
598 namespaces.add(namespace.getText());
599 }
600 }
601
602 return namespaces;
603 }
604
605 protected String[] readTagsEntries(
606 long userId, WikiNode node, String content)
607 throws PortalException, SystemException {
608
609 Matcher matcher = _categoriesPattern.matcher(content);
610
611 List<String> tagsEntries = new ArrayList<String>();
612
613 while (matcher.find()) {
614 String categoryName = matcher.group(1);
615
616 categoryName = normalize(categoryName, 75);
617
618 TagsEntry tagsEntry = null;
619
620 try {
621 tagsEntry = TagsEntryLocalServiceUtil.getEntry(
622 node.getCompanyId(), categoryName);
623 }
624 catch (NoSuchEntryException nsee) {
625 ServiceContext serviceContext = new ServiceContext();
626
627 serviceContext.setAddCommunityPermissions(true);
628 serviceContext.setAddGuestPermissions(true);
629 serviceContext.setScopeGroupId(node.getGroupId());
630
631 tagsEntry = TagsEntryLocalServiceUtil.addEntry(
632 userId, null, categoryName, null, null, serviceContext);
633 }
634
635 tagsEntries.add(tagsEntry.getName());
636 }
637
638 if (content.indexOf(_WORK_IN_PROGRESS) != -1) {
639 tagsEntries.add(_WORK_IN_PROGRESS_TAG);
640 }
641
642 return tagsEntries.toArray(new String[tagsEntries.size()]);
643 }
644
645 protected Map<String, String> readUsersFile(File usersFile)
646 throws IOException {
647
648 if ((usersFile == null) || (!usersFile.exists())) {
649 return Collections.EMPTY_MAP;
650 }
651
652 Map<String, String> usersMap = new HashMap<String, String>();
653
654 BufferedReader reader = new BufferedReader(new FileReader(usersFile));
655
656 String line = reader.readLine();
657
658 while (line != null) {
659 String[] array = StringUtil.split(line);
660
661 if ((array.length == 2) && (Validator.isNotNull(array[0])) &&
662 (Validator.isNotNull(array[1]))) {
663
664 usersMap.put(array[0], array[1]);
665 }
666 else {
667 if (_log.isInfoEnabled()) {
668 _log.info(
669 "Ignoring line " + line +
670 " because it does not contain exactly 2 columns");
671 }
672 }
673
674 line = reader.readLine();
675 }
676
677 return usersMap;
678 }
679
680 private static final String[] _SPECIAL_MEDIA_WIKI_DIRS = new String[]{
681 "thumb", "temp", "archive"};
682
683 private static final String _WORK_IN_PROGRESS = "{{Work in progress}}";
684
685 private static final String _WORK_IN_PROGRESS_TAG = "work in progress";
686
687 private static Log _log = LogFactoryUtil.getLog(MediaWikiImporter.class);
688
689 private static Pattern _categoriesPattern = Pattern.compile(
690 "\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*");
691 private static Pattern _parentPattern = Pattern.compile(
692 "\\{{2}OtherTopics\\|([^\\}]*)\\}{2}");
693 private static Pattern _redirectPattern = Pattern.compile(
694 "#REDIRECT \\[\\[([^\\]]*)\\]\\]");
695
696 private MediaWikiToCreoleTranslator _translator =
697 new MediaWikiToCreoleTranslator();
698
699 }