1
22
23 package com.liferay.portlet.wiki.importers.mediawiki;
24
25 import com.liferay.documentlibrary.service.DLLocalServiceUtil;
26 import com.liferay.portal.NoSuchUserException;
27 import com.liferay.portal.PortalException;
28 import com.liferay.portal.SystemException;
29 import com.liferay.portal.kernel.log.Log;
30 import com.liferay.portal.kernel.log.LogFactoryUtil;
31 import com.liferay.portal.kernel.util.ArrayUtil;
32 import com.liferay.portal.kernel.util.MapUtil;
33 import com.liferay.portal.kernel.util.ObjectValuePair;
34 import com.liferay.portal.kernel.util.ProgressTracker;
35 import com.liferay.portal.kernel.util.ProgressTrackerThreadLocal;
36 import com.liferay.portal.kernel.util.StringPool;
37 import com.liferay.portal.kernel.util.StringUtil;
38 import com.liferay.portal.kernel.util.Validator;
39 import com.liferay.portal.kernel.xml.Document;
40 import com.liferay.portal.kernel.xml.DocumentException;
41 import com.liferay.portal.kernel.xml.Element;
42 import com.liferay.portal.kernel.xml.SAXReaderUtil;
43 import com.liferay.portal.kernel.zip.ZipReader;
44 import com.liferay.portal.model.User;
45 import com.liferay.portal.service.UserLocalServiceUtil;
46 import com.liferay.portal.util.PropsValues;
47 import com.liferay.portlet.tags.NoSuchEntryException;
48 import com.liferay.portlet.tags.model.TagsEntry;
49 import com.liferay.portlet.tags.service.TagsEntryLocalServiceUtil;
50 import com.liferay.portlet.tags.service.TagsPropertyLocalServiceUtil;
51 import com.liferay.portlet.tags.util.TagsUtil;
52 import com.liferay.portlet.wiki.ImportFilesException;
53 import com.liferay.portlet.wiki.NoSuchPageException;
54 import com.liferay.portlet.wiki.importers.WikiImporter;
55 import com.liferay.portlet.wiki.importers.WikiImporterKeys;
56 import com.liferay.portlet.wiki.model.WikiNode;
57 import com.liferay.portlet.wiki.model.WikiPage;
58 import com.liferay.portlet.wiki.model.impl.WikiPageImpl;
59 import com.liferay.portlet.wiki.service.WikiPageLocalServiceUtil;
60 import com.liferay.portlet.wiki.translators.MediaWikiToCreoleTranslator;
61
62 import java.io.BufferedReader;
63 import java.io.File;
64 import java.io.FileReader;
65 import java.io.IOException;
66
67 import java.util.ArrayList;
68 import java.util.Collections;
69 import java.util.HashMap;
70 import java.util.Iterator;
71 import java.util.List;
72 import java.util.Map;
73 import java.util.regex.Matcher;
74 import java.util.regex.Pattern;
75
76
82 public class MediaWikiImporter implements WikiImporter {
83
84 public static final String SHARED_IMAGES_CONTENT = "See attachments";
85
86 public static final String SHARED_IMAGES_TITLE = "SharedImages";
87
88 public void importPages(
89 long userId, WikiNode node, File[] files,
90 Map<String, String[]> options)
91 throws PortalException {
92
93 if ((files.length < 1) || (files[0] == null) || (!files[0].exists())) {
94 throw new PortalException("The pages file is mandatory");
95 }
96
97 File pagesFile = files[0];
98 File usersFile = files[1];
99 File imagesFile = files[2];
100
101 try {
102 Document doc = SAXReaderUtil.read(pagesFile);
103
104 Map<String, String> usersMap = readUsersFile(usersFile);
105
106 Element root = doc.getRootElement();
107
108 List<String> specialNamespaces = readSpecialNamespaces(root);
109
110 processSpecialPages(userId, node, root, specialNamespaces);
111 processRegularPages(
112 userId, node, root, specialNamespaces, usersMap, imagesFile,
113 options);
114 processImages(userId, node, imagesFile);
115
116 moveFrontPage(userId, node, options);
117 }
118 catch (DocumentException de) {
119 throw new ImportFilesException("Invalid XML file provided");
120 }
121 catch (IOException de) {
122 throw new ImportFilesException("Error reading the files provided");
123 }
124 catch (PortalException e) {
125 throw e;
126 }
127 catch (Exception e) {
128 throw new PortalException(e);
129 }
130 }
131
132 protected long getUserId(
133 long userId, WikiNode node, String author,
134 Map<String, String> usersMap)
135 throws PortalException, SystemException {
136
137 User user = null;
138
139 String emailAddress = usersMap.get(author);
140
141 try {
142 if (Validator.isNull(emailAddress)) {
143 user = UserLocalServiceUtil.getUserByScreenName(
144 node.getCompanyId(), author.toLowerCase());
145 }
146 else {
147 user = UserLocalServiceUtil.getUserByEmailAddress(
148 node.getCompanyId(), emailAddress);
149 }
150 }
151 catch (NoSuchUserException nsue) {
152 user = UserLocalServiceUtil.getUserById(userId);
153 }
154
155 return user.getUserId();
156 }
157
158 protected void importPage(
159 long userId, String author, WikiNode node, String title,
160 String content, String summary, Map<String, String> usersMap)
161 throws PortalException {
162
163 try {
164 long authorUserId = getUserId(userId, node, author, usersMap);
165 String parentTitle = readParentTitle(content);
166 String redirectTitle = readRedirectTitle(content);
167 String[] tagsEntries = readTagsEntries(userId, node, content);
168
169 if (Validator.isNull(redirectTitle)) {
170 content = _translator.translate(content);
171 }
172 else {
173 content =
174 StringPool.DOUBLE_OPEN_BRACKET + redirectTitle +
175 StringPool.DOUBLE_CLOSE_BRACKET;
176 }
177
178 WikiPage page = null;
179
180 try {
181 page = WikiPageLocalServiceUtil.getPage(
182 node.getNodeId(), title);
183 }
184 catch (NoSuchPageException nspe) {
185 page = WikiPageLocalServiceUtil.addPage(
186 authorUserId, node.getNodeId(), title, WikiPageImpl.NEW,
187 null, true, null, null);
188 }
189
190 WikiPageLocalServiceUtil.updatePage(
191 authorUserId, node.getNodeId(), title, page.getVersion(),
192 content, summary, true, "creole", parentTitle,
193 redirectTitle, tagsEntries, null, null);
194 }
195 catch (Exception e) {
196 throw new PortalException("Error importing page " + title, e);
197 }
198 }
199
200 protected boolean isSpecialMediaWikiPage(
201 String title, List<String> specialNamespaces) {
202
203 for (String namespace: specialNamespaces) {
204 if (title.startsWith(namespace + StringPool.COLON)) {
205 return true;
206 }
207 }
208
209 return false;
210 }
211
212 protected boolean isValidImage(String[] paths, byte[] bytes) {
213 if (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[0])) {
214 return false;
215 }
216
217 if ((paths.length > 1) &&
218 (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[1]))) {
219
220 return false;
221 }
222
223 String fileName = paths[paths.length - 1];
224
225 try {
226 DLLocalServiceUtil.validate(fileName, bytes);
227 }
228 catch (PortalException pe) {
229 return false;
230 }
231 catch (SystemException se) {
232 return false;
233 }
234
235 return true;
236 }
237
238 protected void moveFrontPage(
239 long userId, WikiNode node, Map<String, String[]> options) {
240
241 String frontPageTitle = MapUtil.getString(
242 options, WikiImporterKeys.OPTIONS_FRONT_PAGE);
243
244 if (Validator.isNotNull(frontPageTitle)) {
245 frontPageTitle = normalizeTitle(frontPageTitle);
246
247 try {
248 if (WikiPageLocalServiceUtil.getPagesCount(
249 node.getNodeId(), frontPageTitle, true) > 0) {
250
251 WikiPageLocalServiceUtil.movePage(
252 userId, node.getNodeId(), frontPageTitle,
253 WikiPageImpl.FRONT_PAGE, false, null, null);
254
255 }
256 }
257 catch (Exception e) {
258 if (_log.isWarnEnabled()) {
259 StringBuilder sb = new StringBuilder();
260
261 sb.append("Could not move ");
262 sb.append(WikiPageImpl.FRONT_PAGE);
263 sb.append(" to the title provided: ");
264 sb.append(frontPageTitle);
265
266 _log.warn(sb.toString(), e);
267 }
268 }
269
270 }
271
272 }
273
274 protected String normalize(String categoryName, int length) {
275 categoryName = TagsUtil.toWord(categoryName.trim());
276
277 return StringUtil.shorten(categoryName, length);
278 }
279
280 protected String normalizeDescription(String description) {
281 description = description.replaceAll(
282 _categoriesPattern.pattern(), StringPool.BLANK);
283
284 return normalize(description, 300);
285 }
286
287 protected String normalizeTitle(String title) {
288 title = title.replaceAll(
289 PropsValues.WIKI_PAGE_TITLES_REMOVE_REGEXP, StringPool.BLANK);
290
291 return StringUtil.shorten(title, 75);
292 }
293
294 private void processImages(long userId, WikiNode node, File imagesFile)
295 throws Exception {
296
297 if ((imagesFile == null) || (!imagesFile.exists())) {
298 return;
299 }
300
301 ProgressTracker progressTracker =
302 ProgressTrackerThreadLocal.getProgressTracker();
303
304 int count = 0;
305
306 ZipReader zipReader = new ZipReader(imagesFile);
307
308 Map<String, byte[]> entries = zipReader.getEntries();
309
310 int total = entries.size();
311
312 if (total > 0) {
313 try {
314 WikiPageLocalServiceUtil.getPage(
315 node.getNodeId(), SHARED_IMAGES_TITLE);
316 }
317 catch (NoSuchPageException nspe) {
318 WikiPageLocalServiceUtil.addPage(
319 userId, node.getNodeId(), SHARED_IMAGES_TITLE,
320 SHARED_IMAGES_CONTENT, null, true, null, null);
321 }
322 }
323
324 List<ObjectValuePair<String, byte[]>> attachments =
325 new ArrayList<ObjectValuePair<String, byte[]>>();
326
327 Iterator<Map.Entry<String, byte[]>> itr = entries.entrySet().iterator();
328
329 int percentage = 50;
330
331 for (int i = 0; itr.hasNext(); i++) {
332 Map.Entry<String, byte[]> entry = itr.next();
333
334 String key = entry.getKey();
335 byte[] value = entry.getValue();
336
337 if (key.endsWith(StringPool.SLASH)) {
338 if (_log.isInfoEnabled()) {
339 _log.info("Ignoring " + key);
340 }
341
342 continue;
343 }
344
345 String[] paths = StringUtil.split(key, StringPool.SLASH);
346
347 if (!isValidImage(paths, value)) {
348 if (_log.isInfoEnabled()) {
349 _log.info("Ignoring " + key);
350 }
351
352 continue;
353 }
354
355 String fileName = paths[paths.length - 1].toLowerCase();
356
357 attachments.add(
358 new ObjectValuePair<String, byte[]>(fileName, value));
359
360 count++;
361
362 if ((i % 5) == 0) {
363 WikiPageLocalServiceUtil.addPageAttachments(
364 node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
365
366 attachments.clear();
367
368 percentage = Math.min(50 + (i * 50) / total, 99);
369
370 progressTracker.updateProgress(percentage);
371 }
372 }
373
374 if (!attachments.isEmpty()) {
375 WikiPageLocalServiceUtil.addPageAttachments(
376 node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
377 }
378
379 if (_log.isInfoEnabled()) {
380 _log.info("Imported " + count + " images into " + node.getName());
381 }
382 }
383
384 protected void processRegularPages(
385 long userId, WikiNode node, Element root,
386 List<String> specialNamespaces, Map<String, String> usersMap,
387 File imagesFile, Map<String, String[]> options) {
388
389 boolean importLatestVersion = MapUtil.getBoolean(
390 options, WikiImporterKeys.OPTIONS_IMPORT_LATEST_VERSION);
391
392 ProgressTracker progressTracker =
393 ProgressTrackerThreadLocal.getProgressTracker();
394
395 int count = 0;
396
397 List<Element> pages = root.elements("page");
398
399 int total = pages.size();
400
401 Iterator<Element> itr = root.elements("page").iterator();
402
403 int percentage = 10;
404 int maxPercentage = 50;
405
406 if ((imagesFile == null) || (!imagesFile.exists())) {
407 maxPercentage = 99;
408 }
409
410 int percentageRange = maxPercentage - percentage;
411
412 for (int i = 0; itr.hasNext(); i++) {
413 Element pageEl = itr.next();
414
415 String title = pageEl.elementText("title");
416
417 title = normalizeTitle(title);
418
419 percentage = Math.min(
420 10 + (i * percentageRange) / total, maxPercentage);
421
422 progressTracker.updateProgress(percentage);
423
424 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
425 continue;
426 }
427
428 List<Element> revisionEls = pageEl.elements("revision");
429
430 if (importLatestVersion) {
431 Element lastRevisionEl = revisionEls.get(
432 revisionEls.size() - 1);
433
434 revisionEls = new ArrayList<Element>();
435
436 revisionEls.add(lastRevisionEl);
437 }
438
439 for (Element curRevisionEl : revisionEls) {
440 String author = curRevisionEl.element(
441 "contributor").elementText("username");
442 String content = curRevisionEl.elementText("text");
443 String summary = curRevisionEl.elementText("comment");
444
445 try {
446 importPage(
447 userId, author, node, title, content, summary,
448 usersMap);
449 }
450 catch (Exception e) {
451 if (_log.isWarnEnabled()) {
452 StringBuilder sb = new StringBuilder();
453
454 sb.append("Page with title ");
455 sb.append(title);
456 sb.append(" could not be imported");
457
458 _log.warn(sb.toString(), e);
459 }
460 }
461 }
462
463 count++;
464 }
465
466 if (_log.isInfoEnabled()) {
467 _log.info("Imported " + count + " pages into " + node.getName());
468 }
469 }
470
471 protected void processSpecialPages(
472 long userId, WikiNode node, Element root,
473 List<String> specialNamespaces)
474 throws PortalException {
475
476 ProgressTracker progressTracker =
477 ProgressTrackerThreadLocal.getProgressTracker();
478
479 List<Element> pages = root.elements("page");
480
481 int total = pages.size();
482
483 Iterator<Element> itr = pages.iterator();
484
485 for (int i = 0; itr.hasNext(); i++) {
486 Element page = itr.next();
487
488 String title = page.elementText("title");
489
490 if (!title.startsWith("Category:")) {
491 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
492 root.remove(page);
493 }
494
495 continue;
496 }
497
498 String categoryName = title.substring("Category:".length());
499
500 categoryName = normalize(categoryName, 75);
501
502 String description = page.element("revision").elementText("text");
503
504 description = normalizeDescription(description);
505
506 try {
507 TagsEntry tagsEntry = null;
508
509 try {
510 tagsEntry = TagsEntryLocalServiceUtil.getEntry(
511 node.getCompanyId(), categoryName);
512 }
513 catch (NoSuchEntryException nsee) {
514 tagsEntry = TagsEntryLocalServiceUtil.addEntry(
515 userId, categoryName);
516 }
517
518 if (Validator.isNotNull(description)) {
519 TagsPropertyLocalServiceUtil.addProperty(
520 userId, tagsEntry.getEntryId(), "description",
521 description);
522 }
523 }
524 catch (SystemException se) {
525 _log.error(se, se);
526 }
527
528 if ((i % 5) == 0) {
529 progressTracker.updateProgress((i * 10) / total);
530 }
531 }
532 }
533
534 protected String readParentTitle(String content) {
535 Matcher matcher = _parentPattern.matcher(content);
536
537 String redirectTitle = StringPool.BLANK;
538
539 if (matcher.find()) {
540 redirectTitle = matcher.group(1);
541
542 redirectTitle = normalizeTitle(redirectTitle);
543
544 redirectTitle += " (disambiguation)";
545 }
546
547 return redirectTitle;
548 }
549
550 protected String readRedirectTitle(String content) {
551 Matcher matcher = _redirectPattern.matcher(content);
552
553 String redirectTitle = StringPool.BLANK;
554
555 if (matcher.find()) {
556 redirectTitle = matcher.group(1);
557
558 redirectTitle = normalizeTitle(redirectTitle);
559 }
560
561 return redirectTitle;
562 }
563
564 protected List<String> readSpecialNamespaces(Element root)
565 throws ImportFilesException {
566
567 List<String> namespaces = new ArrayList<String>();
568
569 Element siteinfoEl = root.element("siteinfo");
570
571 if (siteinfoEl == null) {
572 throw new ImportFilesException("Invalid pages XML file");
573 }
574
575 Iterator<Element> itr = siteinfoEl.element(
576 "namespaces").elements("namespace").iterator();
577
578 while (itr.hasNext()) {
579 Element namespace = itr.next();
580
581 if (!namespace.attribute("key").getData().equals("0")) {
582 namespaces.add(namespace.getText());
583 }
584 }
585
586 return namespaces;
587 }
588
589 protected String[] readTagsEntries(
590 long userId, WikiNode node, String content)
591 throws PortalException, SystemException {
592
593 Matcher matcher = _categoriesPattern.matcher(content);
594
595 List<String> tagsEntries = new ArrayList<String>();
596
597 while (matcher.find()) {
598 String categoryName = matcher.group(1);
599
600 categoryName = normalize(categoryName, 75);
601
602 TagsEntry tagsEntry = null;
603
604 try {
605 tagsEntry = TagsEntryLocalServiceUtil.getEntry(
606 node.getCompanyId(), categoryName);
607 }
608 catch (NoSuchEntryException nsee) {
609 tagsEntry = TagsEntryLocalServiceUtil.addEntry(
610 userId, categoryName);
611 }
612
613 tagsEntries.add(tagsEntry.getName());
614 }
615
616 if (content.indexOf(_WORK_IN_PROGRESS) != -1) {
617 tagsEntries.add(_WORK_IN_PROGRESS_TAG);
618 }
619
620 return tagsEntries.toArray(new String[tagsEntries.size()]);
621 }
622
623 protected Map<String, String> readUsersFile(File usersFile)
624 throws IOException {
625
626 if ((usersFile == null) || (!usersFile.exists())) {
627 return Collections.EMPTY_MAP;
628 }
629
630 Map<String, String> usersMap = new HashMap<String, String>();
631
632 BufferedReader reader = new BufferedReader(new FileReader(usersFile));
633
634 String line = reader.readLine();
635
636 while (line != null) {
637 String[] array = StringUtil.split(line);
638
639 if ((array.length == 2) && (Validator.isNotNull(array[0])) &&
640 (Validator.isNotNull(array[1]))) {
641
642 usersMap.put(array[0], array[1]);
643 }
644 else {
645 if (_log.isInfoEnabled()) {
646 _log.info(
647 "Ignoring line " + line +
648 " because it does not contain exactly 2 columns");
649 }
650 }
651
652 line = reader.readLine();
653 }
654
655 return usersMap;
656 }
657
658 private static final String[] _SPECIAL_MEDIA_WIKI_DIRS = {
659 "thumb", "temp", "archive"
660 };
661
662 private static final String _WORK_IN_PROGRESS = "{{Work in progress}}";
663
664 private static final String _WORK_IN_PROGRESS_TAG = "work in progress";
665
666 private static Log _log = LogFactoryUtil.getLog(MediaWikiImporter.class);
667
668 private static Pattern _categoriesPattern = Pattern.compile(
669 "\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*");
670 private static Pattern _parentPattern = Pattern.compile(
671 "\\{{2}OtherTopics\\|([^\\}]*)\\}{2}");
672 private static Pattern _redirectPattern = Pattern.compile(
673 "#REDIRECT \\[\\[([^\\]]*)\\]\\]");
674
675 private MediaWikiToCreoleTranslator _translator =
676 new MediaWikiToCreoleTranslator();
677
678 }