1
22
23 package com.liferay.portlet.wiki.importers.mediawiki;
24
25 import com.liferay.documentlibrary.service.DLLocalServiceUtil;
26 import com.liferay.portal.NoSuchUserException;
27 import com.liferay.portal.PortalException;
28 import com.liferay.portal.SystemException;
29 import com.liferay.portal.kernel.util.ArrayUtil;
30 import com.liferay.portal.kernel.util.ObjectValuePair;
31 import com.liferay.portal.kernel.util.ProgressTracker;
32 import com.liferay.portal.kernel.util.ProgressTrackerThreadLocal;
33 import com.liferay.portal.kernel.util.StringPool;
34 import com.liferay.portal.kernel.util.StringUtil;
35 import com.liferay.portal.kernel.util.Validator;
36 import com.liferay.portal.kernel.zip.ZipReader;
37 import com.liferay.portal.model.User;
38 import com.liferay.portal.service.UserLocalServiceUtil;
39 import com.liferay.portal.util.PropsValues;
40 import com.liferay.portlet.tags.NoSuchEntryException;
41 import com.liferay.portlet.tags.model.TagsEntry;
42 import com.liferay.portlet.tags.service.TagsEntryLocalServiceUtil;
43 import com.liferay.portlet.tags.service.TagsPropertyLocalServiceUtil;
44 import com.liferay.portlet.tags.util.TagsUtil;
45 import com.liferay.portlet.wiki.ImportFilesException;
46 import com.liferay.portlet.wiki.NoSuchPageException;
47 import com.liferay.portlet.wiki.importers.WikiImporter;
48 import com.liferay.portlet.wiki.importers.WikiImporterKeys;
49 import com.liferay.portlet.wiki.model.WikiNode;
50 import com.liferay.portlet.wiki.model.WikiPage;
51 import com.liferay.portlet.wiki.model.impl.WikiPageImpl;
52 import com.liferay.portlet.wiki.service.WikiPageLocalServiceUtil;
53 import com.liferay.portlet.wiki.translators.MediaWikiToCreoleTranslator;
54 import com.liferay.util.MapUtil;
55
56 import java.io.BufferedReader;
57 import java.io.File;
58 import java.io.FileReader;
59 import java.io.IOException;
60
61 import java.util.ArrayList;
62 import java.util.Collections;
63 import java.util.HashMap;
64 import java.util.Iterator;
65 import java.util.List;
66 import java.util.Map;
67 import java.util.regex.Matcher;
68 import java.util.regex.Pattern;
69
70 import org.apache.commons.logging.Log;
71 import org.apache.commons.logging.LogFactory;
72
73 import org.dom4j.Document;
74 import org.dom4j.DocumentException;
75 import org.dom4j.Element;
76 import org.dom4j.io.SAXReader;
77
78
85 public class MediaWikiImporter implements WikiImporter {
86
87 public static final String SHARED_IMAGES_CONTENT = "See attachments";
88
89 public static final String SHARED_IMAGES_TITLE = "SharedImages";
90
91 public void importPages(
92 long userId, WikiNode node, File[] files,
93 Map<String, String[]> options)
94 throws PortalException {
95
96 if ((files.length < 1) || (files[0] == null) || (!files[0].exists())) {
97 throw new PortalException("The pages file is mandatory");
98 }
99
100 File pagesFile = files[0];
101 File usersFile = files[1];
102 File imagesFile = files[2];
103
104 try {
105 SAXReader saxReader = new SAXReader();
106
107 Document doc = saxReader.read(pagesFile);
108
109 Map<String, String> usersMap = readUsersFile(usersFile);
110
111 Element root = doc.getRootElement();
112
113 List<String> specialNamespaces = readSpecialNamespaces(root);
114
115 processSpecialPages(userId, node, root, specialNamespaces);
116 processRegularPages(
117 userId, node, root, specialNamespaces, usersMap, imagesFile,
118 options);
119 processImages(userId, node, imagesFile);
120
121 moveFrontPage(userId, node, options);
122 }
123 catch (DocumentException de) {
124 throw new ImportFilesException("Invalid XML file provided");
125 }
126 catch (IOException de) {
127 throw new ImportFilesException("Error reading the files provided");
128 }
129 catch (PortalException e) {
130 throw e;
131 }
132 catch (Exception e) {
133 throw new PortalException(e);
134 }
135 }
136
137 protected long getUserId(
138 long userId, WikiNode node, String author,
139 Map<String, String> usersMap)
140 throws PortalException, SystemException {
141
142 User user = null;
143
144 String emailAddress = usersMap.get(author);
145
146 try {
147 if (Validator.isNull(emailAddress)) {
148 user = UserLocalServiceUtil.getUserByScreenName(
149 node.getCompanyId(), author.toLowerCase());
150 }
151 else {
152 user = UserLocalServiceUtil.getUserByEmailAddress(
153 node.getCompanyId(), emailAddress);
154 }
155 }
156 catch (NoSuchUserException nsue) {
157 user = UserLocalServiceUtil.getUserById(userId);
158 }
159
160 return user.getUserId();
161 }
162
163 protected void importPage(
164 long userId, String author, WikiNode node, String title,
165 String content, String summary, Map<String, String> usersMap)
166 throws PortalException {
167
168 try {
169 long authorUserId = getUserId(userId, node, author, usersMap);
170 String parentTitle = readParentTitle(content);
171 String redirectTitle = readRedirectTitle(content);
172 String[] tagsEntries = readTagsEntries(userId, node, content);
173
174 if (Validator.isNull(redirectTitle)) {
175 content = _translator.translate(content);
176 }
177 else {
178 content =
179 StringPool.DOUBLE_OPEN_BRACKET + redirectTitle +
180 StringPool.DOUBLE_CLOSE_BRACKET;
181 }
182
183 WikiPage page = null;
184
185 try {
186 page = WikiPageLocalServiceUtil.getPage(
187 node.getNodeId(), title);
188 }
189 catch (NoSuchPageException nspe) {
190 page = WikiPageLocalServiceUtil.addPage(
191 authorUserId, node.getNodeId(), title, WikiPageImpl.NEW,
192 null, true, null, null);
193 }
194
195 WikiPageLocalServiceUtil.updatePage(
196 authorUserId, node.getNodeId(), title, page.getVersion(),
197 content, summary, true, "creole", parentTitle,
198 redirectTitle, tagsEntries, null, null);
199 }
200 catch (Exception e) {
201 throw new PortalException("Error importing page " + title, e);
202 }
203 }
204
205 protected boolean isSpecialMediaWikiPage(
206 String title, List<String> specialNamespaces) {
207
208 for (String namespace: specialNamespaces) {
209 if (title.startsWith(namespace + StringPool.COLON)) {
210 return true;
211 }
212 }
213
214 return false;
215 }
216
217 protected boolean isValidImage(String[] paths, byte[] bytes) {
218 if (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[0])) {
219 return false;
220 }
221
222 if ((paths.length > 1) &&
223 (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[1]))) {
224
225 return false;
226 }
227
228 String fileName = paths[paths.length - 1];
229
230 try {
231 DLLocalServiceUtil.validate(fileName, bytes);
232 }
233 catch (PortalException pe) {
234 return false;
235 }
236
237 return true;
238 }
239
240 protected void moveFrontPage(
241 long userId, WikiNode node, Map<String, String[]> options) {
242
243 String frontPageTitle = MapUtil.getString(
244 options, WikiImporterKeys.OPTIONS_FRONT_PAGE);
245
246 if (Validator.isNotNull(frontPageTitle)) {
247 frontPageTitle = normalizeTitle(frontPageTitle);
248
249 try {
250 if (WikiPageLocalServiceUtil.getPagesCount(
251 node.getNodeId(), frontPageTitle, true) > 0) {
252
253 WikiPageLocalServiceUtil.movePage(
254 userId, node.getNodeId(), frontPageTitle,
255 WikiPageImpl.FRONT_PAGE, false, null, null);
256
257 }
258 }
259 catch (Exception e) {
260 if (_log.isWarnEnabled()) {
261 StringBuilder sb = new StringBuilder();
262
263 sb.append("Could not move ");
264 sb.append(WikiPageImpl.FRONT_PAGE);
265 sb.append(" to the title provided: ");
266 sb.append(frontPageTitle);
267
268 _log.warn(sb.toString(), e);
269 }
270 }
271
272 }
273
274 }
275
276 protected String normalize(String categoryName, int length) {
277 categoryName = TagsUtil.toWord(categoryName.trim());
278
279 return StringUtil.shorten(categoryName, length);
280 }
281
282 protected String normalizeDescription(String description) {
283 description = description.replaceAll(
284 _categoriesPattern.pattern(), StringPool.BLANK);
285
286 return normalize(description, 300);
287 }
288
289 protected String normalizeTitle(String title) {
290 title = title.replaceAll(
291 PropsValues.WIKI_PAGE_TITLES_REMOVE_REGEXP, StringPool.BLANK);
292
293 return StringUtil.shorten(title, 75);
294 }
295
296 private void processImages(long userId, WikiNode node, File imagesFile)
297 throws Exception {
298
299 if ((imagesFile == null) || (!imagesFile.exists())) {
300 return;
301 }
302
303 ProgressTracker progressTracker =
304 ProgressTrackerThreadLocal.getProgressTracker();
305
306 int count = 0;
307
308 ZipReader zipReader = new ZipReader(imagesFile);
309
310 Map<String, byte[]> entries = zipReader.getEntries();
311
312 int total = entries.size();
313
314 if (total > 0) {
315 try {
316 WikiPageLocalServiceUtil.getPage(
317 node.getNodeId(), SHARED_IMAGES_TITLE);
318 }
319 catch (NoSuchPageException nspe) {
320 WikiPageLocalServiceUtil.addPage(
321 userId, node.getNodeId(), SHARED_IMAGES_TITLE,
322 SHARED_IMAGES_CONTENT, null, true, null, null);
323 }
324 }
325
326 List<ObjectValuePair<String, byte[]>> attachments =
327 new ArrayList<ObjectValuePair<String, byte[]>>();
328
329 Iterator<Map.Entry<String, byte[]>> itr = entries.entrySet().iterator();
330
331 int percentage = 50;
332
333 for (int i = 0; itr.hasNext(); i++) {
334 Map.Entry<String, byte[]> entry = itr.next();
335
336 String key = entry.getKey();
337 byte[] value = entry.getValue();
338
339 if (key.endsWith(StringPool.SLASH)) {
340 if (_log.isInfoEnabled()) {
341 _log.info("Ignoring " + key);
342 }
343
344 continue;
345 }
346
347 String[] paths = StringUtil.split(key, StringPool.SLASH);
348
349 if (!isValidImage(paths, value)) {
350 if (_log.isInfoEnabled()) {
351 _log.info("Ignoring " + key);
352 }
353
354 continue;
355 }
356
357 String fileName = paths[paths.length - 1].toLowerCase();
358
359 attachments.add(
360 new ObjectValuePair<String, byte[]>(fileName, value));
361
362 count++;
363
364 if ((i % 5) == 0) {
365 WikiPageLocalServiceUtil.addPageAttachments(
366 node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
367
368 attachments.clear();
369
370 percentage = Math.min(50 + (i * 50) / total, 99);
371
372 progressTracker.updateProgress(percentage);
373 }
374 }
375
376 if (!attachments.isEmpty()) {
377 WikiPageLocalServiceUtil.addPageAttachments(
378 node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
379 }
380
381 if (_log.isInfoEnabled()) {
382 _log.info("Imported " + count + " images into " + node.getName());
383 }
384 }
385
386 protected void processRegularPages(
387 long userId, WikiNode node, Element root,
388 List<String> specialNamespaces, Map<String, String> usersMap,
389 File imagesFile, Map<String, String[]> options) {
390
391 boolean importLatestVersion = MapUtil.getBoolean(
392 options, WikiImporterKeys.OPTIONS_IMPORT_LATEST_VERSION);
393
394 ProgressTracker progressTracker =
395 ProgressTrackerThreadLocal.getProgressTracker();
396
397 int count = 0;
398
399 List<Element> pages = root.elements("page");
400
401 int total = pages.size();
402
403 Iterator<Element> itr = root.elements("page").iterator();
404
405 int percentage = 10;
406 int maxPercentage = 50;
407
408 if ((imagesFile == null) || (!imagesFile.exists())) {
409 maxPercentage = 99;
410 }
411
412 int percentageRange = maxPercentage - percentage;
413
414 for (int i = 0; itr.hasNext(); i++) {
415 Element pageEl = itr.next();
416
417 String title = pageEl.elementText("title");
418
419 title = normalizeTitle(title);
420
421 percentage = Math.min(
422 10 + (i * percentageRange) / total, maxPercentage);
423
424 progressTracker.updateProgress(percentage);
425
426 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
427 continue;
428 }
429
430 List<Element> revisionEls = pageEl.elements("revision");
431
432 if (importLatestVersion) {
433 Element lastRevisionEl = revisionEls.get(
434 revisionEls.size() - 1);
435
436 revisionEls = new ArrayList<Element>();
437
438 revisionEls.add(lastRevisionEl);
439 }
440
441 for (Element curRevisionEl : revisionEls) {
442 String author = curRevisionEl.element(
443 "contributor").elementText("username");
444 String content = curRevisionEl.elementText("text");
445 String summary = curRevisionEl.elementText("comment");
446
447 try {
448 importPage(
449 userId, author, node, title, content, summary,
450 usersMap);
451 }
452 catch (Exception e) {
453 if (_log.isWarnEnabled()) {
454 StringBuilder sb = new StringBuilder();
455
456 sb.append("Page with title ");
457 sb.append(title);
458 sb.append(" could not be imported");
459
460 _log.warn(sb.toString(), e);
461 }
462 }
463 }
464
465 count++;
466 }
467
468 if (_log.isInfoEnabled()) {
469 _log.info("Imported " + count + " pages into " + node.getName());
470 }
471 }
472
473 protected void processSpecialPages(
474 long userId, WikiNode node, Element root,
475 List<String> specialNamespaces)
476 throws PortalException {
477
478 ProgressTracker progressTracker =
479 ProgressTrackerThreadLocal.getProgressTracker();
480
481 List<Element> pages = root.elements("page");
482
483 int total = pages.size();
484
485 Iterator<Element> itr = pages.iterator();
486
487 for (int i = 0; itr.hasNext(); i++) {
488 Element page = itr.next();
489
490 String title = page.elementText("title");
491
492 if (!title.startsWith("Category:")) {
493 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
494 root.remove(page);
495 }
496
497 continue;
498 }
499
500 String categoryName = title.substring("Category:".length());
501
502 categoryName = normalize(categoryName, 75);
503
504 String description = page.element("revision").elementText("text");
505
506 description = normalizeDescription(description);
507
508 try {
509 TagsEntry tagsEntry = null;
510
511 try {
512 tagsEntry = TagsEntryLocalServiceUtil.getEntry(
513 node.getCompanyId(), categoryName);
514 }
515 catch (NoSuchEntryException nsee) {
516 tagsEntry = TagsEntryLocalServiceUtil.addEntry(
517 userId, categoryName);
518 }
519
520 if (Validator.isNotNull(description)) {
521 TagsPropertyLocalServiceUtil.addProperty(
522 userId, tagsEntry.getEntryId(), "description",
523 description);
524 }
525 }
526 catch (SystemException se) {
527 _log.error(se, se);
528 }
529
530 if ((i % 5) == 0) {
531 progressTracker.updateProgress((i * 10) / total);
532 }
533 }
534 }
535
536 protected String readParentTitle(String content) {
537 Matcher matcher = _parentPattern.matcher(content);
538
539 String redirectTitle = StringPool.BLANK;
540
541 if (matcher.find()) {
542 redirectTitle = matcher.group(1);
543
544 redirectTitle = normalizeTitle(redirectTitle);
545
546 redirectTitle += " (disambiguation)";
547 }
548
549 return redirectTitle;
550 }
551
552 protected String readRedirectTitle(String content) {
553 Matcher matcher = _redirectPattern.matcher(content);
554
555 String redirectTitle = StringPool.BLANK;
556
557 if (matcher.find()) {
558 redirectTitle = matcher.group(1);
559
560 redirectTitle = normalizeTitle(redirectTitle);
561 }
562
563 return redirectTitle;
564 }
565
566 protected List<String> readSpecialNamespaces(Element root)
567 throws ImportFilesException {
568
569 List<String> namespaces = new ArrayList<String>();
570
571 Element siteinfoEl = root.element("siteinfo");
572
573 if (siteinfoEl == null) {
574 throw new ImportFilesException("Invalid pages XML file");
575 }
576
577 Iterator<Element> itr = siteinfoEl.element(
578 "namespaces").elements("namespace").iterator();
579
580 while (itr.hasNext()) {
581 Element namespace = itr.next();
582
583 if (!namespace.attribute("key").equals("0")) {
584 namespaces.add(namespace.getText());
585 }
586 }
587
588 return namespaces;
589 }
590
591 protected String[] readTagsEntries(
592 long userId, WikiNode node, String content)
593 throws PortalException, SystemException {
594
595 Matcher matcher = _categoriesPattern.matcher(content);
596
597 List<String> tagsEntries = new ArrayList<String>();
598
599 while (matcher.find()) {
600 String categoryName = matcher.group(1);
601
602 categoryName = normalize(categoryName, 75);
603
604 TagsEntry tagsEntry = null;
605
606 try {
607 tagsEntry = TagsEntryLocalServiceUtil.getEntry(
608 node.getCompanyId(), categoryName);
609 }
610 catch (NoSuchEntryException nsee) {
611 tagsEntry = TagsEntryLocalServiceUtil.addEntry(
612 userId, categoryName);
613 }
614
615 tagsEntries.add(tagsEntry.getName());
616 }
617
618 if (content.indexOf(_WORK_IN_PROGRESS) != -1) {
619 tagsEntries.add(_WORK_IN_PROGRESS_TAG);
620 }
621
622 return tagsEntries.toArray(new String[tagsEntries.size()]);
623 }
624
625 protected Map<String, String> readUsersFile(File usersFile)
626 throws IOException {
627
628 if ((usersFile == null) || (!usersFile.exists())) {
629 return Collections.EMPTY_MAP;
630 }
631
632 Map<String, String> usersMap = new HashMap<String, String>();
633
634 BufferedReader reader = new BufferedReader(new FileReader(usersFile));
635
636 String line = reader.readLine();
637
638 while (line != null) {
639 String[] array = StringUtil.split(line);
640
641 if ((array.length == 2) && (Validator.isNotNull(array[0])) &&
642 (Validator.isNotNull(array[1]))) {
643
644 usersMap.put(array[0], array[1]);
645 }
646 else {
647 if (_log.isInfoEnabled()) {
648 _log.info(
649 "Ignoring line " + line +
650 " because it does not contain exactly 2 columns");
651 }
652 }
653
654 line = reader.readLine();
655 }
656
657 return usersMap;
658 }
659
660 private static final String[] _SPECIAL_MEDIA_WIKI_DIRS = new String[]{
661 "thumb", "temp", "archive"};
662
663 private static final String _WORK_IN_PROGRESS = "{{Work in progress}}";
664
665 private static final String _WORK_IN_PROGRESS_TAG = "work in progress";
666
667 private static Log _log = LogFactory.getLog(MediaWikiImporter.class);
668
669 private static Pattern _categoriesPattern = Pattern.compile(
670 "\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*");
671
672 private static Pattern _parentPattern = Pattern.compile(
673 "\\{{2}OtherTopics\\|([^\\}]*)\\}{2}");
674
675 private static Pattern _redirectPattern = Pattern.compile(
676 "#REDIRECT \\[\\[([^\\]]*)\\]\\]");
677
678 private MediaWikiToCreoleTranslator _translator =
679 new MediaWikiToCreoleTranslator();
680
681 }