package com.liferay.portlet.wiki.importers.mediawiki;

import com.liferay.documentlibrary.service.DLLocalServiceUtil;
import com.liferay.portal.NoSuchUserException;
import com.liferay.portal.PortalException;
import com.liferay.portal.SystemException;
import com.liferay.portal.kernel.log.Log;
import com.liferay.portal.kernel.log.LogFactoryUtil;
import com.liferay.portal.kernel.util.ArrayUtil;
import com.liferay.portal.kernel.util.MapUtil;
import com.liferay.portal.kernel.util.ObjectValuePair;
import com.liferay.portal.kernel.util.ProgressTracker;
import com.liferay.portal.kernel.util.ProgressTrackerThreadLocal;
import com.liferay.portal.kernel.util.StringPool;
import com.liferay.portal.kernel.util.StringUtil;
import com.liferay.portal.kernel.util.Validator;
import com.liferay.portal.kernel.xml.Document;
import com.liferay.portal.kernel.xml.DocumentException;
import com.liferay.portal.kernel.xml.Element;
import com.liferay.portal.kernel.xml.SAXReaderUtil;
import com.liferay.portal.kernel.zip.ZipReader;
import com.liferay.portal.model.User;
import com.liferay.portal.service.UserLocalServiceUtil;
import com.liferay.portal.util.PropsValues;
import com.liferay.portlet.tags.NoSuchEntryException;
import com.liferay.portlet.tags.model.TagsEntry;
import com.liferay.portlet.tags.service.TagsEntryLocalServiceUtil;
import com.liferay.portlet.tags.service.TagsPropertyLocalServiceUtil;
import com.liferay.portlet.tags.util.TagsUtil;
import com.liferay.portlet.wiki.ImportFilesException;
import com.liferay.portlet.wiki.NoSuchPageException;
import com.liferay.portlet.wiki.importers.WikiImporter;
import com.liferay.portlet.wiki.importers.WikiImporterKeys;
import com.liferay.portlet.wiki.model.WikiNode;
import com.liferay.portlet.wiki.model.WikiPage;
import com.liferay.portlet.wiki.model.impl.WikiPageImpl;
import com.liferay.portlet.wiki.service.WikiPageLocalServiceUtil;
import com.liferay.portlet.wiki.translators.MediaWikiToCreoleTranslator;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

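/**
 * Imports pages, users, and images exported from MediaWiki into a Liferay
 * wiki node, translating the MediaWiki markup to Creole along the way.
 */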
public class MediaWikiImporter implements WikiImporter {

	public static final String SHARED_IMAGES_CONTENT = "See attachments";

	public static final String SHARED_IMAGES_TITLE = "SharedImages";

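	/**
	 * Imports the MediaWiki export into the given node. The first file (the
	 * pages XML) is mandatory; the users and images files are optional and
	 * may be missing or <code>null</code>.
	 */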
	public void importPages(
			long userId, WikiNode node, File[] files,
			Map<String, String[]> options)
		throws PortalException {

		if ((files.length < 1) || (files[0] == null) || (!files[0].exists())) {
			throw new PortalException("The pages file is mandatory");
		}

		File pagesFile = files[0];

		// The users and images files are optional; guard against short arrays

		File usersFile = (files.length > 1) ? files[1] : null;
		File imagesFile = (files.length > 2) ? files[2] : null;

		try {
			Document doc = SAXReaderUtil.read(pagesFile);

			Map<String, String> usersMap = readUsersFile(usersFile);

			Element root = doc.getRootElement();

			List<String> specialNamespaces = readSpecialNamespaces(root);

			processSpecialPages(userId, node, root, specialNamespaces);
			processRegularPages(
				userId, node, root, specialNamespaces, usersMap, imagesFile,
				options);
			processImages(userId, node, imagesFile);

			moveFrontPage(userId, node, options);
		}
		catch (DocumentException de) {
			throw new ImportFilesException("Invalid XML file provided");
		}
		catch (IOException ioe) {
			throw new ImportFilesException("Error reading the files provided");
		}
		catch (PortalException pe) {
			throw pe;
		}
		catch (Exception e) {
			throw new PortalException(e);
		}
	}

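	/**
	 * Resolves the MediaWiki author to a Liferay user, by email address when
	 * the users file provides one and by screen name otherwise, falling back
	 * to the importing user if no match is found.
	 */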
	protected long getUserId(
			long userId, WikiNode node, String author,
			Map<String, String> usersMap)
		throws PortalException, SystemException {

		User user = null;

		String emailAddress = usersMap.get(author);

		try {
			if (Validator.isNull(emailAddress)) {
				user = UserLocalServiceUtil.getUserByScreenName(
					node.getCompanyId(), author.toLowerCase());
			}
			else {
				user = UserLocalServiceUtil.getUserByEmailAddress(
					node.getCompanyId(), emailAddress);
			}
		}
		catch (NoSuchUserException nsue) {
			user = UserLocalServiceUtil.getUserById(userId);
		}

		return user.getUserId();
	}

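	/**
	 * Imports a single page revision: translates the MediaWiki markup to
	 * Creole (or builds a redirect link instead), adds the page if it does
	 * not exist yet, and updates it with the translated content and tags.
	 */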
	protected void importPage(
			long userId, String author, WikiNode node, String title,
			String content, String summary, Map<String, String> usersMap)
		throws PortalException {

		try {
			long authorUserId = getUserId(userId, node, author, usersMap);
			String parentTitle = readParentTitle(content);
			String redirectTitle = readRedirectTitle(content);
			String[] tagsEntries = readTagsEntries(userId, node, content);

			if (Validator.isNull(redirectTitle)) {
				content = _translator.translate(content);
			}
			else {
				content =
					StringPool.DOUBLE_OPEN_BRACKET + redirectTitle +
						StringPool.DOUBLE_CLOSE_BRACKET;
			}

			WikiPage page = null;

			try {
				page = WikiPageLocalServiceUtil.getPage(
					node.getNodeId(), title);
			}
			catch (NoSuchPageException nspe) {
				page = WikiPageLocalServiceUtil.addPage(
					authorUserId, node.getNodeId(), title, WikiPageImpl.NEW,
					null, true, null, null);
			}

			WikiPageLocalServiceUtil.updatePage(
				authorUserId, node.getNodeId(), title, page.getVersion(),
				content, summary, true, "creole", parentTitle,
				redirectTitle, tagsEntries, null, null);
		}
		catch (Exception e) {
			throw new PortalException("Error importing page " + title, e);
		}
	}

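	/**
	 * Returns <code>true</code> if the title is prefixed by one of the
	 * special namespaces read from the export's siteinfo section.
	 */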
	protected boolean isSpecialMediaWikiPage(
		String title, List<String> specialNamespaces) {

		for (String namespace : specialNamespaces) {
			if (title.startsWith(namespace + StringPool.COLON)) {
				return true;
			}
		}

		return false;
	}

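	/**
	 * Rejects images stored in MediaWiki maintenance directories (thumb,
	 * temp, archive) or that fail document library validation.
	 */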
	protected boolean isValidImage(String[] paths, byte[] bytes) {
		if (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[0])) {
			return false;
		}

		if ((paths.length > 1) &&
			(ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[1]))) {

			return false;
		}

		String fileName = paths[paths.length - 1];

		try {
			DLLocalServiceUtil.validate(fileName, bytes);
		}
		catch (PortalException pe) {
			return false;
		}
		catch (SystemException se) {
			return false;
		}

		return true;
	}

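	/**
	 * Renames the page selected in the import options to the wiki front page
	 * title, logging a warning if the move fails.
	 */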
	protected void moveFrontPage(
		long userId, WikiNode node, Map<String, String[]> options) {

		String frontPageTitle = MapUtil.getString(
			options, WikiImporterKeys.OPTIONS_FRONT_PAGE);

		if (Validator.isNotNull(frontPageTitle)) {
			frontPageTitle = normalizeTitle(frontPageTitle);

			try {
				if (WikiPageLocalServiceUtil.getPagesCount(
						node.getNodeId(), frontPageTitle, true) > 0) {

					WikiPageLocalServiceUtil.movePage(
						userId, node.getNodeId(), frontPageTitle,
						WikiPageImpl.FRONT_PAGE, false, null, null);
				}
			}
			catch (Exception e) {
				if (_log.isWarnEnabled()) {
					StringBuilder sb = new StringBuilder();

					sb.append("Could not move ");
					sb.append(WikiPageImpl.FRONT_PAGE);
					sb.append(" to the title provided: ");
					sb.append(frontPageTitle);

					_log.warn(sb.toString(), e);
				}
			}
		}
	}

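	/**
	 * Normalizes a category name into a tag-safe word and shortens it to the
	 * given length.
	 */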
	protected String normalize(String categoryName, int length) {
		categoryName = TagsUtil.toWord(categoryName.trim());

		return StringUtil.shorten(categoryName, length);
	}

	protected String normalizeDescription(String description) {
		description = description.replaceAll(
			_categoriesPattern.pattern(), StringPool.BLANK);

		return normalize(description, 300);
	}

	protected String normalizeTitle(String title) {
		title = title.replaceAll(
			PropsValues.WIKI_PAGE_TITLES_REMOVE_REGEXP, StringPool.BLANK);

		return StringUtil.shorten(title, 75);
	}

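	/**
	 * Attaches every valid image in the images ZIP file to the shared images
	 * page, creating that page if needed, flushing attachments in batches and
	 * updating the progress tracker as it goes.
	 */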
	private void processImages(long userId, WikiNode node, File imagesFile)
		throws Exception {

		if ((imagesFile == null) || (!imagesFile.exists())) {
			return;
		}

		ProgressTracker progressTracker =
			ProgressTrackerThreadLocal.getProgressTracker();

		int count = 0;

		ZipReader zipReader = new ZipReader(imagesFile);

		Map<String, byte[]> entries = zipReader.getEntries();

		int total = entries.size();

		if (total > 0) {
			try {
				WikiPageLocalServiceUtil.getPage(
					node.getNodeId(), SHARED_IMAGES_TITLE);
			}
			catch (NoSuchPageException nspe) {
				WikiPageLocalServiceUtil.addPage(
					userId, node.getNodeId(), SHARED_IMAGES_TITLE,
					SHARED_IMAGES_CONTENT, null, true, null, null);
			}
		}

		List<ObjectValuePair<String, byte[]>> attachments =
			new ArrayList<ObjectValuePair<String, byte[]>>();

		Iterator<Map.Entry<String, byte[]>> itr = entries.entrySet().iterator();

		int percentage = 50;

		for (int i = 0; itr.hasNext(); i++) {
			Map.Entry<String, byte[]> entry = itr.next();

			String key = entry.getKey();
			byte[] value = entry.getValue();

			if (key.endsWith(StringPool.SLASH)) {
				if (_log.isInfoEnabled()) {
					_log.info("Ignoring " + key);
				}

				continue;
			}

			String[] paths = StringUtil.split(key, StringPool.SLASH);

			if (!isValidImage(paths, value)) {
				if (_log.isInfoEnabled()) {
					_log.info("Ignoring " + key);
				}

				continue;
			}

			String fileName = paths[paths.length - 1].toLowerCase();

			attachments.add(
				new ObjectValuePair<String, byte[]>(fileName, value));

			count++;

			if ((i % 5) == 0) {
				WikiPageLocalServiceUtil.addPageAttachments(
					node.getNodeId(), SHARED_IMAGES_TITLE, attachments);

				attachments.clear();

				percentage = Math.min(50 + (i * 50) / total, 99);

				progressTracker.updateProgress(percentage);
			}
		}

		if (!attachments.isEmpty()) {
			WikiPageLocalServiceUtil.addPageAttachments(
				node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
		}

		if (_log.isInfoEnabled()) {
			_log.info("Imported " + count + " images into " + node.getName());
		}
	}

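	/**
	 * Imports the non-special pages from the export, either every revision or
	 * only the latest one depending on the import options, and reports
	 * progress between 10% and 50% (or 99% when no images file is present).
	 */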
	protected void processRegularPages(
		long userId, WikiNode node, Element root,
		List<String> specialNamespaces, Map<String, String> usersMap,
		File imagesFile, Map<String, String[]> options) {

		boolean importLatestVersion = MapUtil.getBoolean(
			options, WikiImporterKeys.OPTIONS_IMPORT_LATEST_VERSION);

		ProgressTracker progressTracker =
			ProgressTrackerThreadLocal.getProgressTracker();

		int count = 0;

		List<Element> pages = root.elements("page");

		int total = pages.size();

		Iterator<Element> itr = pages.iterator();

		int percentage = 10;
		int maxPercentage = 50;

		if ((imagesFile == null) || (!imagesFile.exists())) {
			maxPercentage = 99;
		}

		int percentageRange = maxPercentage - percentage;

		for (int i = 0; itr.hasNext(); i++) {
			Element pageEl = itr.next();

			String title = pageEl.elementText("title");

			title = normalizeTitle(title);

			percentage = Math.min(
				10 + (i * percentageRange) / total, maxPercentage);

			progressTracker.updateProgress(percentage);

			if (isSpecialMediaWikiPage(title, specialNamespaces)) {
				continue;
			}

			List<Element> revisionEls = pageEl.elements("revision");

			if (importLatestVersion) {
				Element lastRevisionEl = revisionEls.get(
					revisionEls.size() - 1);

				revisionEls = new ArrayList<Element>();

				revisionEls.add(lastRevisionEl);
			}

			for (Element curRevisionEl : revisionEls) {
				String author = curRevisionEl.element(
					"contributor").elementText("username");
				String content = curRevisionEl.elementText("text");
				String summary = curRevisionEl.elementText("comment");

				try {
					importPage(
						userId, author, node, title, content, summary,
						usersMap);
				}
				catch (Exception e) {
					if (_log.isWarnEnabled()) {
						StringBuilder sb = new StringBuilder();

						sb.append("Page with title ");
						sb.append(title);
						sb.append(" could not be imported");

						_log.warn(sb.toString(), e);
					}
				}
			}

			count++;
		}

		if (_log.isInfoEnabled()) {
			_log.info("Imported " + count + " pages into " + node.getName());
		}
	}

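	/**
	 * Converts "Category:" pages into tag entries (storing their text as a
	 * "description" property) and removes the remaining special namespace
	 * pages from the document so they are not imported.
	 */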
	protected void processSpecialPages(
			long userId, WikiNode node, Element root,
			List<String> specialNamespaces)
		throws PortalException {

		ProgressTracker progressTracker =
			ProgressTrackerThreadLocal.getProgressTracker();

		List<Element> pages = root.elements("page");

		int total = pages.size();

		Iterator<Element> itr = pages.iterator();

		for (int i = 0; itr.hasNext(); i++) {
			Element page = itr.next();

			String title = page.elementText("title");

			if (!title.startsWith("Category:")) {
				if (isSpecialMediaWikiPage(title, specialNamespaces)) {
					root.remove(page);
				}

				continue;
			}

			String categoryName = title.substring("Category:".length());

			categoryName = normalize(categoryName, 75);

			String description = page.element("revision").elementText("text");

			description = normalizeDescription(description);

			try {
				TagsEntry tagsEntry = null;

				try {
					tagsEntry = TagsEntryLocalServiceUtil.getEntry(
						node.getCompanyId(), categoryName);
				}
				catch (NoSuchEntryException nsee) {
					tagsEntry = TagsEntryLocalServiceUtil.addEntry(
						userId, categoryName);
				}

				if (Validator.isNotNull(description)) {
					TagsPropertyLocalServiceUtil.addProperty(
						userId, tagsEntry.getEntryId(), "description",
						description);
				}
			}
			catch (SystemException se) {
				_log.error(se, se);
			}

			if ((i % 5) == 0) {
				progressTracker.updateProgress((i * 10) / total);
			}
		}
	}

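	/**
	 * Extracts the parent page title from an {{OtherTopics|...}} template
	 * reference, suffixed with " (disambiguation)", or returns a blank string
	 * if the template is not present.
	 */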
	protected String readParentTitle(String content) {
		Matcher matcher = _parentPattern.matcher(content);

		String parentTitle = StringPool.BLANK;

		if (matcher.find()) {
			parentTitle = matcher.group(1);

			parentTitle = normalizeTitle(parentTitle);

			parentTitle += " (disambiguation)";
		}

		return parentTitle;
	}

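	/**
	 * Extracts the target title from a #REDIRECT [[...]] directive, or
	 * returns a blank string if the page is not a redirect.
	 */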
	protected String readRedirectTitle(String content) {
		Matcher matcher = _redirectPattern.matcher(content);

		String redirectTitle = StringPool.BLANK;

		if (matcher.find()) {
			redirectTitle = matcher.group(1);

			redirectTitle = normalizeTitle(redirectTitle);
		}

		return redirectTitle;
	}

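	/**
	 * Reads the non-default namespaces (every namespace whose key is not "0")
	 * from the siteinfo section of the pages XML file.
	 */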
	protected List<String> readSpecialNamespaces(Element root)
		throws ImportFilesException {

		List<String> namespaces = new ArrayList<String>();

		Element siteinfoEl = root.element("siteinfo");

		if (siteinfoEl == null) {
			throw new ImportFilesException("Invalid pages XML file");
		}

		Iterator<Element> itr = siteinfoEl.element(
			"namespaces").elements("namespace").iterator();

		while (itr.hasNext()) {
			Element namespace = itr.next();

			if (!namespace.attribute("key").getData().equals("0")) {
				namespaces.add(namespace.getText());
			}
		}

		return namespaces;
	}

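	/**
	 * Converts the [[Category:...]] references in the page content into tag
	 * entries, creating missing entries, and adds the "work in progress" tag
	 * when the corresponding template is found.
	 */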
	protected String[] readTagsEntries(
			long userId, WikiNode node, String content)
		throws PortalException, SystemException {

		Matcher matcher = _categoriesPattern.matcher(content);

		List<String> tagsEntries = new ArrayList<String>();

		while (matcher.find()) {
			String categoryName = matcher.group(1);

			categoryName = normalize(categoryName, 75);

			TagsEntry tagsEntry = null;

			try {
				tagsEntry = TagsEntryLocalServiceUtil.getEntry(
					node.getCompanyId(), categoryName);
			}
			catch (NoSuchEntryException nsee) {
				tagsEntry = TagsEntryLocalServiceUtil.addEntry(
					userId, categoryName);
			}

			tagsEntries.add(tagsEntry.getName());
		}

		if (content.indexOf(_WORK_IN_PROGRESS) != -1) {
			tagsEntries.add(_WORK_IN_PROGRESS_TAG);
		}

		return tagsEntries.toArray(new String[tagsEntries.size()]);
	}

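	/**
	 * Reads the optional users file that maps MediaWiki user names to email
	 * addresses, one pair per line; lines without exactly two columns are
	 * logged and skipped.
	 */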
	protected Map<String, String> readUsersFile(File usersFile)
		throws IOException {

		if ((usersFile == null) || (!usersFile.exists())) {
			return Collections.emptyMap();
		}

		Map<String, String> usersMap = new HashMap<String, String>();

		BufferedReader reader = new BufferedReader(new FileReader(usersFile));

		try {
			String line = reader.readLine();

			while (line != null) {
				String[] array = StringUtil.split(line);

				if ((array.length == 2) && (Validator.isNotNull(array[0])) &&
					(Validator.isNotNull(array[1]))) {

					usersMap.put(array[0], array[1]);
				}
				else {
					if (_log.isInfoEnabled()) {
						_log.info(
							"Ignoring line " + line +
								" because it does not contain exactly 2 " +
									"columns");
					}
				}

				line = reader.readLine();
			}
		}
		finally {
			reader.close();
		}

		return usersMap;
	}

	private static final String[] _SPECIAL_MEDIA_WIKI_DIRS = new String[] {
		"thumb", "temp", "archive"
	};

	private static final String _WORK_IN_PROGRESS = "{{Work in progress}}";

	private static final String _WORK_IN_PROGRESS_TAG = "work in progress";

	private static Log _log = LogFactoryUtil.getLog(MediaWikiImporter.class);

	private static Pattern _categoriesPattern = Pattern.compile(
		"\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*");
	private static Pattern _parentPattern = Pattern.compile(
		"\\{{2}OtherTopics\\|([^\\}]*)\\}{2}");
	private static Pattern _redirectPattern = Pattern.compile(
		"#REDIRECT \\[\\[([^\\]]*)\\]\\]");

	private MediaWikiToCreoleTranslator _translator =
		new MediaWikiToCreoleTranslator();

}