001
014
015 package com.liferay.portlet.wiki.importers.mediawiki;
016
017 import com.liferay.documentlibrary.service.DLLocalServiceUtil;
018 import com.liferay.portal.NoSuchUserException;
019 import com.liferay.portal.kernel.exception.PortalException;
020 import com.liferay.portal.kernel.exception.SystemException;
021 import com.liferay.portal.kernel.io.unsync.UnsyncBufferedReader;
022 import com.liferay.portal.kernel.log.Log;
023 import com.liferay.portal.kernel.log.LogFactoryUtil;
024 import com.liferay.portal.kernel.util.ArrayUtil;
025 import com.liferay.portal.kernel.util.MapUtil;
026 import com.liferay.portal.kernel.util.ObjectValuePair;
027 import com.liferay.portal.kernel.util.ProgressTracker;
028 import com.liferay.portal.kernel.util.ProgressTrackerThreadLocal;
029 import com.liferay.portal.kernel.util.StringBundler;
030 import com.liferay.portal.kernel.util.StringPool;
031 import com.liferay.portal.kernel.util.StringUtil;
032 import com.liferay.portal.kernel.util.Validator;
033 import com.liferay.portal.kernel.xml.Document;
034 import com.liferay.portal.kernel.xml.DocumentException;
035 import com.liferay.portal.kernel.xml.Element;
036 import com.liferay.portal.kernel.xml.SAXReaderUtil;
037 import com.liferay.portal.kernel.zip.ZipReader;
038 import com.liferay.portal.kernel.zip.ZipReaderFactoryUtil;
039 import com.liferay.portal.model.User;
040 import com.liferay.portal.service.ServiceContext;
041 import com.liferay.portal.service.UserLocalServiceUtil;
042 import com.liferay.portal.util.PropsValues;
043 import com.liferay.portlet.asset.NoSuchTagException;
044 import com.liferay.portlet.asset.model.AssetTag;
045 import com.liferay.portlet.asset.service.AssetTagLocalServiceUtil;
046 import com.liferay.portlet.asset.service.AssetTagPropertyLocalServiceUtil;
047 import com.liferay.portlet.asset.util.AssetUtil;
048 import com.liferay.portlet.wiki.ImportFilesException;
049 import com.liferay.portlet.wiki.NoSuchPageException;
050 import com.liferay.portlet.wiki.importers.WikiImporter;
051 import com.liferay.portlet.wiki.importers.WikiImporterKeys;
052 import com.liferay.portlet.wiki.model.WikiNode;
053 import com.liferay.portlet.wiki.model.WikiPage;
054 import com.liferay.portlet.wiki.model.WikiPageConstants;
055 import com.liferay.portlet.wiki.service.WikiPageLocalServiceUtil;
056 import com.liferay.portlet.wiki.translators.MediaWikiToCreoleTranslator;
057
058 import java.io.File;
059 import java.io.FileReader;
060 import java.io.IOException;
061
062 import java.util.ArrayList;
063 import java.util.Collections;
064 import java.util.HashMap;
065 import java.util.Iterator;
066 import java.util.List;
067 import java.util.Map;
068 import java.util.regex.Matcher;
069 import java.util.regex.Pattern;
070
071
075 public class MediaWikiImporter implements WikiImporter {
076
/**
 * Content given to the shared images page when <code>processImages</code>
 * has to create that page.
 */
077 public static final String SHARED_IMAGES_CONTENT = "See attachments";
078
/**
 * Title of the wiki page that <code>processImages</code> looks up (or
 * creates) and to which it adds the imported images as attachments.
 */
079 public static final String SHARED_IMAGES_TITLE = "SharedImages";
080
081 public void importPages(
082 long userId, WikiNode node, File[] files,
083 Map<String, String[]> options)
084 throws PortalException {
085
086 if ((files.length < 1) || (files[0] == null) || (!files[0].exists())) {
087 throw new PortalException("The pages file is mandatory");
088 }
089
090 File pagesFile = files[0];
091 File usersFile = files[1];
092 File imagesFile = files[2];
093
094 try {
095 Document doc = SAXReaderUtil.read(pagesFile);
096
097 Map<String, String> usersMap = readUsersFile(usersFile);
098
099 Element root = doc.getRootElement();
100
101 List<String> specialNamespaces = readSpecialNamespaces(root);
102
103 processSpecialPages(userId, node, root, specialNamespaces);
104 processRegularPages(
105 userId, node, root, specialNamespaces, usersMap, imagesFile,
106 options);
107 processImages(userId, node, imagesFile);
108
109 moveFrontPage(userId, node, options);
110 }
111 catch (DocumentException de) {
112 throw new ImportFilesException("Invalid XML file provided");
113 }
114 catch (IOException de) {
115 throw new ImportFilesException("Error reading the files provided");
116 }
117 catch (PortalException e) {
118 throw e;
119 }
120 catch (Exception e) {
121 throw new PortalException(e);
122 }
123 }
124
125 protected long getUserId(
126 long userId, WikiNode node, String author,
127 Map<String, String> usersMap)
128 throws PortalException, SystemException {
129
130 User user = null;
131
132 String emailAddress = usersMap.get(author);
133
134 try {
135 if (Validator.isNull(emailAddress)) {
136 user = UserLocalServiceUtil.getUserByScreenName(
137 node.getCompanyId(), author.toLowerCase());
138 }
139 else {
140 user = UserLocalServiceUtil.getUserByEmailAddress(
141 node.getCompanyId(), emailAddress);
142 }
143 }
144 catch (NoSuchUserException nsue) {
145 user = UserLocalServiceUtil.getUserById(userId);
146 }
147
148 return user.getUserId();
149 }
150
151 protected void importPage(
152 long userId, String author, WikiNode node, String title,
153 String content, String summary, Map<String, String> usersMap)
154 throws PortalException {
155
156 try {
157 long authorUserId = getUserId(userId, node, author, usersMap);
158 String parentTitle = readParentTitle(content);
159 String redirectTitle = readRedirectTitle(content);
160
161 ServiceContext serviceContext = new ServiceContext();
162
163 serviceContext.setAddCommunityPermissions(true);
164 serviceContext.setAddGuestPermissions(true);
165 serviceContext.setAssetTagNames(
166 readAssetTagNames(userId, node, content));
167
168 if (Validator.isNull(redirectTitle)) {
169 content = _translator.translate(content);
170 }
171 else {
172 content =
173 StringPool.DOUBLE_OPEN_BRACKET + redirectTitle +
174 StringPool.DOUBLE_CLOSE_BRACKET;
175 }
176
177 WikiPage page = null;
178
179 try {
180 page = WikiPageLocalServiceUtil.getPage(
181 node.getNodeId(), title);
182 }
183 catch (NoSuchPageException nspe) {
184 page = WikiPageLocalServiceUtil.addPage(
185 authorUserId, node.getNodeId(), title,
186 WikiPageConstants.NEW, null, true, serviceContext);
187 }
188
189 WikiPageLocalServiceUtil.updatePage(
190 authorUserId, node.getNodeId(), title, page.getVersion(),
191 content, summary, true, "creole", parentTitle, redirectTitle,
192 serviceContext);
193 }
194 catch (Exception e) {
195 throw new PortalException("Error importing page " + title, e);
196 }
197 }
198
199 protected boolean isSpecialMediaWikiPage(
200 String title, List<String> specialNamespaces) {
201
202 for (String namespace: specialNamespaces) {
203 if (title.startsWith(namespace + StringPool.COLON)) {
204 return true;
205 }
206 }
207
208 return false;
209 }
210
211 protected boolean isValidImage(String[] paths, byte[] bytes) {
212 if (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[0])) {
213 return false;
214 }
215
216 if ((paths.length > 1) &&
217 (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[1]))) {
218
219 return false;
220 }
221
222 String fileName = paths[paths.length - 1];
223
224 try {
225 DLLocalServiceUtil.validate(fileName, true, bytes);
226 }
227 catch (PortalException pe) {
228 return false;
229 }
230 catch (SystemException se) {
231 return false;
232 }
233
234 return true;
235 }
236
237 protected void moveFrontPage(
238 long userId, WikiNode node, Map<String, String[]> options) {
239
240 String frontPageTitle = MapUtil.getString(
241 options, WikiImporterKeys.OPTIONS_FRONT_PAGE);
242
243 if (Validator.isNotNull(frontPageTitle)) {
244 frontPageTitle = normalizeTitle(frontPageTitle);
245
246 try {
247 if (WikiPageLocalServiceUtil.getPagesCount(
248 node.getNodeId(), frontPageTitle, true) > 0) {
249
250 ServiceContext serviceContext = new ServiceContext();
251
252 serviceContext.setAddCommunityPermissions(true);
253 serviceContext.setAddGuestPermissions(true);
254
255 WikiPageLocalServiceUtil.movePage(
256 userId, node.getNodeId(), frontPageTitle,
257 WikiPageConstants.FRONT_PAGE, false, serviceContext);
258
259 }
260 }
261 catch (Exception e) {
262 if (_log.isWarnEnabled()) {
263 StringBundler sb = new StringBundler(4);
264
265 sb.append("Could not move ");
266 sb.append(WikiPageConstants.FRONT_PAGE);
267 sb.append(" to the title provided: ");
268 sb.append(frontPageTitle);
269
270 _log.warn(sb.toString(), e);
271 }
272 }
273
274 }
275
276 }
277
278 protected String normalize(String categoryName, int length) {
279 categoryName = AssetUtil.toWord(categoryName.trim());
280
281 return StringUtil.shorten(categoryName, length);
282 }
283
284 protected String normalizeDescription(String description) {
285 description = description.replaceAll(
286 _categoriesPattern.pattern(), StringPool.BLANK);
287
288 return normalize(description, 300);
289 }
290
291 protected String normalizeTitle(String title) {
292 title = title.replaceAll(
293 PropsValues.WIKI_PAGE_TITLES_REMOVE_REGEXP, StringPool.BLANK);
294
295 return StringUtil.shorten(title, 75);
296 }
297
298 protected void processImages(long userId, WikiNode node, File imagesFile)
299 throws Exception {
300
301 if ((imagesFile == null) || (!imagesFile.exists())) {
302 return;
303 }
304
305 ProgressTracker progressTracker =
306 ProgressTrackerThreadLocal.getProgressTracker();
307
308 int count = 0;
309
310 ZipReader zipReader = ZipReaderFactoryUtil.getZipReader(imagesFile);
311
312 List<String> entries = zipReader.getEntries();
313
314 int total = entries.size();
315
316 if (total > 0) {
317 try {
318 WikiPageLocalServiceUtil.getPage(
319 node.getNodeId(), SHARED_IMAGES_TITLE);
320 }
321 catch (NoSuchPageException nspe) {
322 ServiceContext serviceContext = new ServiceContext();
323
324 serviceContext.setAddCommunityPermissions(true);
325 serviceContext.setAddGuestPermissions(true);
326
327 WikiPageLocalServiceUtil.addPage(
328 userId, node.getNodeId(), SHARED_IMAGES_TITLE,
329 SHARED_IMAGES_CONTENT, null, true, serviceContext);
330 }
331 }
332
333 List<ObjectValuePair<String, byte[]>> attachments =
334 new ArrayList<ObjectValuePair<String, byte[]>>();
335
336 int percentage = 50;
337
338 for (int i = 0; i < entries.size(); i++) {
339 String entry = entries.get(i);
340
341 String key = entry;
342 byte[] value = zipReader.getEntryAsByteArray(entry);
343
344 String[] paths = StringUtil.split(key, StringPool.SLASH);
345
346 if (!isValidImage(paths, value)) {
347 if (_log.isInfoEnabled()) {
348 _log.info("Ignoring " + key);
349 }
350
351 continue;
352 }
353
354 String fileName = paths[paths.length - 1].toLowerCase();
355
356 attachments.add(
357 new ObjectValuePair<String, byte[]>(fileName, value));
358
359 count++;
360
361 if ((i % 5) == 0) {
362 WikiPageLocalServiceUtil.addPageAttachments(
363 node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
364
365 attachments.clear();
366
367 percentage = Math.min(50 + (i * 50) / total, 99);
368
369 progressTracker.updateProgress(percentage);
370 }
371 }
372
373 if (!attachments.isEmpty()) {
374 WikiPageLocalServiceUtil.addPageAttachments(
375 node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
376 }
377
378 zipReader.close();
379
380 if (_log.isInfoEnabled()) {
381 _log.info("Imported " + count + " images into " + node.getName());
382 }
383 }
384
385 protected void processRegularPages(
386 long userId, WikiNode node, Element root,
387 List<String> specialNamespaces, Map<String, String> usersMap,
388 File imagesFile, Map<String, String[]> options) {
389
390 boolean importLatestVersion = MapUtil.getBoolean(
391 options, WikiImporterKeys.OPTIONS_IMPORT_LATEST_VERSION);
392
393 ProgressTracker progressTracker =
394 ProgressTrackerThreadLocal.getProgressTracker();
395
396 int count = 0;
397
398 List<Element> pages = root.elements("page");
399
400 int total = pages.size();
401
402 Iterator<Element> itr = root.elements("page").iterator();
403
404 int percentage = 10;
405 int maxPercentage = 50;
406
407 if ((imagesFile == null) || (!imagesFile.exists())) {
408 maxPercentage = 99;
409 }
410
411 int percentageRange = maxPercentage - percentage;
412
413 for (int i = 0; itr.hasNext(); i++) {
414 Element pageEl = itr.next();
415
416 String title = pageEl.elementText("title");
417
418 title = normalizeTitle(title);
419
420 percentage = Math.min(
421 10 + (i * percentageRange) / total, maxPercentage);
422
423 progressTracker.updateProgress(percentage);
424
425 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
426 continue;
427 }
428
429 List<Element> revisionEls = pageEl.elements("revision");
430
431 if (importLatestVersion) {
432 Element lastRevisionEl = revisionEls.get(
433 revisionEls.size() - 1);
434
435 revisionEls = new ArrayList<Element>();
436
437 revisionEls.add(lastRevisionEl);
438 }
439
440 for (Element curRevisionEl : revisionEls) {
441 String author = curRevisionEl.element(
442 "contributor").elementText("username");
443 String content = curRevisionEl.elementText("text");
444 String summary = curRevisionEl.elementText("comment");
445
446 try {
447 importPage(
448 userId, author, node, title, content, summary,
449 usersMap);
450 }
451 catch (Exception e) {
452 if (_log.isWarnEnabled()) {
453 StringBundler sb = new StringBundler(3);
454
455 sb.append("Page with title ");
456 sb.append(title);
457 sb.append(" could not be imported");
458
459 _log.warn(sb.toString(), e);
460 }
461 }
462 }
463
464 count++;
465 }
466
467 if (_log.isInfoEnabled()) {
468 _log.info("Imported " + count + " pages into " + node.getName());
469 }
470 }
471
472 protected void processSpecialPages(
473 long userId, WikiNode node, Element root,
474 List<String> specialNamespaces)
475 throws PortalException {
476
477 ProgressTracker progressTracker =
478 ProgressTrackerThreadLocal.getProgressTracker();
479
480 List<Element> pages = root.elements("page");
481
482 int total = pages.size();
483
484 Iterator<Element> itr = pages.iterator();
485
486 for (int i = 0; itr.hasNext(); i++) {
487 Element page = itr.next();
488
489 String title = page.elementText("title");
490
491 if (!title.startsWith("Category:")) {
492 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
493 root.remove(page);
494 }
495
496 continue;
497 }
498
499 String categoryName = title.substring("Category:".length());
500
501 categoryName = normalize(categoryName, 75);
502
503 String description = page.element("revision").elementText("text");
504
505 description = normalizeDescription(description);
506
507 try {
508 AssetTag assetTag = null;
509
510 try {
511 assetTag = AssetTagLocalServiceUtil.getTag(
512 node.getCompanyId(), categoryName);
513 }
514 catch (NoSuchTagException nste) {
515 ServiceContext serviceContext = new ServiceContext();
516
517 serviceContext.setAddCommunityPermissions(true);
518 serviceContext.setAddGuestPermissions(true);
519 serviceContext.setScopeGroupId(node.getGroupId());
520
521 assetTag = AssetTagLocalServiceUtil.addTag(
522 userId, categoryName, null, serviceContext);
523 }
524
525 if (Validator.isNotNull(description)) {
526 AssetTagPropertyLocalServiceUtil.addTagProperty(
527 userId, assetTag.getTagId(), "description",
528 description);
529 }
530 }
531 catch (SystemException se) {
532 _log.error(se, se);
533 }
534
535 if ((i % 5) == 0) {
536 progressTracker.updateProgress((i * 10) / total);
537 }
538 }
539 }
540
541 protected String[] readAssetTagNames(
542 long userId, WikiNode node, String content)
543 throws PortalException, SystemException {
544
545 Matcher matcher = _categoriesPattern.matcher(content);
546
547 List<String> assetTagNames = new ArrayList<String>();
548
549 while (matcher.find()) {
550 String categoryName = matcher.group(1);
551
552 categoryName = normalize(categoryName, 75);
553
554 AssetTag assetTag = null;
555
556 try {
557 assetTag = AssetTagLocalServiceUtil.getTag(
558 node.getGroupId(), categoryName);
559 }
560 catch (NoSuchTagException nste) {
561 ServiceContext serviceContext = new ServiceContext();
562
563 serviceContext.setAddCommunityPermissions(true);
564 serviceContext.setAddGuestPermissions(true);
565 serviceContext.setScopeGroupId(node.getGroupId());
566
567 assetTag = AssetTagLocalServiceUtil.addTag(
568 userId, categoryName, null, serviceContext);
569 }
570
571 assetTagNames.add(assetTag.getName());
572 }
573
574 if (content.indexOf(_WORK_IN_PROGRESS) != -1) {
575 assetTagNames.add(_WORK_IN_PROGRESS_TAG);
576 }
577
578 return assetTagNames.toArray(new String[assetTagNames.size()]);
579 }
580
581 protected String readParentTitle(String content) {
582 Matcher matcher = _parentPattern.matcher(content);
583
584 String redirectTitle = StringPool.BLANK;
585
586 if (matcher.find()) {
587 redirectTitle = matcher.group(1);
588
589 redirectTitle = normalizeTitle(redirectTitle);
590
591 redirectTitle += " (disambiguation)";
592 }
593
594 return redirectTitle;
595 }
596 protected String readRedirectTitle(String content) {
597 Matcher matcher = _redirectPattern.matcher(content);
598
599 String redirectTitle = StringPool.BLANK;
600
601 if (matcher.find()) {
602 redirectTitle = matcher.group(1);
603
604 redirectTitle = normalizeTitle(redirectTitle);
605 }
606
607 return redirectTitle;
608 }
609 protected List<String> readSpecialNamespaces(Element root)
610 throws ImportFilesException {
611
612 List<String> namespaces = new ArrayList<String>();
613
614 Element siteinfoEl = root.element("siteinfo");
615
616 if (siteinfoEl == null) {
617 throw new ImportFilesException("Invalid pages XML file");
618 }
619
620 Iterator<Element> itr = siteinfoEl.element(
621 "namespaces").elements("namespace").iterator();
622
623 while (itr.hasNext()) {
624 Element namespace = itr.next();
625
626 if (!namespace.attribute("key").getData().equals("0")) {
627 namespaces.add(namespace.getText());
628 }
629 }
630
631 return namespaces;
632 }
633
634 protected Map<String, String> readUsersFile(File usersFile)
635 throws IOException {
636
637 if ((usersFile == null) || (!usersFile.exists())) {
638 return Collections.EMPTY_MAP;
639 }
640
641 Map<String, String> usersMap = new HashMap<String, String>();
642
643 UnsyncBufferedReader unsyncBufferedReader =
644 new UnsyncBufferedReader(new FileReader(usersFile));
645
646 String line = unsyncBufferedReader.readLine();
647
648 while (line != null) {
649 String[] array = StringUtil.split(line);
650
651 if ((array.length == 2) && (Validator.isNotNull(array[0])) &&
652 (Validator.isNotNull(array[1]))) {
653
654 usersMap.put(array[0], array[1]);
655 }
656 else {
657 if (_log.isInfoEnabled()) {
658 _log.info(
659 "Ignoring line " + line +
660 " because it does not contain exactly 2 columns");
661 }
662 }
663
664 line = unsyncBufferedReader.readLine();
665 }
666
667 return usersMap;
668 }
669
670 private static final String[] _SPECIAL_MEDIA_WIKI_DIRS = {
671 "thumb", "temp", "archive"
672 };
673
674 private static final String _WORK_IN_PROGRESS = "{{Work in progress}}";
675
676 private static final String _WORK_IN_PROGRESS_TAG = "work in progress";
677
678 private static Log _log = LogFactoryUtil.getLog(MediaWikiImporter.class);
679
680 private static Pattern _categoriesPattern = Pattern.compile(
681 "\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*");
682 private static Pattern _parentPattern = Pattern.compile(
683 "\\{{2}OtherTopics\\|([^\\}]*)\\}{2}");
684 private static Pattern _redirectPattern = Pattern.compile(
685 "#REDIRECT \\[\\[([^\\]]*)\\]\\]");
686
687 private MediaWikiToCreoleTranslator _translator =
688 new MediaWikiToCreoleTranslator();
689
690 }