001    /**
002     * Copyright (c) 2000-2010 Liferay, Inc. All rights reserved.
003     *
004     * The contents of this file are subject to the terms of the Liferay Enterprise
005     * Subscription License ("License"). You may not use this file except in
006     * compliance with the License. You can obtain a copy of the License by
007     * contacting Liferay, Inc. See the License for the specific language governing
008     * permissions and limitations under the License, including but not limited to
009     * distribution rights of the Software.
010     *
011     *
012     *
013     */
014    
015    package com.liferay.portlet.wiki.importers.mediawiki;
016    
017    import com.liferay.documentlibrary.service.DLLocalServiceUtil;
018    import com.liferay.portal.NoSuchUserException;
019    import com.liferay.portal.kernel.exception.PortalException;
020    import com.liferay.portal.kernel.exception.SystemException;
021    import com.liferay.portal.kernel.io.unsync.UnsyncBufferedReader;
022    import com.liferay.portal.kernel.log.Log;
023    import com.liferay.portal.kernel.log.LogFactoryUtil;
024    import com.liferay.portal.kernel.util.ArrayUtil;
025    import com.liferay.portal.kernel.util.MapUtil;
026    import com.liferay.portal.kernel.util.ObjectValuePair;
027    import com.liferay.portal.kernel.util.ProgressTracker;
028    import com.liferay.portal.kernel.util.ProgressTrackerThreadLocal;
029    import com.liferay.portal.kernel.util.StringBundler;
030    import com.liferay.portal.kernel.util.StringPool;
031    import com.liferay.portal.kernel.util.StringUtil;
032    import com.liferay.portal.kernel.util.Validator;
033    import com.liferay.portal.kernel.xml.Document;
034    import com.liferay.portal.kernel.xml.DocumentException;
035    import com.liferay.portal.kernel.xml.Element;
036    import com.liferay.portal.kernel.xml.SAXReaderUtil;
037    import com.liferay.portal.kernel.zip.ZipReader;
038    import com.liferay.portal.kernel.zip.ZipReaderFactoryUtil;
039    import com.liferay.portal.model.User;
040    import com.liferay.portal.service.ServiceContext;
041    import com.liferay.portal.service.UserLocalServiceUtil;
042    import com.liferay.portal.util.PropsValues;
043    import com.liferay.portlet.asset.NoSuchTagException;
044    import com.liferay.portlet.asset.model.AssetTag;
045    import com.liferay.portlet.asset.service.AssetTagLocalServiceUtil;
046    import com.liferay.portlet.asset.service.AssetTagPropertyLocalServiceUtil;
047    import com.liferay.portlet.asset.util.AssetUtil;
048    import com.liferay.portlet.wiki.ImportFilesException;
049    import com.liferay.portlet.wiki.NoSuchPageException;
050    import com.liferay.portlet.wiki.importers.WikiImporter;
051    import com.liferay.portlet.wiki.importers.WikiImporterKeys;
052    import com.liferay.portlet.wiki.model.WikiNode;
053    import com.liferay.portlet.wiki.model.WikiPage;
054    import com.liferay.portlet.wiki.model.WikiPageConstants;
055    import com.liferay.portlet.wiki.service.WikiPageLocalServiceUtil;
056    import com.liferay.portlet.wiki.translators.MediaWikiToCreoleTranslator;
057    
058    import java.io.File;
059    import java.io.FileReader;
060    import java.io.IOException;
061    
062    import java.util.ArrayList;
063    import java.util.Collections;
064    import java.util.HashMap;
065    import java.util.Iterator;
066    import java.util.List;
067    import java.util.Map;
068    import java.util.regex.Matcher;
069    import java.util.regex.Pattern;
070    
071    /**
072     * @author Alvaro del Castillo
073     * @author Jorge Ferrer
074     */
075    public class MediaWikiImporter implements WikiImporter {
076    
        /**
         * Content handed to <code>WikiPageLocalServiceUtil.addPage</code> when
         * <code>processImages</code> has to create the shared images page.
         */
        public static final String SHARED_IMAGES_CONTENT = "See attachments";

        /**
         * Title of the wiki page that <code>processImages</code> looks up (and
         * creates if missing) and attaches the imported images to.
         */
        public static final String SHARED_IMAGES_TITLE = "SharedImages";
080    
081            public void importPages(
082                            long userId, WikiNode node, File[] files,
083                            Map<String, String[]> options)
084                    throws PortalException {
085    
086                    if ((files.length < 1) || (files[0] == null) || (!files[0].exists())) {
087                            throw new PortalException("The pages file is mandatory");
088                    }
089    
090                    File pagesFile = files[0];
091                    File usersFile = files[1];
092                    File imagesFile = files[2];
093    
094                    try {
095                            Document doc = SAXReaderUtil.read(pagesFile);
096    
097                            Map<String, String> usersMap = readUsersFile(usersFile);
098    
099                            Element root = doc.getRootElement();
100    
101                            List<String> specialNamespaces = readSpecialNamespaces(root);
102    
103                            processSpecialPages(userId, node, root, specialNamespaces);
104                            processRegularPages(
105                                    userId, node, root, specialNamespaces, usersMap, imagesFile,
106                                    options);
107                            processImages(userId, node, imagesFile);
108    
109                            moveFrontPage(userId, node, options);
110                    }
111                    catch (DocumentException de) {
112                            throw new ImportFilesException("Invalid XML file provided");
113                    }
114                    catch (IOException de) {
115                            throw new ImportFilesException("Error reading the files provided");
116                    }
117                    catch (PortalException e) {
118                            throw e;
119                    }
120                    catch (Exception e) {
121                            throw new PortalException(e);
122                    }
123            }
124    
125            protected long getUserId(
126                            long userId, WikiNode node, String author,
127                            Map<String, String> usersMap)
128                    throws PortalException, SystemException {
129    
130                    User user = null;
131    
132                    String emailAddress = usersMap.get(author);
133    
134                    try {
135                            if (Validator.isNull(emailAddress)) {
136                                    user = UserLocalServiceUtil.getUserByScreenName(
137                                            node.getCompanyId(), author.toLowerCase());
138                            }
139                            else {
140                                    user = UserLocalServiceUtil.getUserByEmailAddress(
141                                            node.getCompanyId(), emailAddress);
142                            }
143                    }
144                    catch (NoSuchUserException nsue) {
145                            user = UserLocalServiceUtil.getUserById(userId);
146                    }
147    
148                    return user.getUserId();
149            }
150    
151            protected void importPage(
152                            long userId, String author, WikiNode node, String title,
153                            String content, String summary, Map<String, String> usersMap)
154                    throws PortalException {
155    
156                    try {
157                            long authorUserId = getUserId(userId, node, author, usersMap);
158                            String parentTitle = readParentTitle(content);
159                            String redirectTitle = readRedirectTitle(content);
160    
161                            ServiceContext serviceContext = new ServiceContext();
162    
163                            serviceContext.setAddCommunityPermissions(true);
164                            serviceContext.setAddGuestPermissions(true);
165                            serviceContext.setAssetTagNames(
166                                    readAssetTagNames(userId, node, content));
167    
168                            if (Validator.isNull(redirectTitle)) {
169                                    content = _translator.translate(content);
170                            }
171                            else {
172                                    content =
173                                            StringPool.DOUBLE_OPEN_BRACKET + redirectTitle +
174                                                    StringPool.DOUBLE_CLOSE_BRACKET;
175                            }
176    
177                            WikiPage page = null;
178    
179                            try {
180                                    page = WikiPageLocalServiceUtil.getPage(
181                                            node.getNodeId(), title);
182                            }
183                            catch (NoSuchPageException nspe) {
184                                    page = WikiPageLocalServiceUtil.addPage(
185                                            authorUserId, node.getNodeId(), title,
186                                            WikiPageConstants.NEW, null, true, serviceContext);
187                            }
188    
189                            WikiPageLocalServiceUtil.updatePage(
190                                    authorUserId, node.getNodeId(), title, page.getVersion(),
191                                    content, summary, true, "creole", parentTitle, redirectTitle,
192                                    serviceContext);
193                    }
194                    catch (Exception e) {
195                            throw new PortalException("Error importing page " + title, e);
196                    }
197            }
198    
199            protected boolean isSpecialMediaWikiPage(
200                    String title, List<String> specialNamespaces) {
201    
202                    for (String namespace: specialNamespaces) {
203                            if (title.startsWith(namespace + StringPool.COLON)) {
204                                    return true;
205                            }
206                    }
207    
208                    return false;
209            }
210    
211            protected boolean isValidImage(String[] paths, byte[] bytes) {
212                    if (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[0])) {
213                            return false;
214                    }
215    
216                    if ((paths.length > 1) &&
217                            (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[1]))) {
218    
219                            return false;
220                    }
221    
222                    String fileName = paths[paths.length - 1];
223    
224                    try {
225                            DLLocalServiceUtil.validate(fileName, true, bytes);
226                    }
227                    catch (PortalException pe) {
228                            return false;
229                    }
230                    catch (SystemException se) {
231                            return false;
232                    }
233    
234                    return true;
235            }
236    
237            protected void moveFrontPage(
238                    long userId, WikiNode node, Map<String, String[]> options) {
239    
240                    String frontPageTitle = MapUtil.getString(
241                            options, WikiImporterKeys.OPTIONS_FRONT_PAGE);
242    
243                    if (Validator.isNotNull(frontPageTitle)) {
244                            frontPageTitle = normalizeTitle(frontPageTitle);
245    
246                            try {
247                                    if (WikiPageLocalServiceUtil.getPagesCount(
248                                                    node.getNodeId(), frontPageTitle, true) > 0) {
249    
250                                            ServiceContext serviceContext = new ServiceContext();
251    
252                                            serviceContext.setAddCommunityPermissions(true);
253                                            serviceContext.setAddGuestPermissions(true);
254    
255                                            WikiPageLocalServiceUtil.movePage(
256                                                    userId, node.getNodeId(), frontPageTitle,
257                                                    WikiPageConstants.FRONT_PAGE, false, serviceContext);
258    
259                                    }
260                            }
261                            catch (Exception e) {
262                                    if (_log.isWarnEnabled()) {
263                                            StringBundler sb = new StringBundler(4);
264    
265                                            sb.append("Could not move ");
266                                            sb.append(WikiPageConstants.FRONT_PAGE);
267                                            sb.append(" to the title provided: ");
268                                            sb.append(frontPageTitle);
269    
270                                            _log.warn(sb.toString(), e);
271                                    }
272                            }
273    
274                    }
275    
276            }
277    
278            protected String normalize(String categoryName, int length) {
279                    categoryName = AssetUtil.toWord(categoryName.trim());
280    
281                    return StringUtil.shorten(categoryName, length);
282            }
283    
284            protected String normalizeDescription(String description) {
285                    description = description.replaceAll(
286                            _categoriesPattern.pattern(), StringPool.BLANK);
287    
288                    return normalize(description, 300);
289            }
290    
291            protected String normalizeTitle(String title) {
292                    title = title.replaceAll(
293                            PropsValues.WIKI_PAGE_TITLES_REMOVE_REGEXP, StringPool.BLANK);
294    
295                    return StringUtil.shorten(title, 75);
296            }
297    
298            protected void processImages(long userId, WikiNode node, File imagesFile)
299                    throws Exception {
300    
301                    if ((imagesFile == null) || (!imagesFile.exists())) {
302                            return;
303                    }
304    
305                    ProgressTracker progressTracker =
306                            ProgressTrackerThreadLocal.getProgressTracker();
307    
308                    int count = 0;
309    
310                    ZipReader zipReader = ZipReaderFactoryUtil.getZipReader(imagesFile);
311    
312                    List<String> entries = zipReader.getEntries();
313    
314                    int total = entries.size();
315    
316                    if (total > 0) {
317                            try {
318                                    WikiPageLocalServiceUtil.getPage(
319                                            node.getNodeId(), SHARED_IMAGES_TITLE);
320                            }
321                            catch (NoSuchPageException nspe) {
322                                    ServiceContext serviceContext = new ServiceContext();
323    
324                                    serviceContext.setAddCommunityPermissions(true);
325                                    serviceContext.setAddGuestPermissions(true);
326    
327                                    WikiPageLocalServiceUtil.addPage(
328                                            userId, node.getNodeId(), SHARED_IMAGES_TITLE,
329                                            SHARED_IMAGES_CONTENT, null, true, serviceContext);
330                            }
331                    }
332    
333                    List<ObjectValuePair<String, byte[]>> attachments =
334                            new ArrayList<ObjectValuePair<String, byte[]>>();
335    
336                    int percentage = 50;
337    
338                    for (int i = 0; i < entries.size(); i++) {
339                            String entry = entries.get(i);
340    
341                            String key = entry;
342                            byte[] value = zipReader.getEntryAsByteArray(entry);
343    
344                            String[] paths = StringUtil.split(key, StringPool.SLASH);
345    
346                            if (!isValidImage(paths, value)) {
347                                    if (_log.isInfoEnabled()) {
348                                            _log.info("Ignoring " + key);
349                                    }
350    
351                                    continue;
352                            }
353    
354                            String fileName = paths[paths.length - 1].toLowerCase();
355    
356                            attachments.add(
357                                    new ObjectValuePair<String, byte[]>(fileName, value));
358    
359                            count++;
360    
361                            if ((i % 5) == 0) {
362                                    WikiPageLocalServiceUtil.addPageAttachments(
363                                            node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
364    
365                                    attachments.clear();
366    
367                                    percentage = Math.min(50 + (i * 50) / total, 99);
368    
369                                    progressTracker.updateProgress(percentage);
370                            }
371                    }
372    
373                    if (!attachments.isEmpty()) {
374                            WikiPageLocalServiceUtil.addPageAttachments(
375                                    node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
376                    }
377    
378                    zipReader.close();
379    
380                    if (_log.isInfoEnabled()) {
381                            _log.info("Imported " + count + " images into " + node.getName());
382                    }
383            }
384    
385            protected void processRegularPages(
386                    long userId, WikiNode node, Element root,
387                    List<String> specialNamespaces, Map<String, String> usersMap,
388                    File imagesFile, Map<String, String[]> options) {
389    
390                    boolean importLatestVersion = MapUtil.getBoolean(
391                            options, WikiImporterKeys.OPTIONS_IMPORT_LATEST_VERSION);
392    
393                    ProgressTracker progressTracker =
394                            ProgressTrackerThreadLocal.getProgressTracker();
395    
396                    int count = 0;
397    
398                    List<Element> pages = root.elements("page");
399    
400                    int total = pages.size();
401    
402                    Iterator<Element> itr = root.elements("page").iterator();
403    
404                    int percentage = 10;
405                    int maxPercentage = 50;
406    
407                    if ((imagesFile == null) || (!imagesFile.exists())) {
408                            maxPercentage = 99;
409                    }
410    
411                    int percentageRange = maxPercentage - percentage;
412    
413                    for (int i = 0; itr.hasNext(); i++) {
414                            Element pageEl = itr.next();
415    
416                            String title = pageEl.elementText("title");
417    
418                            title = normalizeTitle(title);
419    
420                            percentage = Math.min(
421                                    10 + (i * percentageRange) / total, maxPercentage);
422    
423                            progressTracker.updateProgress(percentage);
424    
425                            if (isSpecialMediaWikiPage(title, specialNamespaces)) {
426                                    continue;
427                            }
428    
429                            List<Element> revisionEls = pageEl.elements("revision");
430    
431                            if (importLatestVersion) {
432                                    Element lastRevisionEl = revisionEls.get(
433                                            revisionEls.size() - 1);
434    
435                                    revisionEls = new ArrayList<Element>();
436    
437                                    revisionEls.add(lastRevisionEl);
438                            }
439    
440                            for (Element curRevisionEl : revisionEls) {
441                                    String author = curRevisionEl.element(
442                                            "contributor").elementText("username");
443                                    String content = curRevisionEl.elementText("text");
444                                    String summary = curRevisionEl.elementText("comment");
445    
446                                    try {
447                                            importPage(
448                                                    userId, author, node, title, content, summary,
449                                                    usersMap);
450                                    }
451                                    catch (Exception e) {
452                                            if (_log.isWarnEnabled()) {
453                                                    StringBundler sb = new StringBundler(3);
454    
455                                                    sb.append("Page with title ");
456                                                    sb.append(title);
457                                                    sb.append(" could not be imported");
458    
459                                                    _log.warn(sb.toString(), e);
460                                            }
461                                    }
462                            }
463    
464                            count++;
465                    }
466    
467                    if (_log.isInfoEnabled()) {
468                            _log.info("Imported " + count + " pages into " + node.getName());
469                    }
470            }
471    
472            protected void processSpecialPages(
473                            long userId, WikiNode node, Element root,
474                            List<String> specialNamespaces)
475                    throws PortalException {
476    
477                    ProgressTracker progressTracker =
478                            ProgressTrackerThreadLocal.getProgressTracker();
479    
480                    List<Element> pages = root.elements("page");
481    
482                    int total = pages.size();
483    
484                    Iterator<Element> itr = pages.iterator();
485    
486                    for (int i = 0; itr.hasNext(); i++) {
487                            Element page = itr.next();
488    
489                            String title = page.elementText("title");
490    
491                            if (!title.startsWith("Category:")) {
492                                    if (isSpecialMediaWikiPage(title, specialNamespaces)) {
493                                            root.remove(page);
494                                    }
495    
496                                    continue;
497                            }
498    
499                            String categoryName = title.substring("Category:".length());
500    
501                            categoryName = normalize(categoryName, 75);
502    
503                            String description = page.element("revision").elementText("text");
504    
505                            description = normalizeDescription(description);
506    
507                            try {
508                                    AssetTag assetTag = null;
509    
510                                    try {
511                                            assetTag = AssetTagLocalServiceUtil.getTag(
512                                                    node.getCompanyId(), categoryName);
513                                    }
514                                    catch (NoSuchTagException nste) {
515                                            ServiceContext serviceContext = new ServiceContext();
516    
517                                            serviceContext.setAddCommunityPermissions(true);
518                                            serviceContext.setAddGuestPermissions(true);
519                                            serviceContext.setScopeGroupId(node.getGroupId());
520    
521                                            assetTag = AssetTagLocalServiceUtil.addTag(
522                                                    userId, categoryName, null, serviceContext);
523                                    }
524    
525                                    if (Validator.isNotNull(description)) {
526                                            AssetTagPropertyLocalServiceUtil.addTagProperty(
527                                                    userId, assetTag.getTagId(), "description",
528                                                    description);
529                                    }
530                            }
531                            catch (SystemException se) {
532                                     _log.error(se, se);
533                            }
534    
535                            if ((i % 5) == 0) {
536                                    progressTracker.updateProgress((i * 10) / total);
537                            }
538                    }
539            }
540    
541            protected String[] readAssetTagNames(
542                            long userId, WikiNode node, String content)
543                    throws PortalException, SystemException {
544    
545                    Matcher matcher = _categoriesPattern.matcher(content);
546    
547                    List<String> assetTagNames = new ArrayList<String>();
548    
549                    while (matcher.find()) {
550                            String categoryName = matcher.group(1);
551    
552                            categoryName = normalize(categoryName, 75);
553    
554                            AssetTag assetTag = null;
555    
556                            try {
557                                    assetTag = AssetTagLocalServiceUtil.getTag(
558                                            node.getGroupId(), categoryName);
559                            }
560                            catch (NoSuchTagException nste) {
561                                    ServiceContext serviceContext = new ServiceContext();
562    
563                                    serviceContext.setAddCommunityPermissions(true);
564                                    serviceContext.setAddGuestPermissions(true);
565                                    serviceContext.setScopeGroupId(node.getGroupId());
566    
567                                    assetTag = AssetTagLocalServiceUtil.addTag(
568                                            userId, categoryName, null, serviceContext);
569                            }
570    
571                            assetTagNames.add(assetTag.getName());
572                    }
573    
574                    if (content.indexOf(_WORK_IN_PROGRESS) != -1) {
575                            assetTagNames.add(_WORK_IN_PROGRESS_TAG);
576                    }
577    
578                    return assetTagNames.toArray(new String[assetTagNames.size()]);
579            }
580    
581            protected String readParentTitle(String content) {
582                    Matcher matcher = _parentPattern.matcher(content);
583    
584                    String redirectTitle = StringPool.BLANK;
585    
586                    if (matcher.find()) {
587                            redirectTitle = matcher.group(1);
588    
589                            redirectTitle = normalizeTitle(redirectTitle);
590    
591                            redirectTitle += " (disambiguation)";
592                    }
593    
594                    return redirectTitle;
595            }
596            protected String readRedirectTitle(String content) {
597                    Matcher matcher = _redirectPattern.matcher(content);
598    
599                    String redirectTitle = StringPool.BLANK;
600    
601                    if (matcher.find()) {
602                            redirectTitle = matcher.group(1);
603    
604                            redirectTitle = normalizeTitle(redirectTitle);
605                    }
606    
607                    return redirectTitle;
608            }
609            protected List<String> readSpecialNamespaces(Element root)
610                    throws ImportFilesException {
611    
612                    List<String> namespaces = new ArrayList<String>();
613    
614                    Element siteinfoEl = root.element("siteinfo");
615    
616                    if (siteinfoEl == null) {
617                            throw new ImportFilesException("Invalid pages XML file");
618                    }
619    
620                    Iterator<Element> itr = siteinfoEl.element(
621                            "namespaces").elements("namespace").iterator();
622    
623                    while (itr.hasNext()) {
624                            Element namespace = itr.next();
625    
626                            if (!namespace.attribute("key").getData().equals("0")) {
627                                    namespaces.add(namespace.getText());
628                            }
629                    }
630    
631                    return namespaces;
632            }
633    
634            protected Map<String, String> readUsersFile(File usersFile)
635                    throws IOException {
636    
637                    if ((usersFile == null) || (!usersFile.exists())) {
638                            return Collections.EMPTY_MAP;
639                    }
640    
641                    Map<String, String> usersMap = new HashMap<String, String>();
642    
643                    UnsyncBufferedReader unsyncBufferedReader =
644                            new UnsyncBufferedReader(new FileReader(usersFile));
645    
646                    String line = unsyncBufferedReader.readLine();
647    
648                    while (line != null) {
649                            String[] array = StringUtil.split(line);
650    
651                            if ((array.length == 2) && (Validator.isNotNull(array[0])) &&
652                                    (Validator.isNotNull(array[1]))) {
653    
654                                    usersMap.put(array[0], array[1]);
655                            }
656                            else {
657                                    if (_log.isInfoEnabled()) {
658                                            _log.info(
659                                                    "Ignoring line " + line +
660                                                            " because it does not contain exactly 2 columns");
661                                    }
662                            }
663    
664                            line = unsyncBufferedReader.readLine();
665                    }
666    
667                    return usersMap;
668            }
669    
670            private static final String[] _SPECIAL_MEDIA_WIKI_DIRS = {
671                    "thumb", "temp", "archive"
672            };
673    
674            private static final String _WORK_IN_PROGRESS = "{{Work in progress}}";
675    
676            private static final String _WORK_IN_PROGRESS_TAG = "work in progress";
677    
678            private static Log _log = LogFactoryUtil.getLog(MediaWikiImporter.class);
679    
680            private static Pattern _categoriesPattern = Pattern.compile(
681                    "\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*");
682            private static Pattern _parentPattern = Pattern.compile(
683                    "\\{{2}OtherTopics\\|([^\\}]*)\\}{2}");
684            private static Pattern _redirectPattern = Pattern.compile(
685                    "#REDIRECT \\[\\[([^\\]]*)\\]\\]");
686    
687            private MediaWikiToCreoleTranslator _translator =
688                    new MediaWikiToCreoleTranslator();
689    
690    }