1   /**
2    * Copyright (c) 2000-2009 Liferay, Inc. All rights reserved.
3    *
4    * Permission is hereby granted, free of charge, to any person obtaining a copy
5    * of this software and associated documentation files (the "Software"), to deal
6    * in the Software without restriction, including without limitation the rights
7    * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8    * copies of the Software, and to permit persons to whom the Software is
9    * furnished to do so, subject to the following conditions:
10   *
11   * The above copyright notice and this permission notice shall be included in
12   * all copies or substantial portions of the Software.
13   *
14   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20   * SOFTWARE.
21   */
22  
23  package com.liferay.portlet.wiki.translators;
24  
25  import com.liferay.portal.kernel.util.StringPool;
26  import com.liferay.portlet.wiki.importers.mediawiki.MediaWikiImporter;
27  
28  import java.util.regex.Matcher;
29  import java.util.regex.Pattern;
30  
31  /**
32   * <a href="MediaWikiToCreoleTranslator.java.html"><b><i>View Source</i></b></a>
33   *
34   * @author Jorge Ferrer
35   */
36  public class MediaWikiToCreoleTranslator extends BaseTranslator {
37  
38      public static final String TABLE_OF_CONTENTS = "<<TableOfContents>>\n\n";
39  
40      public MediaWikiToCreoleTranslator() {
41          initRegexps();
42          initNowikiRegexps();
43      }
44  
45      protected void initNowikiRegexps() {
46  
47          // Preformat protected
48  
49          nowikiRegexps.add("(<nowiki>)(.*?)(</nowiki>)");
50          nowikiRegexps.add("(<pre>)(.*?)(</pre>)");
51  
52          // Escape protected
53  
54          nowikiRegexps.add(
55              "~(\\*\\*|~|//|-|#|\\{\\{|}}|\\\\|~\\[~~[|]]|----|=|\\|)");
56      }
57  
58      protected void initRegexps() {
59  
60          // Clean unnecessary header emphasis
61  
62          regexps.put("= '''([^=]+)''' =", "= $1 =");
63          regexps.put("== '''([^=]+)''' ==", "== $1 ==");
64          regexps.put("== '''([^=]+)''' ===", "=== $1 ===");
65  
66          // Unscape angle brackets
67  
68          regexps.put("&lt;", "<");
69          regexps.put("&gt;", ">");
70  
71          // Remove categories
72  
73          regexps.put("\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*", "");
74  
75          // Remove disambiguations
76  
77          regexps.put("\\{{2}OtherTopics\\|([^\\}]*)\\}{2}", StringPool.BLANK);
78  
79          // Remove work in progress
80  
81          regexps.put("\\{{2}Work in progress\\}{2}", StringPool.BLANK);
82  
83          // Bold and italics
84  
85          regexps.put(
86              "''''((?s:.)*?)(''''|(\n\n|\r\r|\r\n\r\n))", "**//$1//**$3");
87  
88          // Bold
89  
90          regexps.put("'''((?s:.)*?)('''|(\n\n|\r\r|\r\n\r\n))", "**$1**$3");
91  
92          // Italics
93  
94          regexps.put("''((?s:.)*?)(''|(\n\n|\r\r|\r\n\r\n))", "//$1//$3");
95  
96          // Normalize URLs
97  
98          regexps.put("\\[{2}((http|ftp)[^ ]*) ([^\\]]*)\\]{2}", "[$1 $3]");
99  
100         // URL
101 
102         regexps.put("\\[((http|ftp)[^ ]*)\\]", "[[$1]]");
103 
104         // URL with label
105 
106         regexps.put("\\[((http|ftp)[^ ]*) ([^\\]]*)\\]", "[[$1|$3]]");
107 
108         // Term and definition
109 
110         regexps.put("^\\t([\\w]+):\\t(.*)", "**$1**:\n$2");
111 
112         // Indented paragraph
113 
114         regexps.put("^\\t:\\t(.*)", "$1");
115 
116         // Monospace
117 
118         regexps.put("(^ (.+))(\\n (.+))*", "{{{\n$0\n}}}");
119 
120         // No wiki
121 
122         regexps.put("<nowiki>([^<]*)</nowiki>", "{{{$1}}}");
123 
124         // HTML PRE
125 
126         regexps.put("<pre>([^<]*)</pre>", "{{{$1}}}");
127 
128         // User reference
129 
130         regexps.put("[-]*\\[{2}User:([^\\]]*)\\]{2}", "$1");
131     }
132 
133     protected String postProcess(String content) {
134 
135         // LEP-6118
136 
137         Matcher matcher = Pattern.compile(
138             "^=([^=]+)=", Pattern.MULTILINE).matcher(content);
139 
140         if (matcher.find()) {
141             content = runRegexp(content, "^===([^=]+)===", "====$1====");
142             content = runRegexp(content, "^==([^=]+)==", "===$1===");
143             content = runRegexp(content, "^=([^=]+)=", "==$1==");
144         }
145 
146         // Remove HTML tags
147 
148         for (int i = 0; i < _HTML_TAGS.length; i++) {
149             content = content.replaceAll(_HTML_TAGS[i], StringPool.BLANK);
150         }
151 
152         // Images
153 
154         matcher = Pattern.compile(
155             "\\[{2}Image:([^\\]]*)\\]{2}", Pattern.DOTALL).matcher(content);
156 
157         StringBuffer sb = new StringBuffer(content);
158 
159         int offset = 0;
160 
161         while (matcher.find()) {
162             String image =
163                 "{{" + MediaWikiImporter.SHARED_IMAGES_TITLE + "/" +
164                     matcher.group(1).toLowerCase() + "}}";
165 
166             sb.replace(
167                 matcher.start(0) + offset, matcher.end(0) + offset, image);
168 
169             offset += MediaWikiImporter.SHARED_IMAGES_TITLE.length() - 5;
170         }
171 
172         content = sb.toString();
173 
174         // Remove underscores from links
175 
176         matcher = Pattern.compile(
177             "\\[{2}([^\\]]*)\\]{2}", Pattern.DOTALL).matcher(content);
178 
179         sb = new StringBuffer(content);
180 
181         while (matcher.find()) {
182             String link = matcher.group(1).replace(
183                 StringPool.UNDERLINE, StringPool.SPACE);
184 
185             sb.replace(matcher.start(1), matcher.end(1), link);
186         }
187 
188         return TABLE_OF_CONTENTS + super.postProcess(sb.toString());
189     }
190 
191     private static final String[] _HTML_TAGS = {
192         "<blockquote>", "</blockquote>", "<br>", "<br/>", "<br />",  "<center>",
193         "</center>", "<cite>", "</cite>","<code>", "</code>", "<div[^>]*>",
194         "</div>", "<font[^>]*>", "</font>", "<hr>", "<hr/>", "<hr />", "<p>",
195         "</p>", "<tt>", "</tt>", "<var>", "</var>"};
196 
197 }