1   /**
2    * Copyright (c) 2000-2010 Liferay, Inc. All rights reserved.
3    *
4    * The contents of this file are subject to the terms of the Liferay Enterprise
5    * Subscription License ("License"). You may not use this file except in
6    * compliance with the License. You can obtain a copy of the License by
7    * contacting Liferay, Inc. See the License for the specific language governing
8    * permissions and limitations under the License, including but not limited to
9    * distribution rights of the Software.
10   *
11   *
12   *
13   */
14  
15  package com.liferay.portal.util;
16  
17  import com.liferay.portal.kernel.util.CharPool;
18  import com.liferay.portal.kernel.util.Html;
19  import com.liferay.portal.kernel.util.HttpUtil;
20  import com.liferay.portal.kernel.util.StringBundler;
21  import com.liferay.portal.kernel.util.StringPool;
22  import com.liferay.portal.kernel.util.StringUtil;
23  
24  import java.util.regex.Matcher;
25  import java.util.regex.Pattern;
26  
27  import net.htmlparser.jericho.Source;
28  
29  /**
30   * <a href="HtmlImpl.java.html"><b><i>View Source</i></b></a>
31   *
32   * @author Brian Wing Shun Chan
33   * @author Clarence Shen
34   * @author Harry Mark
35   * @author Samuel Kong
36   * @author Connor McKay
37   */
38  public class HtmlImpl implements Html {
39  
40      public static final int ESCAPE_MODE_ATTRIBUTE = 1;
41  
42      public static final int ESCAPE_MODE_CSS = 2;
43  
44      public static final int ESCAPE_MODE_JS = 3;
45  
46      public static final int ESCAPE_MODE_TEXT = 4;
47  
48      public static final int ESCAPE_MODE_URL = 5;
49  
50      public String escape(String text) {
51          if (text == null) {
52              return null;
53          }
54  
55          if (text.length() == 0) {
56              return StringPool.BLANK;
57          }
58  
59          // Escape using XSS recommendations from
60          // http://www.owasp.org/index.php/Cross_Site_Scripting
61          // #How_to_Protect_Yourself
62  
63          StringBuilder sb = new StringBuilder(text.length());
64  
65          for (int i = 0; i < text.length(); i++) {
66              char c = text.charAt(i);
67  
68              switch (c) {
69                  case '<':
70                      sb.append("&lt;");
71  
72                      break;
73  
74                  case '>':
75                      sb.append("&gt;");
76  
77                      break;
78  
79                  case '&':
80                      sb.append("&amp;");
81  
82                      break;
83  
84                  case '"':
85                      sb.append("&#034;");
86  
87                      break;
88  
89                  case '\'':
90                      sb.append("&#039;");
91  
92                      break;
93  
94                  case '\u00bb': // '�'
95                      sb.append("&raquo;");
96  
97                      break;
98  
99                  default:
100                     sb.append(c);
101 
102                     break;
103             }
104         }
105 
106         if (sb.length() == text.length()) {
107             return text;
108         }
109         else {
110             return sb.toString();
111         }
112     }
113 
114     public String escape(String text, int type) {
115         if (text == null) {
116             return null;
117         }
118 
119         if (text.length() == 0) {
120             return StringPool.BLANK;
121         }
122 
123         String prefix = StringPool.BLANK;
124         String postfix = StringPool.BLANK;
125 
126         if (type == ESCAPE_MODE_ATTRIBUTE) {
127             prefix = "&#x";
128             postfix = StringPool.SEMICOLON;
129         }
130         else if (type == ESCAPE_MODE_CSS) {
131             prefix = StringPool.BACK_SLASH;
132         }
133         else if (type == ESCAPE_MODE_JS) {
134             prefix = "\\x";
135         }
136         else if (type == ESCAPE_MODE_URL) {
137             return HttpUtil.encodeURL(text, true);
138         }
139         else {
140             return escape(text);
141         }
142 
143         StringBuilder sb = new StringBuilder();
144 
145         for (int i = 0; i < text.length(); i++) {
146             char c = text.charAt(i);
147 
148             if ((Character.isLetterOrDigit(c)) ||
149                 (c == CharPool.DASH) || (c == CharPool.UNDERLINE)) {
150 
151                 sb.append(c);
152             }
153             else {
154                 sb.append(prefix);
155                 sb.append(Integer.toHexString(c));
156                 sb.append(postfix);
157             }
158         }
159 
160         if (sb.length() == text.length()) {
161             return text;
162         }
163         else {
164             return sb.toString();
165         }
166     }
167 
168     public String escapeAttribute(String attribute) {
169         return escape(attribute, ESCAPE_MODE_ATTRIBUTE);
170     }
171 
172     public String escapeCSS(String css) {
173         return escape(css, ESCAPE_MODE_CSS);
174     }
175 
176     public String escapeHREF(String href) {
177         if (href == null) {
178             return null;
179         }
180 
181         if (href.length() == 0) {
182             return StringPool.BLANK;
183         }
184 
185         if (href.indexOf(StringPool.COLON) == 10) {
186             String protocol = href.substring(0, 10).toLowerCase();
187 
188             if (protocol.equals("javascript")) {
189                 return StringUtil.replaceFirst(href, StringPool.COLON, "%3a");
190             }
191         }
192 
193         return href;
194     }
195 
196     public String escapeJS(String js) {
197         return escape(js, ESCAPE_MODE_JS);
198     }
199 
200     public String escapeURL(String url) {
201         return escape(url, ESCAPE_MODE_URL);
202     }
203 
204     public String extractText(String html) {
205         if (html == null) {
206             return null;
207         }
208 
209         Source source = new Source(html);
210 
211         return source.getTextExtractor().toString();
212     }
213 
214     public String fromInputSafe(String text) {
215         return StringUtil.replace(text, "&amp;", "&");
216     }
217 
218     public String replaceMsWordCharacters(String text) {
219         return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
220     }
221 
222     public String stripBetween(String text, String tag) {
223         return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
224     }
225 
226     public String stripComments(String text) {
227         return StringUtil.stripBetween(text, "<!--", "-->");
228     }
229 
230     public String stripHtml(String text) {
231         if (text == null) {
232             return null;
233         }
234 
235         text = stripComments(text);
236 
237         StringBuilder sb = new StringBuilder(text.length());
238 
239         int x = 0;
240         int y = text.indexOf("<");
241 
242         while (y != -1) {
243             sb.append(text.substring(x, y));
244             sb.append(StringPool.SPACE);
245 
246             // Look for text enclosed by <script></script>
247 
248             boolean scriptFound = isScriptTag(text, y + 1);
249 
250             if (scriptFound) {
251                 int pos = y + _TAG_SCRIPT.length;
252 
253                 // Find end of the tag
254 
255                 pos = text.indexOf(">", pos);
256 
257                 if (pos >= 0) {
258 
259                     // Check if preceding character is / (i.e. is this instance
260                     // of <script/>)
261 
262                     if (text.charAt(pos-1) != '/') {
263 
264                         // Search for the ending </script> tag
265 
266                         for (;;) {
267                             pos = text.indexOf("</", pos);
268 
269                             if (pos >= 0) {
270                                 if (isScriptTag(text, pos + 2)) {
271                                     y = pos;
272 
273                                     break;
274                                 }
275                                 else {
276 
277                                     // Skip past "</"
278 
279                                     pos += 2;
280                                 }
281                             }
282                             else {
283                                 break;
284                             }
285                         }
286                     }
287                 }
288             }
289 
290             x = text.indexOf(">", y);
291 
292             if (x == -1) {
293                 break;
294             }
295 
296             x++;
297 
298             if (x < y) {
299 
300                 // <b>Hello</b
301 
302                 break;
303             }
304 
305             y = text.indexOf("<", x);
306         }
307 
308         if (y == -1) {
309             sb.append(text.substring(x, text.length()));
310         }
311 
312         return sb.toString();
313     }
314 
315     public String toInputSafe(String text) {
316         return StringUtil.replace(
317             text,
318             new String[] {"&", "\""},
319             new String[] {"&amp;", "&quot;"});
320     }
321 
322     public String unescape(String text) {
323         if (text == null) {
324             return null;
325         }
326 
327         if (text.length() == 0) {
328             return StringPool.BLANK;
329         }
330 
331         // Optimize this
332 
333         text = StringUtil.replace(text, "&lt;", "<");
334         text = StringUtil.replace(text, "&gt;", ">");
335         text = StringUtil.replace(text, "&amp;", "&");
336         text = StringUtil.replace(text, "&#034;", "\"");
337         text = StringUtil.replace(text, "&#039;", "'");
338         text = StringUtil.replace(text, "&#040;", "(");
339         text = StringUtil.replace(text, "&#041;", ")");
340         text = StringUtil.replace(text, "&#044;", ",");
341         text = StringUtil.replace(text, "&#035;", "#");
342         text = StringUtil.replace(text, "&#037;", "%");
343         text = StringUtil.replace(text, "&#059;", ";");
344         text = StringUtil.replace(text, "&#061;", "=");
345         text = StringUtil.replace(text, "&#043;", "+");
346         text = StringUtil.replace(text, "&#045;", "-");
347 
348         return text;
349     }
350 
351     public String wordBreak(String text, int columns) {
352         StringBundler sb = new StringBundler();
353 
354         int length = 0;
355         int lastWrite = 0;
356         int pos = 0;
357 
358         Pattern pattern = Pattern.compile("([\\s<&]|$)");
359 
360         Matcher matcher = pattern.matcher(text);
361 
362         while (matcher.find()) {
363             if (matcher.start() < pos) {
364                 continue;
365             }
366 
367             while ((length + matcher.start() - pos) >= columns) {
368                 pos += columns - length;
369 
370                 sb.append(text.substring(lastWrite, pos));
371                 sb.append("<wbr/>");
372 
373                 length = 0;
374                 lastWrite = pos;
375             }
376 
377             length += matcher.start() - pos;
378 
379             String group = matcher.group();
380 
381             if (group.equals(StringPool.AMPERSAND)) {
382                 int x = text.indexOf(StringPool.SEMICOLON, matcher.start());
383 
384                 if (x != -1) {
385                     length++;
386                     pos = x + 1;
387                 }
388 
389                 continue;
390             }
391 
392             if (group.equals(StringPool.LESS_THAN)) {
393                 int x = text.indexOf(StringPool.GREATER_THAN, matcher.start());
394 
395                 if (x != -1) {
396                     pos = x + 1;
397                 }
398 
399                 continue;
400             }
401 
402             if (group.equals(StringPool.SPACE) ||
403                 group.equals(StringPool.NEW_LINE)) {
404 
405                 length = 0;
406                 pos = matcher.start() + 1;
407             }
408         }
409 
410         sb.append(text.substring(lastWrite));
411 
412         return sb.toString();
413     }
414 
415     protected boolean isScriptTag(String text, int pos) {
416         if (pos + _TAG_SCRIPT.length + 1 <= text.length()) {
417             char item;
418 
419             for (int i = 0; i < _TAG_SCRIPT.length; i++) {
420                 item = text.charAt(pos++);
421 
422                 if (Character.toLowerCase(item) != _TAG_SCRIPT[i]) {
423                     return false;
424                 }
425             }
426 
427             item = text.charAt(pos);
428 
429             // Check that char after "script" is not a letter (i.e. another tag)
430 
431             return !Character.isLetter(item);
432         }
433         else {
434             return false;
435         }
436     }
437 
438     private static final String[] _MS_WORD_UNICODE = new String[] {
439         "\u00ae", "\u2019", "\u201c", "\u201d"
440     };
441 
442     private static final String[] _MS_WORD_HTML = new String[] {
443         "&reg;", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
444     };
445 
446     private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};
447 
448 }