1   /**
2    * Copyright (c) 2000-2010 Liferay, Inc. All rights reserved.
3    *
4    * This library is free software; you can redistribute it and/or modify it under
5    * the terms of the GNU Lesser General Public License as published by the Free
6    * Software Foundation; either version 2.1 of the License, or (at your option)
7    * any later version.
8    *
9    * This library is distributed in the hope that it will be useful, but WITHOUT
10   * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11   * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
12   * details.
13   */
14  
15  package com.liferay.portal.util;
16  
17  import com.liferay.portal.kernel.util.CharPool;
18  import com.liferay.portal.kernel.util.Html;
19  import com.liferay.portal.kernel.util.HttpUtil;
20  import com.liferay.portal.kernel.util.StringPool;
21  import com.liferay.portal.kernel.util.StringUtil;
22  
23  import net.htmlparser.jericho.Source;
24  
25  /**
26   * <a href="HtmlImpl.java.html"><b><i>View Source</i></b></a>
27   *
28   * @author Brian Wing Shun Chan
29   * @author Clarence Shen
30   * @author Harry Mark
31   * @author Samuel Kong
32   */
33  public class HtmlImpl implements Html {
34  
35      public static final int ESCAPE_MODE_ATTRIBUTE = 1;
36  
37      public static final int ESCAPE_MODE_CSS = 2;
38  
39      public static final int ESCAPE_MODE_JS = 3;
40  
41      public static final int ESCAPE_MODE_TEXT = 4;
42  
43      public static final int ESCAPE_MODE_URL = 5;
44  
45      public String escape(String text) {
46          if (text == null) {
47              return null;
48          }
49  
50          // Escape using XSS recommendations from
51          // http://www.owasp.org/index.php/Cross_Site_Scripting
52          // #How_to_Protect_Yourself
53  
54          StringBuilder sb = new StringBuilder(text.length());
55  
56          for (int i = 0; i < text.length(); i++) {
57              char c = text.charAt(i);
58  
59              switch (c) {
60                  case '<':
61                      sb.append("&lt;");
62  
63                      break;
64  
65                  case '>':
66                      sb.append("&gt;");
67  
68                      break;
69  
70                  case '&':
71                      sb.append("&amp;");
72  
73                      break;
74  
75                  case '"':
76                      sb.append("&#034;");
77  
78                      break;
79  
80                  case '\'':
81                      sb.append("&#039;");
82  
83                      break;
84  
85                  default:
86                      sb.append(c);
87  
88                      break;
89              }
90          }
91  
92          return sb.toString();
93      }
94  
95      public String escape(String text, int type) {
96          if (text == null) {
97              return null;
98          }
99  
100         String prefix = StringPool.BLANK;
101         String postfix = StringPool.BLANK;
102 
103         if (type == ESCAPE_MODE_ATTRIBUTE) {
104             prefix = "&#x";
105             postfix = StringPool.SEMICOLON;
106         }
107         else if (type == ESCAPE_MODE_CSS) {
108             prefix = StringPool.BACK_SLASH;
109         }
110         else if (type == ESCAPE_MODE_JS) {
111             prefix = "\\x";
112         }
113         else if (type == ESCAPE_MODE_URL) {
114             return HttpUtil.encodeURL(text, true);
115         }
116         else {
117             return escape(text);
118         }
119 
120         StringBuilder sb = new StringBuilder();
121 
122         for (int i = 0; i < text.length(); i++) {
123             char c = text.charAt(i);
124 
125             if ((Character.isLetterOrDigit(c)) ||
126                 (c == CharPool.DASH) || (c == CharPool.UNDERLINE)) {
127 
128                 sb.append(c);
129             }
130             else {
131                 sb.append(prefix);
132                 sb.append(Integer.toHexString(c));
133                 sb.append(postfix);
134             }
135         }
136 
137         return sb.toString();
138     }
139 
140     public String escapeAttribute(String attribute) {
141         return escape(attribute, ESCAPE_MODE_ATTRIBUTE);
142     }
143 
144     public String escapeCSS(String css) {
145         return escape(css, ESCAPE_MODE_CSS);
146     }
147 
148     public String escapeJS(String js) {
149         return escape(js, ESCAPE_MODE_JS);
150     }
151 
152     public String escapeURL(String url) {
153         return escape(url, ESCAPE_MODE_URL);
154     }
155 
156     public String extractText(String html) {
157         if (html == null) {
158             return null;
159         }
160 
161         Source source = new Source(html);
162 
163         return source.getTextExtractor().toString();
164     }
165 
166     public String fromInputSafe(String text) {
167         return StringUtil.replace(text, "&amp;", "&");
168     }
169 
170     public String replaceMsWordCharacters(String text) {
171         return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
172     }
173 
174     public String stripBetween(String text, String tag) {
175         return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
176     }
177 
178     public String stripComments(String text) {
179         return StringUtil.stripBetween(text, "<!--", "-->");
180     }
181 
182     public String stripHtml(String text) {
183         if (text == null) {
184             return null;
185         }
186 
187         text = stripComments(text);
188 
189         StringBuilder sb = new StringBuilder(text.length());
190 
191         int x = 0;
192         int y = text.indexOf("<");
193 
194         while (y != -1) {
195             sb.append(text.substring(x, y));
196             sb.append(StringPool.SPACE);
197 
198             // Look for text enclosed by <script></script>
199 
200             boolean scriptFound = isScriptTag(text, y + 1);
201 
202             if (scriptFound) {
203                 int pos = y + _TAG_SCRIPT.length;
204 
205                 // Find end of the tag
206 
207                 pos = text.indexOf(">", pos);
208 
209                 if (pos >= 0) {
210 
211                     // Check if preceding character is / (i.e. is this instance
212                     // of <script/>)
213 
214                     if (text.charAt(pos-1) != '/') {
215 
216                         // Search for the ending </script> tag
217 
218                         for (;;) {
219                             pos = text.indexOf("</", pos);
220 
221                             if (pos >= 0) {
222                                 if (isScriptTag(text, pos + 2)) {
223                                     y = pos;
224 
225                                     break;
226                                 }
227                                 else {
228 
229                                     // Skip past "</"
230 
231                                     pos += 2;
232                                 }
233                             }
234                             else {
235                                 break;
236                             }
237                         }
238                     }
239                 }
240             }
241 
242             x = text.indexOf(">", y);
243 
244             if (x == -1) {
245                 break;
246             }
247 
248             x++;
249 
250             if (x < y) {
251 
252                 // <b>Hello</b
253 
254                 break;
255             }
256 
257             y = text.indexOf("<", x);
258         }
259 
260         if (y == -1) {
261             sb.append(text.substring(x, text.length()));
262         }
263 
264         return sb.toString();
265     }
266 
267     public String toInputSafe(String text) {
268         return StringUtil.replace(
269             text,
270             new String[] {"&", "\""},
271             new String[] {"&amp;", "&quot;"});
272     }
273 
274     public String unescape(String text) {
275         if (text == null) {
276             return null;
277         }
278 
279         // Optimize this
280 
281         text = StringUtil.replace(text, "&lt;", "<");
282         text = StringUtil.replace(text, "&gt;", ">");
283         text = StringUtil.replace(text, "&amp;", "&");
284         text = StringUtil.replace(text, "&#034;", "\"");
285         text = StringUtil.replace(text, "&#039;", "'");
286         text = StringUtil.replace(text, "&#040;", "(");
287         text = StringUtil.replace(text, "&#041;", ")");
288         text = StringUtil.replace(text, "&#035;", "#");
289         text = StringUtil.replace(text, "&#037;", "%");
290         text = StringUtil.replace(text, "&#059;", ";");
291         text = StringUtil.replace(text, "&#043;", "+");
292         text = StringUtil.replace(text, "&#045;", "-");
293 
294         return text;
295     }
296 
297     protected boolean isScriptTag(String text, int pos) {
298         if (pos + _TAG_SCRIPT.length + 1 <= text.length()) {
299             char item;
300 
301             for (int i = 0; i < _TAG_SCRIPT.length; i++) {
302                 item = text.charAt(pos++);
303 
304                 if (Character.toLowerCase(item) != _TAG_SCRIPT[i]) {
305                     return false;
306                 }
307             }
308 
309             item = text.charAt(pos);
310 
311             // Check that char after "script" is not a letter (i.e. another tag)
312 
313             return !Character.isLetter(item);
314         }
315         else {
316             return false;
317         }
318     }
319 
320     private static final String[] _MS_WORD_UNICODE = new String[] {
321         "\u00ae", "\u2019", "\u201c", "\u201d"
322     };
323 
324     private static final String[] _MS_WORD_HTML = new String[] {
325         "&reg;", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
326     };
327 
328     private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};
329 
330 }