1   /**
2    * Copyright (c) 2000-2009 Liferay, Inc. All rights reserved.
3    *
4    * The contents of this file are subject to the terms of the Liferay Enterprise
5    * Subscription License ("License"). You may not use this file except in
6    * compliance with the License. You can obtain a copy of the License by
7    * contacting Liferay, Inc. See the License for the specific language governing
8    * permissions and limitations under the License, including but not limited to
9    * distribution rights of the Software.
10   *
11   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
12   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
13   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
14   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
15   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
16   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
17   * SOFTWARE.
18   */
19  
20  package com.liferay.portal.util;
21  
22  import au.id.jericho.lib.html.Source;
23  
24  import com.liferay.portal.kernel.util.Html;
25  import com.liferay.portal.kernel.util.HttpUtil;
26  import com.liferay.portal.kernel.util.StringPool;
27  import com.liferay.portal.kernel.util.StringUtil;
28  
29  /**
30   * <a href="HtmlImpl.java.html"><b><i>View Source</i></b></a>
31   *
32   * @author Brian Wing Shun Chan
33   * @author Clarence Shen
34   * @author Harry Mark
35   * @author Samuel Kong
36   *
37   */
38  public class HtmlImpl implements Html {
39  
40      public static final int ESCAPE_MODE_ATTRIBUTE = 1;
41  
42      public static final int ESCAPE_MODE_CSS = 2;
43  
44      public static final int ESCAPE_MODE_JS = 3;
45  
46      public static final int ESCAPE_MODE_TEXT = 4;
47  
48      public static final int ESCAPE_MODE_URL = 5;
49  
50      public String escape(String text) {
51          if (text == null) {
52              return null;
53          }
54  
55          // Escape using XSS recommendations from
56          // http://www.owasp.org/index.php/Cross_Site_Scripting
57          // #How_to_Protect_Yourself
58  
59          StringBuilder sb = new StringBuilder(text.length());
60  
61          for (int i = 0; i < text.length(); i++) {
62              char c = text.charAt(i);
63  
64              switch (c) {
65                  case '<':
66                      sb.append("&lt;");
67  
68                      break;
69  
70                  case '>':
71                      sb.append("&gt;");
72  
73                      break;
74  
75                  case '&':
76                      sb.append("&amp;");
77  
78                      break;
79  
80                  case '"':
81                      sb.append("&#034;");
82  
83                      break;
84  
85                  case '\'':
86                      sb.append("&#039;");
87  
88                      break;
89  
90                  default:
91                      sb.append(c);
92  
93                      break;
94              }
95          }
96  
97          return sb.toString();
98      }
99  
100     public String escape(String text, int type) {
101         if (text == null){
102             return null;
103         }
104 
105         String prefix = StringPool.BLANK;
106         String postfix = StringPool.BLANK;
107 
108         if (type == ESCAPE_MODE_ATTRIBUTE) {
109             prefix = "&#x";
110             postfix = StringPool.SEMICOLON;
111         }
112         else if (type == ESCAPE_MODE_CSS) {
113             prefix = StringPool.BACK_SLASH;
114         }
115         else if (type == ESCAPE_MODE_JS) {
116             prefix = "\\x";
117         }
118         else if (type == ESCAPE_MODE_URL) {
119             return HttpUtil.encodeURL(text, true);
120         }
121         else {
122             return escape(text);
123         }
124 
125         StringBuilder sb = new StringBuilder();
126 
127         for (int i = 0; i < text.length(); i++) {
128             char c = text.charAt(i);
129 
130             if (Character.isLetterOrDigit(c)) {
131                 sb.append(c);
132             }
133             else {
134                 sb.append(prefix);
135                 sb.append(Integer.toHexString(c));
136                 sb.append(postfix);
137             }
138         }
139 
140         return sb.toString();
141     }
142 
143     public String escapeAttribute(String attribute) {
144         return escape(attribute, ESCAPE_MODE_ATTRIBUTE);
145     }
146 
147     public String escapeCSS(String css) {
148         return escape(css, ESCAPE_MODE_CSS);
149     }
150 
151     public String escapeJS(String js) {
152         return escape(js, ESCAPE_MODE_JS);
153     }
154 
155     public String escapeURL(String url) {
156         return escape(url, ESCAPE_MODE_URL);
157     }
158 
159     public String extractText(String html) {
160         if (html == null) {
161             return null;
162         }
163 
164         Source source = new Source(html);
165 
166         return source.getTextExtractor().toString();
167     }
168 
169     public String fromInputSafe(String text) {
170         return StringUtil.replace(text, "&amp;", "&");
171     }
172 
173     public String replaceMsWordCharacters(String text) {
174         return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
175     }
176 
177     public String stripBetween(String text, String tag) {
178         return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
179     }
180 
181     public String stripComments(String text) {
182         return StringUtil.stripBetween(text, "<!--", "-->");
183     }
184 
185     public String stripHtml(String text) {
186         if (text == null) {
187             return null;
188         }
189 
190         text = stripComments(text);
191 
192         StringBuilder sb = new StringBuilder(text.length());
193 
194         int x = 0;
195         int y = text.indexOf("<");
196 
197         while (y != -1) {
198             sb.append(text.substring(x, y));
199             sb.append(StringPool.SPACE);
200 
201             // Look for text enclosed by <script></script>
202 
203             boolean scriptFound = isScriptTag(text, y + 1);
204 
205             if (scriptFound) {
206                 int pos = y + _TAG_SCRIPT.length;
207 
208                 // Find end of the tag
209 
210                 pos = text.indexOf(">", pos);
211 
212                 if (pos >= 0) {
213 
214                     // Check if preceding character is / (i.e. is this instance
215                     // of <script/>)
216 
217                     if (text.charAt(pos-1) != '/') {
218 
219                         // Search for the ending </script> tag
220 
221                         for (;;) {
222                             pos = text.indexOf("</", pos);
223 
224                             if (pos >= 0) {
225                                 if (isScriptTag(text, pos + 2)) {
226                                     y = pos;
227 
228                                     break;
229                                 }
230                                 else {
231 
232                                     // Skip past "</"
233 
234                                     pos += 2;
235                                 }
236                             }
237                             else {
238                                 break;
239                             }
240                         }
241                     }
242                 }
243             }
244 
245             x = text.indexOf(">", y);
246 
247             if (x == -1) {
248                 break;
249             }
250 
251             x++;
252 
253             if (x < y) {
254 
255                 // <b>Hello</b
256 
257                 break;
258             }
259 
260             y = text.indexOf("<", x);
261         }
262 
263         if (y == -1) {
264             sb.append(text.substring(x, text.length()));
265         }
266 
267         return sb.toString();
268     }
269 
270     public String toInputSafe(String text) {
271         return StringUtil.replace(
272             text,
273             new String[] {"&", "\""},
274             new String[] {"&amp;", "&quot;"});
275     }
276 
277     public String unescape(String text) {
278         if (text == null) {
279             return null;
280         }
281 
282         // Optimize this
283 
284         text = StringUtil.replace(text, "&lt;", "<");
285         text = StringUtil.replace(text, "&gt;", ">");
286         text = StringUtil.replace(text, "&amp;", "&");
287         text = StringUtil.replace(text, "&#034;", "\"");
288         text = StringUtil.replace(text, "&#039;", "'");
289         text = StringUtil.replace(text, "&#040;", "(");
290         text = StringUtil.replace(text, "&#041;", ")");
291         text = StringUtil.replace(text, "&#035;", "#");
292         text = StringUtil.replace(text, "&#037;", "%");
293         text = StringUtil.replace(text, "&#059;", ";");
294         text = StringUtil.replace(text, "&#043;", "+");
295         text = StringUtil.replace(text, "&#045;", "-");
296 
297         return text;
298     }
299 
300     protected boolean isScriptTag(String text, int pos) {
301         if (pos + _TAG_SCRIPT.length + 1 <= text.length()) {
302             char item;
303 
304             for (int i = 0; i < _TAG_SCRIPT.length; i++) {
305                 item = text.charAt(pos++);
306 
307                 if (Character.toLowerCase(item) != _TAG_SCRIPT[i]) {
308                     return false;
309                 }
310             }
311 
312             item = text.charAt(pos);
313 
314             // Check that char after "script" is not a letter (i.e. another tag)
315 
316             return !Character.isLetter(item);
317         }
318         else {
319             return false;
320         }
321     }
322 
323     private static final String[] _MS_WORD_UNICODE = new String[] {
324         "\u00ae", "\u2019", "\u201c", "\u201d"
325     };
326 
327     private static final String[] _MS_WORD_HTML = new String[] {
328         "&reg;", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
329     };
330 
331     private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};
332 
333 }