1
14
15 package com.liferay.portal.util;
16
17 import com.liferay.portal.kernel.util.CharPool;
18 import com.liferay.portal.kernel.util.Html;
19 import com.liferay.portal.kernel.util.HttpUtil;
20 import com.liferay.portal.kernel.util.StringPool;
21 import com.liferay.portal.kernel.util.StringUtil;
22
23 import net.htmlparser.jericho.Source;
24
25
33 public class HtmlImpl implements Html {
34
35 public static final int ESCAPE_MODE_ATTRIBUTE = 1;
36
37 public static final int ESCAPE_MODE_CSS = 2;
38
39 public static final int ESCAPE_MODE_JS = 3;
40
41 public static final int ESCAPE_MODE_TEXT = 4;
42
43 public static final int ESCAPE_MODE_URL = 5;
44
45 public String escape(String text) {
46 if (text == null) {
47 return null;
48 }
49
50
54 StringBuilder sb = new StringBuilder(text.length());
55
56 for (int i = 0; i < text.length(); i++) {
57 char c = text.charAt(i);
58
59 switch (c) {
60 case '<':
61 sb.append("<");
62
63 break;
64
65 case '>':
66 sb.append(">");
67
68 break;
69
70 case '&':
71 sb.append("&");
72
73 break;
74
75 case '"':
76 sb.append(""");
77
78 break;
79
80 case '\'':
81 sb.append("'");
82
83 break;
84
85 default:
86 sb.append(c);
87
88 break;
89 }
90 }
91
92 return sb.toString();
93 }
94
95 public String escape(String text, int type) {
96 if (text == null) {
97 return null;
98 }
99
100 String prefix = StringPool.BLANK;
101 String postfix = StringPool.BLANK;
102
103 if (type == ESCAPE_MODE_ATTRIBUTE) {
104 prefix = "&#x";
105 postfix = StringPool.SEMICOLON;
106 }
107 else if (type == ESCAPE_MODE_CSS) {
108 prefix = StringPool.BACK_SLASH;
109 }
110 else if (type == ESCAPE_MODE_JS) {
111 prefix = "\\x";
112 }
113 else if (type == ESCAPE_MODE_URL) {
114 return HttpUtil.encodeURL(text, true);
115 }
116 else {
117 return escape(text);
118 }
119
120 StringBuilder sb = new StringBuilder();
121
122 for (int i = 0; i < text.length(); i++) {
123 char c = text.charAt(i);
124
125 if ((Character.isLetterOrDigit(c)) ||
126 (c == CharPool.DASH) || (c == CharPool.UNDERLINE)) {
127
128 sb.append(c);
129 }
130 else {
131 sb.append(prefix);
132 sb.append(Integer.toHexString(c));
133 sb.append(postfix);
134 }
135 }
136
137 return sb.toString();
138 }
139
140 public String escapeAttribute(String attribute) {
141 return escape(attribute, ESCAPE_MODE_ATTRIBUTE);
142 }
143
144 public String escapeCSS(String css) {
145 return escape(css, ESCAPE_MODE_CSS);
146 }
147
148 public String escapeJS(String js) {
149 return escape(js, ESCAPE_MODE_JS);
150 }
151
152 public String escapeURL(String url) {
153 return escape(url, ESCAPE_MODE_URL);
154 }
155
156 public String extractText(String html) {
157 if (html == null) {
158 return null;
159 }
160
161 Source source = new Source(html);
162
163 return source.getTextExtractor().toString();
164 }
165
166 public String fromInputSafe(String text) {
167 return StringUtil.replace(text, "&", "&");
168 }
169
170 public String replaceMsWordCharacters(String text) {
171 return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
172 }
173
174 public String stripBetween(String text, String tag) {
175 return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
176 }
177
178 public String stripComments(String text) {
179 return StringUtil.stripBetween(text, "<!--", "-->");
180 }
181
182 public String stripHtml(String text) {
183 if (text == null) {
184 return null;
185 }
186
187 text = stripComments(text);
188
189 StringBuilder sb = new StringBuilder(text.length());
190
191 int x = 0;
192 int y = text.indexOf("<");
193
194 while (y != -1) {
195 sb.append(text.substring(x, y));
196 sb.append(StringPool.SPACE);
197
198
200 boolean scriptFound = isScriptTag(text, y + 1);
201
202 if (scriptFound) {
203 int pos = y + _TAG_SCRIPT.length;
204
205
207 pos = text.indexOf(">", pos);
208
209 if (pos >= 0) {
210
211
214 if (text.charAt(pos-1) != '/') {
215
216
218 for (;;) {
219 pos = text.indexOf("</", pos);
220
221 if (pos >= 0) {
222 if (isScriptTag(text, pos + 2)) {
223 y = pos;
224
225 break;
226 }
227 else {
228
229
231 pos += 2;
232 }
233 }
234 else {
235 break;
236 }
237 }
238 }
239 }
240 }
241
242 x = text.indexOf(">", y);
243
244 if (x == -1) {
245 break;
246 }
247
248 x++;
249
250 if (x < y) {
251
252
254 break;
255 }
256
257 y = text.indexOf("<", x);
258 }
259
260 if (y == -1) {
261 sb.append(text.substring(x, text.length()));
262 }
263
264 return sb.toString();
265 }
266
267 public String toInputSafe(String text) {
268 return StringUtil.replace(
269 text,
270 new String[] {"&", "\""},
271 new String[] {"&", """});
272 }
273
274 public String unescape(String text) {
275 if (text == null) {
276 return null;
277 }
278
279
281 text = StringUtil.replace(text, "<", "<");
282 text = StringUtil.replace(text, ">", ">");
283 text = StringUtil.replace(text, "&", "&");
284 text = StringUtil.replace(text, """, "\"");
285 text = StringUtil.replace(text, "'", "'");
286 text = StringUtil.replace(text, "(", "(");
287 text = StringUtil.replace(text, ")", ")");
288 text = StringUtil.replace(text, "#", "#");
289 text = StringUtil.replace(text, "%", "%");
290 text = StringUtil.replace(text, ";", ";");
291 text = StringUtil.replace(text, "+", "+");
292 text = StringUtil.replace(text, "-", "-");
293
294 return text;
295 }
296
297 protected boolean isScriptTag(String text, int pos) {
298 if (pos + _TAG_SCRIPT.length + 1 <= text.length()) {
299 char item;
300
301 for (int i = 0; i < _TAG_SCRIPT.length; i++) {
302 item = text.charAt(pos++);
303
304 if (Character.toLowerCase(item) != _TAG_SCRIPT[i]) {
305 return false;
306 }
307 }
308
309 item = text.charAt(pos);
310
311
313 return !Character.isLetter(item);
314 }
315 else {
316 return false;
317 }
318 }
319
320 private static final String[] _MS_WORD_UNICODE = new String[] {
321 "\u00ae", "\u2019", "\u201c", "\u201d"
322 };
323
324 private static final String[] _MS_WORD_HTML = new String[] {
325 "®", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
326 };
327
328 private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};
329
330 }