1
14
15 package com.liferay.portal.util;
16
17 import com.liferay.portal.kernel.util.CharPool;
18 import com.liferay.portal.kernel.util.Html;
19 import com.liferay.portal.kernel.util.HttpUtil;
20 import com.liferay.portal.kernel.util.StringBundler;
21 import com.liferay.portal.kernel.util.StringPool;
22 import com.liferay.portal.kernel.util.StringUtil;
23
24 import java.util.regex.Matcher;
25 import java.util.regex.Pattern;
26
27 import net.htmlparser.jericho.Source;
28
29
38 public class HtmlImpl implements Html {
39
40 public static final int ESCAPE_MODE_ATTRIBUTE = 1;
41
42 public static final int ESCAPE_MODE_CSS = 2;
43
44 public static final int ESCAPE_MODE_JS = 3;
45
46 public static final int ESCAPE_MODE_TEXT = 4;
47
48 public static final int ESCAPE_MODE_URL = 5;
49
50 public String escape(String text) {
51 if (text == null) {
52 return null;
53 }
54
55 if (text.length() == 0) {
56 return StringPool.BLANK;
57 }
58
59
63 StringBuilder sb = new StringBuilder(text.length());
64
65 for (int i = 0; i < text.length(); i++) {
66 char c = text.charAt(i);
67
68 switch (c) {
69 case '<':
70 sb.append("<");
71
72 break;
73
74 case '>':
75 sb.append(">");
76
77 break;
78
79 case '&':
80 sb.append("&");
81
82 break;
83
84 case '"':
85 sb.append(""");
86
87 break;
88
89 case '\'':
90 sb.append("'");
91
92 break;
93
94 case '\u00bb': sb.append("»");
96
97 break;
98
99 default:
100 sb.append(c);
101
102 break;
103 }
104 }
105
106 if (sb.length() == text.length()) {
107 return text;
108 }
109 else {
110 return sb.toString();
111 }
112 }
113
114 public String escape(String text, int type) {
115 if (text == null) {
116 return null;
117 }
118
119 if (text.length() == 0) {
120 return StringPool.BLANK;
121 }
122
123 String prefix = StringPool.BLANK;
124 String postfix = StringPool.BLANK;
125
126 if (type == ESCAPE_MODE_ATTRIBUTE) {
127 prefix = "&#x";
128 postfix = StringPool.SEMICOLON;
129 }
130 else if (type == ESCAPE_MODE_CSS) {
131 prefix = StringPool.BACK_SLASH;
132 }
133 else if (type == ESCAPE_MODE_JS) {
134 prefix = "\\x";
135 }
136 else if (type == ESCAPE_MODE_URL) {
137 return HttpUtil.encodeURL(text, true);
138 }
139 else {
140 return escape(text);
141 }
142
143 StringBuilder sb = new StringBuilder();
144
145 for (int i = 0; i < text.length(); i++) {
146 char c = text.charAt(i);
147
148 if ((Character.isLetterOrDigit(c)) ||
149 (c == CharPool.DASH) || (c == CharPool.UNDERLINE)) {
150
151 sb.append(c);
152 }
153 else {
154 sb.append(prefix);
155 sb.append(Integer.toHexString(c));
156 sb.append(postfix);
157 }
158 }
159
160 if (sb.length() == text.length()) {
161 return text;
162 }
163 else {
164 return sb.toString();
165 }
166 }
167
168 public String escapeAttribute(String attribute) {
169 return escape(attribute, ESCAPE_MODE_ATTRIBUTE);
170 }
171
172 public String escapeCSS(String css) {
173 return escape(css, ESCAPE_MODE_CSS);
174 }
175
176 public String escapeHREF(String href) {
177 if (href == null) {
178 return null;
179 }
180
181 if (href.length() == 0) {
182 return StringPool.BLANK;
183 }
184
185 if (href.indexOf(StringPool.COLON) == 10) {
186 String protocol = href.substring(0, 10).toLowerCase();
187
188 if (protocol.equals("javascript")) {
189 return StringUtil.replaceFirst(href, StringPool.COLON, "%3a");
190 }
191 }
192
193 return href;
194 }
195
196 public String escapeJS(String js) {
197 return escape(js, ESCAPE_MODE_JS);
198 }
199
200 public String escapeURL(String url) {
201 return escape(url, ESCAPE_MODE_URL);
202 }
203
204 public String extractText(String html) {
205 if (html == null) {
206 return null;
207 }
208
209 Source source = new Source(html);
210
211 return source.getTextExtractor().toString();
212 }
213
214 public String fromInputSafe(String text) {
215 return StringUtil.replace(text, "&", "&");
216 }
217
218 public String replaceMsWordCharacters(String text) {
219 return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
220 }
221
222 public String stripBetween(String text, String tag) {
223 return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
224 }
225
226 public String stripComments(String text) {
227 return StringUtil.stripBetween(text, "<!--", "-->");
228 }
229
230 public String stripHtml(String text) {
231 if (text == null) {
232 return null;
233 }
234
235 text = stripComments(text);
236
237 StringBuilder sb = new StringBuilder(text.length());
238
239 int x = 0;
240 int y = text.indexOf("<");
241
242 while (y != -1) {
243 sb.append(text.substring(x, y));
244 sb.append(StringPool.SPACE);
245
246
248 boolean scriptFound = isScriptTag(text, y + 1);
249
250 if (scriptFound) {
251 int pos = y + _TAG_SCRIPT.length;
252
253
255 pos = text.indexOf(">", pos);
256
257 if (pos >= 0) {
258
259
262 if (text.charAt(pos-1) != '/') {
263
264
266 for (;;) {
267 pos = text.indexOf("</", pos);
268
269 if (pos >= 0) {
270 if (isScriptTag(text, pos + 2)) {
271 y = pos;
272
273 break;
274 }
275 else {
276
277
279 pos += 2;
280 }
281 }
282 else {
283 break;
284 }
285 }
286 }
287 }
288 }
289
290 x = text.indexOf(">", y);
291
292 if (x == -1) {
293 break;
294 }
295
296 x++;
297
298 if (x < y) {
299
300
302 break;
303 }
304
305 y = text.indexOf("<", x);
306 }
307
308 if (y == -1) {
309 sb.append(text.substring(x, text.length()));
310 }
311
312 return sb.toString();
313 }
314
315 public String toInputSafe(String text) {
316 return StringUtil.replace(
317 text,
318 new String[] {"&", "\""},
319 new String[] {"&", """});
320 }
321
322 public String unescape(String text) {
323 if (text == null) {
324 return null;
325 }
326
327 if (text.length() == 0) {
328 return StringPool.BLANK;
329 }
330
331
333 text = StringUtil.replace(text, "<", "<");
334 text = StringUtil.replace(text, ">", ">");
335 text = StringUtil.replace(text, "&", "&");
336 text = StringUtil.replace(text, """, "\"");
337 text = StringUtil.replace(text, "'", "'");
338 text = StringUtil.replace(text, "(", "(");
339 text = StringUtil.replace(text, ")", ")");
340 text = StringUtil.replace(text, ",", ",");
341 text = StringUtil.replace(text, "#", "#");
342 text = StringUtil.replace(text, "%", "%");
343 text = StringUtil.replace(text, ";", ";");
344 text = StringUtil.replace(text, "=", "=");
345 text = StringUtil.replace(text, "+", "+");
346 text = StringUtil.replace(text, "-", "-");
347
348 return text;
349 }
350
351 public String wordBreak(String text, int columns) {
352 StringBundler sb = new StringBundler();
353
354 int length = 0;
355 int lastWrite = 0;
356 int pos = 0;
357
358 Pattern pattern = Pattern.compile("([\\s<&]|$)");
359
360 Matcher matcher = pattern.matcher(text);
361
362 while (matcher.find()) {
363 if (matcher.start() < pos) {
364 continue;
365 }
366
367 while ((length + matcher.start() - pos) >= columns) {
368 pos += columns - length;
369
370 sb.append(text.substring(lastWrite, pos));
371 sb.append("<wbr/>");
372
373 length = 0;
374 lastWrite = pos;
375 }
376
377 length += matcher.start() - pos;
378
379 String group = matcher.group();
380
381 if (group.equals(StringPool.AMPERSAND)) {
382 int x = text.indexOf(StringPool.SEMICOLON, matcher.start());
383
384 if (x != -1) {
385 length++;
386 pos = x + 1;
387 }
388
389 continue;
390 }
391
392 if (group.equals(StringPool.LESS_THAN)) {
393 int x = text.indexOf(StringPool.GREATER_THAN, matcher.start());
394
395 if (x != -1) {
396 pos = x + 1;
397 }
398
399 continue;
400 }
401
402 if (group.equals(StringPool.SPACE) ||
403 group.equals(StringPool.NEW_LINE)) {
404
405 length = 0;
406 pos = matcher.start() + 1;
407 }
408 }
409
410 sb.append(text.substring(lastWrite));
411
412 return sb.toString();
413 }
414
415 protected boolean isScriptTag(String text, int pos) {
416 if (pos + _TAG_SCRIPT.length + 1 <= text.length()) {
417 char item;
418
419 for (int i = 0; i < _TAG_SCRIPT.length; i++) {
420 item = text.charAt(pos++);
421
422 if (Character.toLowerCase(item) != _TAG_SCRIPT[i]) {
423 return false;
424 }
425 }
426
427 item = text.charAt(pos);
428
429
431 return !Character.isLetter(item);
432 }
433 else {
434 return false;
435 }
436 }
437
438 private static final String[] _MS_WORD_UNICODE = new String[] {
439 "\u00ae", "\u2019", "\u201c", "\u201d"
440 };
441
442 private static final String[] _MS_WORD_HTML = new String[] {
443 "®", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
444 };
445
446 private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};
447
448 }