001
014
015 package com.liferay.portal.util;
016
017 import com.liferay.portal.kernel.util.CharPool;
018 import com.liferay.portal.kernel.util.Html;
019 import com.liferay.portal.kernel.util.HttpUtil;
020 import com.liferay.portal.kernel.util.StringBundler;
021 import com.liferay.portal.kernel.util.StringPool;
022 import com.liferay.portal.kernel.util.StringUtil;
023
024 import java.util.regex.Matcher;
025 import java.util.regex.Pattern;
026
027 import net.htmlparser.jericho.Source;
028
029
036 public class HtmlImpl implements Html {
037
038 public static final int ESCAPE_MODE_ATTRIBUTE = 1;
039
040 public static final int ESCAPE_MODE_CSS = 2;
041
042 public static final int ESCAPE_MODE_JS = 3;
043
044 public static final int ESCAPE_MODE_TEXT = 4;
045
046 public static final int ESCAPE_MODE_URL = 5;
047
048 public String escape(String text) {
049 if (text == null) {
050 return null;
051 }
052
053 if (text.length() == 0) {
054 return StringPool.BLANK;
055 }
056
057
058
059
060
061 StringBuilder sb = new StringBuilder(text.length());
062
063 for (int i = 0; i < text.length(); i++) {
064 char c = text.charAt(i);
065
066 switch (c) {
067 case '<':
068 sb.append("<");
069
070 break;
071
072 case '>':
073 sb.append(">");
074
075 break;
076
077 case '&':
078 sb.append("&");
079
080 break;
081
082 case '"':
083 sb.append(""");
084
085 break;
086
087 case '\'':
088 sb.append("'");
089
090 break;
091
092 case '\u00bb':
093 sb.append("»");
094
095 break;
096
097 default:
098 sb.append(c);
099
100 break;
101 }
102 }
103
104 if (sb.length() == text.length()) {
105 return text;
106 }
107 else {
108 return sb.toString();
109 }
110 }
111
112 public String escape(String text, int type) {
113 if (text == null) {
114 return null;
115 }
116
117 if (text.length() == 0) {
118 return StringPool.BLANK;
119 }
120
121 String prefix = StringPool.BLANK;
122 String postfix = StringPool.BLANK;
123
124 if (type == ESCAPE_MODE_ATTRIBUTE) {
125 prefix = "&#x";
126 postfix = StringPool.SEMICOLON;
127 }
128 else if (type == ESCAPE_MODE_CSS) {
129 prefix = StringPool.BACK_SLASH;
130 }
131 else if (type == ESCAPE_MODE_JS) {
132 prefix = "\\x";
133 }
134 else if (type == ESCAPE_MODE_URL) {
135 return HttpUtil.encodeURL(text, true);
136 }
137 else {
138 return escape(text);
139 }
140
141 StringBuilder sb = new StringBuilder();
142
143 for (int i = 0; i < text.length(); i++) {
144 char c = text.charAt(i);
145
146 if ((Character.isLetterOrDigit(c)) ||
147 (c == CharPool.DASH) || (c == CharPool.UNDERLINE)) {
148
149 sb.append(c);
150 }
151 else {
152 sb.append(prefix);
153 sb.append(Integer.toHexString(c));
154 sb.append(postfix);
155 }
156 }
157
158 if (sb.length() == text.length()) {
159 return text;
160 }
161 else {
162 return sb.toString();
163 }
164 }
165
166 public String escapeAttribute(String attribute) {
167 return escape(attribute, ESCAPE_MODE_ATTRIBUTE);
168 }
169
170 public String escapeCSS(String css) {
171 return escape(css, ESCAPE_MODE_CSS);
172 }
173
174 public String escapeHREF(String href) {
175 if (href == null) {
176 return null;
177 }
178
179 if (href.length() == 0) {
180 return StringPool.BLANK;
181 }
182
183 if (href.indexOf(StringPool.COLON) == 10) {
184 String protocol = href.substring(0, 10).toLowerCase();
185
186 if (protocol.equals("javascript")) {
187 return StringUtil.replaceFirst(href, StringPool.COLON, "%3a");
188 }
189 }
190
191 return href;
192 }
193
194 public String escapeJS(String js) {
195 return escape(js, ESCAPE_MODE_JS);
196 }
197
198 public String escapeURL(String url) {
199 return escape(url, ESCAPE_MODE_URL);
200 }
201
202 public String extractText(String html) {
203 if (html == null) {
204 return null;
205 }
206
207 Source source = new Source(html);
208
209 return source.getTextExtractor().toString();
210 }
211
212 public String fromInputSafe(String text) {
213 return StringUtil.replace(text, "&", "&");
214 }
215
216 public String replaceMsWordCharacters(String text) {
217 return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
218 }
219
220 public String stripBetween(String text, String tag) {
221 return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
222 }
223
224 public String stripComments(String text) {
225 return StringUtil.stripBetween(text, "<!--", "-->");
226 }
227
228 public String stripHtml(String text) {
229 if (text == null) {
230 return null;
231 }
232
233 text = stripComments(text);
234
235 StringBuilder sb = new StringBuilder(text.length());
236
237 int x = 0;
238 int y = text.indexOf("<");
239
240 while (y != -1) {
241 sb.append(text.substring(x, y));
242 sb.append(StringPool.SPACE);
243
244
245
246 boolean scriptFound = isScriptTag(text, y + 1);
247
248 if (scriptFound) {
249 int pos = y + _TAG_SCRIPT.length;
250
251
252
253 pos = text.indexOf(">", pos);
254
255 if (pos >= 0) {
256
257
258
259
260 if (text.charAt(pos-1) != '/') {
261
262
263
264 for (;;) {
265 pos = text.indexOf("</", pos);
266
267 if (pos >= 0) {
268 if (isScriptTag(text, pos + 2)) {
269 y = pos;
270
271 break;
272 }
273 else {
274
275
276
277 pos += 2;
278 }
279 }
280 else {
281 break;
282 }
283 }
284 }
285 }
286 }
287
288 x = text.indexOf(">", y);
289
290 if (x == -1) {
291 break;
292 }
293
294 x++;
295
296 if (x < y) {
297
298
299
300 break;
301 }
302
303 y = text.indexOf("<", x);
304 }
305
306 if (y == -1) {
307 sb.append(text.substring(x, text.length()));
308 }
309
310 return sb.toString();
311 }
312
313 public String toInputSafe(String text) {
314 return StringUtil.replace(
315 text,
316 new String[] {"&", "\""},
317 new String[] {"&", """});
318 }
319
320 public String unescape(String text) {
321 if (text == null) {
322 return null;
323 }
324
325 if (text.length() == 0) {
326 return StringPool.BLANK;
327 }
328
329
330
331 text = StringUtil.replace(text, "<", "<");
332 text = StringUtil.replace(text, ">", ">");
333 text = StringUtil.replace(text, "&", "&");
334 text = StringUtil.replace(text, """, "\"");
335 text = StringUtil.replace(text, "'", "'");
336 text = StringUtil.replace(text, "(", "(");
337 text = StringUtil.replace(text, ")", ")");
338 text = StringUtil.replace(text, ",", ",");
339 text = StringUtil.replace(text, "#", "#");
340 text = StringUtil.replace(text, "%", "%");
341 text = StringUtil.replace(text, ";", ";");
342 text = StringUtil.replace(text, "=", "=");
343 text = StringUtil.replace(text, "+", "+");
344 text = StringUtil.replace(text, "-", "-");
345
346 return text;
347 }
348
349 public String wordBreak(String text, int columns) {
350 StringBundler sb = new StringBundler();
351
352 int length = 0;
353 int lastWrite = 0;
354 int pos = 0;
355
356 Pattern pattern = Pattern.compile("([\\s<&]|$)");
357
358 Matcher matcher = pattern.matcher(text);
359
360 while (matcher.find()) {
361 if (matcher.start() < pos) {
362 continue;
363 }
364
365 while ((length + matcher.start() - pos) >= columns) {
366 pos += columns - length;
367
368 sb.append(text.substring(lastWrite, pos));
369 sb.append("<wbr/>");
370
371 length = 0;
372 lastWrite = pos;
373 }
374
375 length += matcher.start() - pos;
376
377 String group = matcher.group();
378
379 if (group.equals(StringPool.AMPERSAND)) {
380 int x = text.indexOf(StringPool.SEMICOLON, matcher.start());
381
382 if (x != -1) {
383 length++;
384 pos = x + 1;
385 }
386
387 continue;
388 }
389
390 if (group.equals(StringPool.LESS_THAN)) {
391 int x = text.indexOf(StringPool.GREATER_THAN, matcher.start());
392
393 if (x != -1) {
394 pos = x + 1;
395 }
396
397 continue;
398 }
399
400 if (group.equals(StringPool.SPACE) ||
401 group.equals(StringPool.NEW_LINE)) {
402
403 length = 0;
404 pos = matcher.start() + 1;
405 }
406 }
407
408 sb.append(text.substring(lastWrite));
409
410 return sb.toString();
411 }
412
413 protected boolean isScriptTag(String text, int pos) {
414 if (pos + _TAG_SCRIPT.length + 1 <= text.length()) {
415 char item;
416
417 for (int i = 0; i < _TAG_SCRIPT.length; i++) {
418 item = text.charAt(pos++);
419
420 if (Character.toLowerCase(item) != _TAG_SCRIPT[i]) {
421 return false;
422 }
423 }
424
425 item = text.charAt(pos);
426
427
428
429 return !Character.isLetter(item);
430 }
431 else {
432 return false;
433 }
434 }
435
436 private static final String[] _MS_WORD_UNICODE = new String[] {
437 "\u00ae", "\u2019", "\u201c", "\u201d"
438 };
439
440 private static final String[] _MS_WORD_HTML = new String[] {
441 "®", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
442 };
443
444 private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};
445
446 }