1
22
23 package com.liferay.portal.util;
24
25 import au.id.jericho.lib.html.Source;
26
27 import com.liferay.portal.kernel.util.Html;
28 import com.liferay.portal.kernel.util.StringMaker;
29 import com.liferay.portal.kernel.util.StringPool;
30 import com.liferay.portal.kernel.util.StringUtil;
31
32
40 public class HtmlImpl implements Html {
41
42 public String escape(String text) {
43 if (text == null) {
44 return null;
45 }
46
47
51 StringMaker sm = new StringMaker(text.length());
52
53 for (int i = 0; i < text.length(); i++) {
54 char c = text.charAt(i);
55
56 switch (c) {
57 case '<':
58 sm.append("<");
59
60 break;
61
62 case '>':
63 sm.append(">");
64
65 break;
66
67 case '&':
68 sm.append("&");
69
70 break;
71
72 case '"':
73 sm.append(""");
74
75 break;
76
77 case '\'':
78 sm.append("'");
79
80 break;
81
82 case '(':
83 sm.append("(");
84
85 break;
86
87 case ')':
88 sm.append(")");
89
90 break;
91
92 case '#':
93 sm.append("#");
94
95 break;
96
97 case '%':
98 sm.append("%");
99
100 break;
101
102 case ';':
103 sm.append(";");
104
105 break;
106
107 case '+':
108 sm.append("+");
109
110 break;
111
112 case '-':
113 sm.append("-");
114
115 break;
116
117 default:
118 sm.append(c);
119
120 break;
121 }
122 }
123
124 return sm.toString();
125 }
126
127 public String extractText(String html) {
128 if (html == null) {
129 return null;
130 }
131
132 Source source = new Source(html);
133
134 return source.getTextExtractor().toString();
135 }
136
137 public String fromInputSafe(String text) {
138 return StringUtil.replace(text, "&", "&");
139 }
140
141 public String replaceMsWordCharacters(String text) {
142 return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
143 }
144
145 public String stripBetween(String text, String tag) {
146 return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
147 }
148
149 public String stripComments(String text) {
150 return StringUtil.stripBetween(text, "<!--", "-->");
151 }
152
153 public String stripHtml(String text) {
154 if (text == null) {
155 return null;
156 }
157
158 text = stripComments(text);
159
160 StringMaker sm = new StringMaker(text.length());
161
162 int x = 0;
163 int y = text.indexOf("<");
164
165 while (y != -1) {
166 sm.append(text.substring(x, y));
167 sm.append(StringPool.SPACE);
168
169
171 boolean scriptFound = isScriptTag(text, y + 1);
172
173 if (scriptFound) {
174 int pos = y + _TAG_SCRIPT.length;
175
176
178 pos = text.indexOf(">", pos);
179
180 if (pos >= 0) {
181
182
185 if (text.charAt(pos-1) != '/') {
186
187
189 for (;;) {
190 pos = text.indexOf("</", pos);
191
192 if (pos >= 0) {
193 if (isScriptTag(text, pos + 2)) {
194 y = pos;
195
196 break;
197 }
198 else {
199
200
202 pos += 2;
203 }
204 }
205 else {
206 break;
207 }
208 }
209 }
210 }
211 }
212
213 x = text.indexOf(">", y);
214
215 if (x == -1) {
216 break;
217 }
218
219 x++;
220
221 if (x < y) {
222
223
225 break;
226 }
227
228 y = text.indexOf("<", x);
229 }
230
231 if (y == -1) {
232 sm.append(text.substring(x, text.length()));
233 }
234
235 return sm.toString();
236 }
237
238 public String toInputSafe(String text) {
239 return StringUtil.replace(
240 text,
241 new String[] {"&", "\""},
242 new String[] {"&", """});
243 }
244
245 public String unescape(String text) {
246 if (text == null) {
247 return null;
248 }
249
250
252 text = StringUtil.replace(text, "<", "<");
253 text = StringUtil.replace(text, ">", ">");
254 text = StringUtil.replace(text, "&", "&");
255 text = StringUtil.replace(text, """, "\"");
256 text = StringUtil.replace(text, "'", "'");
257 text = StringUtil.replace(text, "(", "(");
258 text = StringUtil.replace(text, ")", ")");
259 text = StringUtil.replace(text, "#", "#");
260 text = StringUtil.replace(text, "%", "%");
261 text = StringUtil.replace(text, ";", ";");
262 text = StringUtil.replace(text, "+", "+");
263 text = StringUtil.replace(text, "-", "-");
264
265 return text;
266 }
267
268 protected boolean isScriptTag(String text, int pos) {
269 if (pos + _TAG_SCRIPT.length + 1 <= text.length()) {
270 char item;
271
272 for (int i = 0; i < _TAG_SCRIPT.length; i++) {
273 item = text.charAt(pos++);
274
275 if (Character.toLowerCase(item) != _TAG_SCRIPT[i]) {
276 return false;
277 }
278 }
279
280 item = text.charAt(pos);
281
282
284 return !Character.isLetter(item);
285 }
286 else {
287 return false;
288 }
289 }
290
291 private static final String[] _MS_WORD_UNICODE = new String[] {
292 "\u00ae", "\u2019", "\u201c", "\u201d"
293 };
294
295 private static final String[] _MS_WORD_HTML = new String[] {
296 "®", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
297 };
298
299 private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};
300
301 }