1
22
23 package com.liferay.portal.util;
24
25 import au.id.jericho.lib.html.Source;
26
27 import com.liferay.portal.kernel.util.Html;
28 import com.liferay.portal.kernel.util.StringPool;
29 import com.liferay.portal.kernel.util.StringUtil;
30
31
39 public class HtmlImpl implements Html {
40
41 public String escape(String text) {
42 if (text == null) {
43 return null;
44 }
45
46
50 StringBuilder sb = new StringBuilder(text.length());
51
52 for (int i = 0; i < text.length(); i++) {
53 char c = text.charAt(i);
54
55 switch (c) {
56 case '<':
57 sb.append("<");
58
59 break;
60
61 case '>':
62 sb.append(">");
63
64 break;
65
66 case '&':
67 sb.append("&");
68
69 break;
70
71 case '"':
72 sb.append(""");
73
74 break;
75
76 case '\'':
77 sb.append("'");
78
79 break;
80
81 default:
82 sb.append(c);
83
84 break;
85 }
86 }
87
88 return sb.toString();
89 }
90
91 public String extractText(String html) {
92 if (html == null) {
93 return null;
94 }
95
96 Source source = new Source(html);
97
98 return source.getTextExtractor().toString();
99 }
100
101 public String fromInputSafe(String text) {
102 return StringUtil.replace(text, "&", "&");
103 }
104
105 public String replaceMsWordCharacters(String text) {
106 return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
107 }
108
109 public String stripBetween(String text, String tag) {
110 return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
111 }
112
113 public String stripComments(String text) {
114 return StringUtil.stripBetween(text, "<!--", "-->");
115 }
116
117 public String stripHtml(String text) {
118 if (text == null) {
119 return null;
120 }
121
122 text = stripComments(text);
123
124 StringBuilder sb = new StringBuilder(text.length());
125
126 int x = 0;
127 int y = text.indexOf("<");
128
129 while (y != -1) {
130 sb.append(text.substring(x, y));
131 sb.append(StringPool.SPACE);
132
133
135 boolean scriptFound = isScriptTag(text, y + 1);
136
137 if (scriptFound) {
138 int pos = y + _TAG_SCRIPT.length;
139
140
142 pos = text.indexOf(">", pos);
143
144 if (pos >= 0) {
145
146
149 if (text.charAt(pos-1) != '/') {
150
151
153 for (;;) {
154 pos = text.indexOf("</", pos);
155
156 if (pos >= 0) {
157 if (isScriptTag(text, pos + 2)) {
158 y = pos;
159
160 break;
161 }
162 else {
163
164
166 pos += 2;
167 }
168 }
169 else {
170 break;
171 }
172 }
173 }
174 }
175 }
176
177 x = text.indexOf(">", y);
178
179 if (x == -1) {
180 break;
181 }
182
183 x++;
184
185 if (x < y) {
186
187
189 break;
190 }
191
192 y = text.indexOf("<", x);
193 }
194
195 if (y == -1) {
196 sb.append(text.substring(x, text.length()));
197 }
198
199 return sb.toString();
200 }
201
202 public String toInputSafe(String text) {
203 return StringUtil.replace(
204 text,
205 new String[] {"&", "\""},
206 new String[] {"&", """});
207 }
208
209 public String unescape(String text) {
210 if (text == null) {
211 return null;
212 }
213
214
216 text = StringUtil.replace(text, "<", "<");
217 text = StringUtil.replace(text, ">", ">");
218 text = StringUtil.replace(text, "&", "&");
219 text = StringUtil.replace(text, """, "\"");
220 text = StringUtil.replace(text, "'", "'");
221 text = StringUtil.replace(text, "(", "(");
222 text = StringUtil.replace(text, ")", ")");
223 text = StringUtil.replace(text, "#", "#");
224 text = StringUtil.replace(text, "%", "%");
225 text = StringUtil.replace(text, ";", ";");
226 text = StringUtil.replace(text, "+", "+");
227 text = StringUtil.replace(text, "-", "-");
228
229 return text;
230 }
231
232 protected boolean isScriptTag(String text, int pos) {
233 if (pos + _TAG_SCRIPT.length + 1 <= text.length()) {
234 char item;
235
236 for (int i = 0; i < _TAG_SCRIPT.length; i++) {
237 item = text.charAt(pos++);
238
239 if (Character.toLowerCase(item) != _TAG_SCRIPT[i]) {
240 return false;
241 }
242 }
243
244 item = text.charAt(pos);
245
246
248 return !Character.isLetter(item);
249 }
250 else {
251 return false;
252 }
253 }
254
255 private static final String[] _MS_WORD_UNICODE = new String[] {
256 "\u00ae", "\u2019", "\u201c", "\u201d"
257 };
258
259 private static final String[] _MS_WORD_HTML = new String[] {
260 "®", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
261 };
262
263 private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};
264
265 }