1   /**
2    * Copyright (c) 2000-2009 Liferay, Inc. All rights reserved.
3    *
4    * The contents of this file are subject to the terms of the Liferay Enterprise
5    * Subscription License ("License"). You may not use this file except in
6    * compliance with the License. You can obtain a copy of the License by
7    * contacting Liferay, Inc. See the License for the specific language governing
8    * permissions and limitations under the License, including but not limited to
9    * distribution rights of the Software.
10   *
11   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
12   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
13   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
14   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
15   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
16   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
17   * SOFTWARE.
18   */
19  
20  package com.liferay.util;
21  
22  import java.io.IOException;
23  import java.io.Reader;
24  
25  import java.util.ArrayList;
26  import java.util.List;
27  
28  import javax.swing.text.MutableAttributeSet;
29  import javax.swing.text.html.HTML;
30  import javax.swing.text.html.HTMLEditorKit;
31  
/**
 * <a href="HTMLParser.java.html"><b><i>View Source</i></b></a>
 *
 * <p>
 * Scans an HTML document and collects the <code>href</code> values of its
 * anchor tags and the <code>src</code> values of its image tags. The whole
 * document is parsed inside the constructor; afterwards the results are
 * available through {@link #getLinks()} and {@link #getImages()}.
 * </p>
 *
 * @author Brian Wing Shun Chan
 *
 */
public class HTMLParser {

    /**
     * Parses the markup supplied by the reader and records every link and
     * image reference found in it, in the order the parser reports them.
     *
     * @param  reader the source of the HTML markup; this class does not close
     *         it itself
     * @throws IOException if the underlying parser fails while reading
     */
    public HTMLParser(Reader reader) throws IOException {
        HTMLEditorKit.Parser parser = new DefaultParser().getParser();

        // The last argument is the parser's ignoreCharSet flag

        parser.parse(reader, new HTMLCallback(), true);
    }

    /**
     * Returns the <code>src</code> values collected from image tags.
     *
     * @return the internal list itself (not a copy); empty if no image was
     *         found
     */
    public List<String> getImages() {
        return _images;
    }

    /**
     * Returns the <code>href</code> values collected from anchor tags.
     *
     * @return the internal list itself (not a copy); empty if no link was
     *         found
     */
    public List<String> getLinks() {
        return _links;
    }

    private final List<String> _images = new ArrayList<String>();
    private final List<String> _links = new ArrayList<String>();

    /**
     * Widens the visibility of {@link HTMLEditorKit#getParser()}, which is
     * protected, so that the enclosing class can obtain the parser. It needs
     * no state from the enclosing instance and is therefore static.
     */
    private static class DefaultParser extends HTMLEditorKit {

        @Override
        public HTMLEditorKit.Parser getParser() {
            return super.getParser();
        }

    }

    /**
     * Receives parser events and stores link and image references in the
     * enclosing instance. Only start tags and simple tags are inspected; all
     * other events keep the no-op behavior inherited from
     * {@link HTMLEditorKit.ParserCallback}.
     */
    private class HTMLCallback extends HTMLEditorKit.ParserCallback {

        @Override
        public void handleStartTag(
            HTML.Tag tag, MutableAttributeSet attributes, int pos) {

            collect(tag, attributes);
        }

        @Override
        public void handleSimpleTag(
            HTML.Tag tag, MutableAttributeSet attributes, int pos) {

            collect(tag, attributes);
        }

        /**
         * Shared handling for start and simple tags: records the
         * <code>href</code> of an anchor tag or the <code>src</code> of an
         * image tag. Any other tag is ignored.
         */
        private void collect(HTML.Tag tag, MutableAttributeSet attributes) {
            if (tag.equals(HTML.Tag.A)) {
                addIfString(
                    _links, attributes.getAttribute(HTML.Attribute.HREF));
            }
            else if (tag.equals(HTML.Tag.IMG)) {
                addIfString(
                    _images, attributes.getAttribute(HTML.Attribute.SRC));
            }
        }

        /**
         * Appends the value to the list when it is a string. A missing
         * attribute (<code>null</code>) or a value of any other type is
         * skipped instead of failing the parse with a
         * <code>ClassCastException</code>.
         */
        private void addIfString(List<String> values, Object value) {
            if (value instanceof String) {
                values.add((String)value);
            }
        }

    }

}