1   /**
2    * Copyright (c) 2000-2009 Liferay, Inc. All rights reserved.
3    *
4    * Permission is hereby granted, free of charge, to any person obtaining a copy
5    * of this software and associated documentation files (the "Software"), to deal
6    * in the Software without restriction, including without limitation the rights
7    * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8    * copies of the Software, and to permit persons to whom the Software is
9    * furnished to do so, subject to the following conditions:
10   *
11   * The above copyright notice and this permission notice shall be included in
12   * all copies or substantial portions of the Software.
13   *
14   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20   * SOFTWARE.
21   */
22  
23  package com.liferay.util;
24  
25  import java.io.IOException;
26  import java.io.Reader;
27  
28  import java.util.ArrayList;
29  import java.util.List;
30  
31  import javax.swing.text.MutableAttributeSet;
32  import javax.swing.text.html.HTML;
33  import javax.swing.text.html.HTMLEditorKit;
34  
/**
 * <a href="HTMLParser.java.html"><b><i>View Source</i></b></a>
 *
 * <p>
 * Runs the Swing HTML parser over a document and collects the
 * <code>href</code> attribute of every <code>a</code> tag and the
 * <code>src</code> attribute of every <code>img</code> tag, in the order the
 * parser reports them. All parsing happens once, inside the constructor; the
 * getters only expose the collected values.
 * </p>
 *
 * @author Brian Wing Shun Chan
 *
 */
public class HTMLParser {

    /**
     * Parses the HTML supplied by the reader and records the link and image
     * references it contains. This constructor does not close the reader
     * itself.
     *
     * @param  reader the source of the HTML text to parse
     * @throws IOException if the underlying parser fails while reading
     */
    public HTMLParser(Reader reader) throws IOException {
        HTMLEditorKit.Parser parser = new DefaultParser().getParser();

        // The last argument is the parser's ignoreCharSet flag.

        parser.parse(reader, new HTMLCallback(), true);
    }

    /**
     * Returns the <code>src</code> values of the <code>img</code> tags that
     * were found while parsing.
     *
     * @return the internal (live, mutable) list of image sources
     */
    public List<String> getImages() {
        return _images;
    }

    /**
     * Returns the <code>href</code> values of the <code>a</code> tags that
     * were found while parsing.
     *
     * @return the internal (live, mutable) list of link targets
     */
    public List<String> getLinks() {
        return _links;
    }

    private final List<String> _images = new ArrayList<String>();
    private final List<String> _links = new ArrayList<String>();

    /**
     * Widens the visibility of {@link HTMLEditorKit#getParser()} so that the
     * enclosing class can obtain the kit's parser. Declared static because it
     * never touches the enclosing instance.
     */
    private static class DefaultParser extends HTMLEditorKit {

        @Override
        public HTMLEditorKit.Parser getParser() {
            return super.getParser();
        }

    }

    /**
     * Parser callback that stores link and image references in the enclosing
     * instance. Only the start-tag and simple-tag events are overridden; the
     * remaining callbacks keep the no-op behavior inherited from
     * {@link HTMLEditorKit.ParserCallback}.
     */
    private class HTMLCallback extends HTMLEditorKit.ParserCallback {

        @Override
        public void handleStartTag(
            HTML.Tag tag, MutableAttributeSet attributes, int pos) {

            _collect(tag, attributes);
        }

        @Override
        public void handleSimpleTag(
            HTML.Tag tag, MutableAttributeSet attributes, int pos) {

            _collect(tag, attributes);
        }

        /**
         * Records the <code>href</code> of an <code>a</code> tag or the
         * <code>src</code> of an <code>img</code> tag; every other tag is
         * ignored. Shared by both tag callbacks so they cannot drift apart.
         */
        private void _collect(HTML.Tag tag, MutableAttributeSet attributes) {
            if (tag.equals(HTML.Tag.A)) {
                _addValue(attributes, HTML.Attribute.HREF, _links);
            }
            else if (tag.equals(HTML.Tag.IMG)) {
                _addValue(attributes, HTML.Attribute.SRC, _images);
            }
        }

        /**
         * Appends the named attribute's value to the target list when the
         * attribute is present.
         */
        private void _addValue(
            MutableAttributeSet attributes, HTML.Attribute name,
            List<String> target) {

            // getAttribute is declared to return Object, so convert rather
            // than cast: a non-String value must not abort the whole parse
            // with a ClassCastException.

            Object value = attributes.getAttribute(name);

            if (value != null) {
                target.add(value.toString());
            }
        }

    }

}