1   /**
2    * Copyright (c) 2000-2009 Liferay, Inc. All rights reserved.
3    *
4    *
5    *
6    *
7    * The contents of this file are subject to the terms of the Liferay Enterprise
8    * Subscription License ("License"). You may not use this file except in
9    * compliance with the License. You can obtain a copy of the License by
10   * contacting Liferay, Inc. See the License for the specific language governing
11   * permissions and limitations under the License, including but not limited to
12   * distribution rights of the Software.
13   *
14   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20   * SOFTWARE.
21   */
22  
23  package com.liferay.util;
24  
25  import java.io.IOException;
26  import java.io.Reader;
27  
28  import java.util.ArrayList;
29  import java.util.List;
30  
31  import javax.swing.text.MutableAttributeSet;
32  import javax.swing.text.html.HTML;
33  import javax.swing.text.html.HTMLEditorKit;
34  
/**
 * <a href="HTMLParser.java.html"><b><i>View Source</i></b></a>
 *
 * <p>
 * Parses an HTML document with the Swing HTML parser and collects, in the
 * order the parser reports them, the <code>href</code> value of every
 * <code>a</code> tag and the <code>src</code> value of every <code>img</code>
 * tag. All parsing happens in the constructor; the getters only expose the
 * values that were collected.
 * </p>
 *
 * @author Brian Wing Shun Chan
 */
public class HTMLParser {

    /**
     * Parses the markup supplied by the reader and records the link and image
     * references found in it.
     *
     * @param  reader the character stream supplying the HTML markup
     * @throws IOException if the underlying parser fails while reading
     */
    public HTMLParser(Reader reader) throws IOException {
        HTMLEditorKit.Parser parser = new DefaultParser().getParser();

        // The last argument is the parser's ignoreCharSet flag: the input is
        // already a character stream, so charset directives in the document
        // are not acted upon.

        parser.parse(reader, new HTMLCallback(), true);
    }

    /**
     * Returns the <code>src</code> values of the <code>img</code> tags that
     * were found. The returned list is the parser's own list, not a copy.
     *
     * @return the image sources in the order they were encountered; never
     *         <code>null</code>
     */
    public List<String> getImages() {
        return _images;
    }

    /**
     * Returns the <code>href</code> values of the <code>a</code> tags that
     * were found. The returned list is the parser's own list, not a copy.
     *
     * @return the link targets in the order they were encountered; never
     *         <code>null</code>
     */
    public List<String> getLinks() {
        return _links;
    }

    private final List<String> _images = new ArrayList<String>();
    private final List<String> _links = new ArrayList<String>();

    /**
     * Exposes the protected {@link HTMLEditorKit#getParser()} so this class
     * can obtain the kit's parser. It needs no state from the enclosing
     * instance and is therefore static.
     */
    private static class DefaultParser extends HTMLEditorKit {

        @Override
        public HTMLEditorKit.Parser getParser() {
            return super.getParser();
        }

    }

    /**
     * Receives parser events and stores the references of interest in the
     * enclosing instance's lists. Only the start tag and simple tag events are
     * overridden; every other callback keeps the inherited implementation.
     */
    private class HTMLCallback extends HTMLEditorKit.ParserCallback {

        @Override
        public void handleStartTag(
            HTML.Tag tag, MutableAttributeSet attributes, int pos) {

            collect(tag, attributes);
        }

        @Override
        public void handleSimpleTag(
            HTML.Tag tag, MutableAttributeSet attributes, int pos) {

            collect(tag, attributes);
        }

        /**
         * Records the <code>href</code> of an <code>a</code> tag or the
         * <code>src</code> of an <code>img</code> tag; any other tag is
         * ignored. Shared by both tag callbacks so that a tag is treated the
         * same way whichever event reports it.
         */
        private void collect(HTML.Tag tag, MutableAttributeSet attributes) {
            if (tag.equals(HTML.Tag.A)) {
                addValue(
                    _links, attributes.getAttribute(HTML.Attribute.HREF));
            }
            else if (tag.equals(HTML.Tag.IMG)) {
                addValue(
                    _images, attributes.getAttribute(HTML.Attribute.SRC));
            }
        }

        /**
         * Appends the attribute value to the list when it is a string. A
         * missing attribute (<code>null</code>) or a value of any other type
         * is skipped instead of failing on a cast.
         */
        private void addValue(List<String> values, Object value) {
            if (value instanceof String) {
                values.add((String)value);
            }
        }

    }

}