1   /**
2    * Copyright (c) 2000-2010 Liferay, Inc. All rights reserved.
3    *
4    * This library is free software; you can redistribute it and/or modify it under
5    * the terms of the GNU Lesser General Public License as published by the Free
6    * Software Foundation; either version 2.1 of the License, or (at your option)
7    * any later version.
8    *
9    * This library is distributed in the hope that it will be useful, but WITHOUT
10   * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11   * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
12   * details.
13   */
14  
15  package com.liferay.util;
16  
17  import java.io.IOException;
18  import java.io.Reader;
19  
20  import java.util.ArrayList;
21  import java.util.List;
22  
23  import javax.swing.text.MutableAttributeSet;
24  import javax.swing.text.html.HTML;
25  import javax.swing.text.html.HTMLEditorKit;
26  
/**
 * Collects the hyperlink targets and image sources found in an HTML document.
 *
 * <p>
 * The document is parsed once, inside the constructor, with the Swing HTML
 * parser obtained from {@link HTMLEditorKit}. The value of the
 * <code>href</code> attribute of every <code>a</code> tag and the value of the
 * <code>src</code> attribute of every <code>img</code> tag are recorded in the
 * order in which the parser reports them.
 * </p>
 *
 * <a href="HTMLParser.java.html"><b><i>View Source</i></b></a>
 *
 * @author Brian Wing Shun Chan
 */
public class HTMLParser {

    /**
     * Parses the HTML supplied by the reader and records its links and images.
     *
     * @param  reader the source of the HTML markup; this class does not close
     *         it explicitly
     * @throws IOException if the underlying parser reports an I/O failure
     */
    public HTMLParser(Reader reader) throws IOException {
        HTMLEditorKit.Parser parser = new DefaultParser().getParser();

        // The last argument is the parser's "ignoreCharSet" flag.

        parser.parse(reader, new HTMLCallback(), true);
    }

    /**
     * Returns the <code>src</code> values of the <code>img</code> tags that
     * were seen while parsing.
     *
     * @return the internal (live, mutable) list of image sources
     */
    public List<String> getImages() {
        return _images;
    }

    /**
     * Returns the <code>href</code> values of the <code>a</code> tags that
     * were seen while parsing.
     *
     * @return the internal (live, mutable) list of link targets
     */
    public List<String> getLinks() {
        return _links;
    }

    /**
     * Appends the attribute value to the list when it is a string. Missing
     * attributes (<code>null</code>) and values of any other type are skipped
     * instead of failing with a <code>ClassCastException</code>.
     */
    private static void _addIfString(List<String> values, Object value) {
        if (value instanceof String) {
            values.add((String)value);
        }
    }

    private final List<String> _images = new ArrayList<String>();
    private final List<String> _links = new ArrayList<String>();

    /**
     * Widens the visibility of {@link HTMLEditorKit#getParser()} so that the
     * enclosing class can obtain the parser. It needs no state from the
     * enclosing instance and is therefore a static nested class.
     */
    private static final class DefaultParser extends HTMLEditorKit {

        @Override
        public HTMLEditorKit.Parser getParser() {
            return super.getParser();
        }

    }

    /**
     * Parser callback that feeds the enclosing instance's link and image
     * lists. Only tag events are of interest; every other callback keeps the
     * inherited implementation from {@link HTMLEditorKit.ParserCallback}.
     */
    private class HTMLCallback extends HTMLEditorKit.ParserCallback {

        @Override
        public void handleStartTag(
            HTML.Tag tag, MutableAttributeSet attributes, int pos) {

            _recordTag(tag, attributes);
        }

        @Override
        public void handleSimpleTag(
            HTML.Tag tag, MutableAttributeSet attributes, int pos) {

            _recordTag(tag, attributes);
        }

        /**
         * Shared handling for start tags and simple tags: records the
         * <code>href</code> of an anchor or the <code>src</code> of an image
         * and ignores every other tag.
         */
        private void _recordTag(
            HTML.Tag tag, MutableAttributeSet attributes) {

            if (tag.equals(HTML.Tag.A)) {
                _addIfString(
                    _links, attributes.getAttribute(HTML.Attribute.HREF));
            }
            else if (tag.equals(HTML.Tag.IMG)) {
                _addIfString(
                    _images, attributes.getAttribute(HTML.Attribute.SRC));
            }
        }

    }

}