1   /**
2    * Copyright (c) 2000-2009 Liferay, Inc. All rights reserved.
3    *
4    * Permission is hereby granted, free of charge, to any person obtaining a copy
5    * of this software and associated documentation files (the "Software"), to deal
6    * in the Software without restriction, including without limitation the rights
7    * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8    * copies of the Software, and to permit persons to whom the Software is
9    * furnished to do so, subject to the following conditions:
10   *
11   * The above copyright notice and this permission notice shall be included in
12   * all copies or substantial portions of the Software.
13   *
14   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20   * SOFTWARE.
21   */
22  
23  package com.liferay.util.lucene;
24  
25  import au.id.jericho.lib.html.Source;
26  
27  import java.io.IOException;
28  import java.io.InputStream;
29  import java.io.Reader;
30  import java.io.StringReader;
31  
32  import org.apache.jackrabbit.extractor.HTMLTextExtractor;
33  
34  /**
35   * <a href="JerichoHTMLTextExtractor.java.html"><b><i>View Source</i></b></a>
36   *
37   * @author Brian Wing Shun Chan
38   *
39   */
40  public class JerichoHTMLTextExtractor extends HTMLTextExtractor {
41  
42      public Reader extractText(InputStream stream, String type, String encoding)
43          throws IOException {
44  
45          Source source = new Source(stream);
46  
47          return new StringReader(source.getTextExtractor().toString());
48      }
49  
50  }