1   /**
2    * Copyright (c) 2000-2010 Liferay, Inc. All rights reserved.
3    *
4    * The contents of this file are subject to the terms of the Liferay Enterprise
5    * Subscription License ("License"). You may not use this file except in
6    * compliance with the License. You can obtain a copy of the License by
7    * contacting Liferay, Inc. See the License for the specific language governing
8    * permissions and limitations under the License, including but not limited to
9    * distribution rights of the Software.
10   *
11   *
12   *
13   */
14  
15  package com.liferay.util.lucene;
16  
17  import com.liferay.portal.kernel.io.unsync.UnsyncStringReader;
18  
19  import java.io.IOException;
20  import java.io.InputStream;
21  import java.io.Reader;
22  
23  import net.htmlparser.jericho.Source;
24  
25  import org.apache.jackrabbit.extractor.HTMLTextExtractor;
26  
27  /**
28   * <a href="JerichoHTMLTextExtractor.java.html"><b><i>View Source</i></b></a>
29   *
30   * @author Brian Wing Shun Chan
31   */
32  public class JerichoHTMLTextExtractor extends HTMLTextExtractor {
33  
34      public Reader extractText(InputStream stream, String type, String encoding)
35          throws IOException {
36  
37          Source source = new Source(stream);
38  
39          return new UnsyncStringReader(source.getTextExtractor().toString());
40      }
41  
42  }