1   /**
2    * Copyright (c) 2000-2009 Liferay, Inc. All rights reserved.
3    *
4    * The contents of this file are subject to the terms of the Liferay Enterprise
5    * Subscription License ("License"). You may not use this file except in
6    * compliance with the License. You can obtain a copy of the License by
7    * contacting Liferay, Inc. See the License for the specific language governing
8    * permissions and limitations under the License, including but not limited to
9    * distribution rights of the Software.
10   *
11   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
12   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
13   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
14   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
15   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
16   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
17   * SOFTWARE.
18   */
19  
20  package com.liferay.util.lucene;
21  
22  import au.id.jericho.lib.html.Source;
23  
24  import java.io.IOException;
25  import java.io.InputStream;
26  import java.io.Reader;
27  import java.io.StringReader;
28  
29  import org.apache.jackrabbit.extractor.HTMLTextExtractor;
30  
31  /**
32   * <a href="JerichoHTMLTextExtractor.java.html"><b><i>View Source</i></b></a>
33   *
34   * @author Brian Wing Shun Chan
35   *
36   */
37  public class JerichoHTMLTextExtractor extends HTMLTextExtractor {
38  
39      public Reader extractText(InputStream stream, String type, String encoding)
40          throws IOException {
41  
42          Source source = new Source(stream);
43  
44          return new StringReader(source.getTextExtractor().toString());
45      }
46  
47  }