001
014
015 package com.liferay.util.lucene;
016
017 import com.liferay.portal.kernel.io.unsync.UnsyncStringReader;
018
019 import java.io.IOException;
020 import java.io.InputStream;
021 import java.io.Reader;
022
023 import net.htmlparser.jericho.Source;
024
025 import org.apache.jackrabbit.extractor.HTMLTextExtractor;
026
027
030 public class JerichoHTMLTextExtractor extends HTMLTextExtractor {
031
032 public Reader extractText(InputStream stream, String type, String encoding)
033 throws IOException {
034
035 Source source = new Source(stream);
036
037 return new UnsyncStringReader(source.getTextExtractor().toString());
038 }
039
040 }