JerichoHTMLTextExtractor.java |
/**
 * Copyright (c) 2000-2009 Liferay, Inc. All rights reserved.
 *
 * The contents of this file are subject to the terms of the Liferay Enterprise
 * Subscription License ("License"). You may not use this file except in
 * compliance with the License. You can obtain a copy of the License by
 * contacting Liferay, Inc. See the License for the specific language governing
 * permissions and limitations under the License, including but not limited to
 * distribution rights of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

package com.liferay.util.lucene;

import au.id.jericho.lib.html.Source;

import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;

import org.apache.jackrabbit.extractor.HTMLTextExtractor;

/**
 * <a href="JerichoHTMLTextExtractor.java.html"><b><i>View Source</i></b></a>
 *
 * <p>
 * HTML text extractor that replaces the extraction logic of Jackrabbit's
 * {@link HTMLTextExtractor} with the Jericho HTML parser.
 * </p>
 *
 * @author Brian Wing Shun Chan
 *
 */
public class JerichoHTMLTextExtractor extends HTMLTextExtractor {

    /**
     * Extracts the text content of an HTML document.
     *
     * <p>
     * The document is parsed and its text extracted before this method
     * returns, so the returned reader is backed by an in-memory string. The
     * given stream is always closed by this method, whether extraction
     * succeeds or fails.
     * </p>
     *
     * @param  stream the binary stream containing the HTML document; closed
     *         by this method
     * @param  type the content type of the document (not used by this
     *         implementation)
     * @param  encoding the character encoding reported for the document (not
     *         used by this implementation; the stream is handed to Jericho's
     *         <code>Source</code> as raw bytes)
     * @return a reader over the extracted text
     * @throws IOException if the stream cannot be read or closed
     */
    public Reader extractText(InputStream stream, String type, String encoding)
        throws IOException {

        String text = null;

        try {
            Source source = new Source(stream);

            // Render eagerly so nothing depends on the stream after it has
            // been closed below.

            text = source.getTextExtractor().toString();
        }
        finally {

            // The returned StringReader has no link to the stream, so the
            // stream must be released here rather than when the reader is
            // closed.

            stream.close();
        }

        return new StringReader(text);
    }

}