1   /**
2    * Copyright (c) 2000-2010 Liferay, Inc. All rights reserved.
3    *
4    * This library is free software; you can redistribute it and/or modify it under
5    * the terms of the GNU Lesser General Public License as published by the Free
6    * Software Foundation; either version 2.1 of the License, or (at your option)
7    * any later version.
8    *
9    * This library is distributed in the hope that it will be useful, but WITHOUT
10   * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11   * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
12   * details.
13   */
14  
15  package com.liferay.portal.search.lucene;
16  
17  import com.liferay.portal.kernel.io.unsync.UnsyncStringReader;
18  import com.liferay.portal.kernel.log.Log;
19  import com.liferay.portal.kernel.log.LogFactoryUtil;
20  import com.liferay.portal.kernel.search.Field;
21  import com.liferay.portal.kernel.util.PropsKeys;
22  import com.liferay.portal.kernel.util.StringPool;
23  import com.liferay.portal.kernel.util.StringUtil;
24  import com.liferay.portal.kernel.util.Validator;
25  import com.liferay.portal.util.PropsUtil;
26  import com.liferay.util.lucene.KeywordsUtil;
27  
28  import java.io.IOException;
29  
30  import java.util.HashSet;
31  import java.util.Map;
32  import java.util.Set;
33  import java.util.concurrent.ConcurrentHashMap;
34  
35  import org.apache.lucene.analysis.Analyzer;
36  import org.apache.lucene.analysis.TokenStream;
37  import org.apache.lucene.analysis.WhitespaceAnalyzer;
38  import org.apache.lucene.document.Document;
39  import org.apache.lucene.index.Term;
40  import org.apache.lucene.queryParser.ParseException;
41  import org.apache.lucene.queryParser.QueryParser;
42  import org.apache.lucene.search.BooleanClause;
43  import org.apache.lucene.search.BooleanQuery;
44  import org.apache.lucene.search.IndexSearcher;
45  import org.apache.lucene.search.Query;
46  import org.apache.lucene.search.TermQuery;
47  import org.apache.lucene.search.WildcardQuery;
48  import org.apache.lucene.search.highlight.Highlighter;
49  import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
50  import org.apache.lucene.search.highlight.QueryScorer;
51  import org.apache.lucene.search.highlight.QueryTermExtractor;
52  import org.apache.lucene.search.highlight.SimpleFragmenter;
53  import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
54  import org.apache.lucene.search.highlight.WeightedTerm;
55  
56  /**
57   * <a href="LuceneHelperImpl.java.html"><b><i>View Source</i></b></a>
58   *
59   * @author Brian Wing Shun Chan
60   * @author Harry Mark
61   * @author Bruno Farache
62   */
63  public class LuceneHelperImpl implements LuceneHelper {
64  
65      public void addDocument(long companyId, Document document)
66          throws IOException {
67  
68          IndexAccessor indexAccessor = _getIndexAccessor(companyId);
69  
70          indexAccessor.addDocument(document);
71      }
72  
73      public void addExactTerm(
74          BooleanQuery booleanQuery, String field, String value) {
75  
76          //text = KeywordsUtil.escape(value);
77  
78          Query query = new TermQuery(new Term(field, value));
79  
80          booleanQuery.add(query, BooleanClause.Occur.SHOULD);
81      }
82  
83      public void addRequiredTerm(
84          BooleanQuery booleanQuery, String field, String value, boolean like) {
85  
86          if (like) {
87              value = StringUtil.replace(
88                  value, StringPool.PERCENT, StringPool.STAR);
89  
90              value = value.toLowerCase();
91  
92              WildcardQuery wildcardQuery = new WildcardQuery(
93                  new Term(field, value));
94  
95              booleanQuery.add(wildcardQuery, BooleanClause.Occur.MUST);
96          }
97          else {
98              //text = KeywordsUtil.escape(value);
99  
100             Term term = new Term(field, value);
101             TermQuery termQuery = new TermQuery(term);
102 
103             booleanQuery.add(termQuery, BooleanClause.Occur.MUST);
104         }
105     }
106 
107     public void addTerm(
108             BooleanQuery booleanQuery, String field, String value, boolean like)
109         throws ParseException {
110 
111         if (Validator.isNull(value)) {
112             return;
113         }
114 
115         if (like) {
116             value = value.toLowerCase();
117 
118             Term term = new Term(
119                 field, StringPool.STAR.concat(value).concat(StringPool.STAR));
120 
121             WildcardQuery wildcardQuery = new WildcardQuery(term);
122 
123             booleanQuery.add(wildcardQuery, BooleanClause.Occur.SHOULD);
124         }
125         else {
126             QueryParser queryParser = new QueryParser(field, getAnalyzer());
127 
128             try {
129                 Query query = queryParser.parse(value);
130 
131                 booleanQuery.add(query, BooleanClause.Occur.SHOULD);
132             }
133             catch (ParseException pe) {
134                 if (_log.isDebugEnabled()) {
135                     _log.debug(
136                         "ParseException thrown, reverting to literal search",
137                         pe);
138                 }
139 
140                 value = KeywordsUtil.escape(value);
141 
142                 Query query = queryParser.parse(value);
143 
144                 booleanQuery.add(query, BooleanClause.Occur.SHOULD);
145             }
146         }
147     }
148 
149     public void delete(long companyId) {
150         IndexAccessor indexAccessor = _getIndexAccessor(companyId);
151 
152         indexAccessor.delete();
153     }
154 
155     public void deleteDocuments(long companyId, Term term) throws IOException {
156         IndexAccessor indexAccessor = _getIndexAccessor(companyId);
157 
158         indexAccessor.deleteDocuments(term);
159     }
160 
161     public Analyzer getAnalyzer() {
162         try {
163             return (Analyzer)_analyzerClass.newInstance();
164         }
165         catch (Exception e) {
166             throw new RuntimeException(e);
167         }
168     }
169 
170     public String[] getQueryTerms(Query query) {
171         String[] fieldNames = new String[] {
172             Field.CONTENT, Field.DESCRIPTION, Field.PROPERTIES, Field.TITLE,
173             Field.USER_NAME
174         };
175 
176         WeightedTerm[] weightedTerms = null;
177 
178         for (String fieldName : fieldNames) {
179             weightedTerms = QueryTermExtractor.getTerms(
180                 query, false, fieldName);
181 
182             if (weightedTerms.length > 0) {
183                 break;
184             }
185         }
186 
187         Set<String> queryTerms = new HashSet<String>();
188 
189         for (WeightedTerm weightedTerm : weightedTerms) {
190             queryTerms.add(weightedTerm.getTerm());
191         }
192 
193         return queryTerms.toArray(new String[queryTerms.size()]);
194     }
195 
196     public IndexSearcher getSearcher(long companyId, boolean readOnly)
197         throws IOException {
198 
199         IndexAccessor indexAccessor = _getIndexAccessor(companyId);
200 
201         return new IndexSearcher(indexAccessor.getLuceneDir(), readOnly);
202     }
203 
204     public String getSnippet(
205             Query query, String field, String s, int maxNumFragments,
206             int fragmentLength, String fragmentSuffix, String preTag,
207             String postTag)
208         throws IOException {
209 
210         SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(
211             preTag, postTag);
212 
213         QueryScorer queryScorer = new QueryScorer(query, field);
214 
215         Highlighter highlighter = new Highlighter(
216             simpleHTMLFormatter, queryScorer);
217 
218         highlighter.setTextFragmenter(new SimpleFragmenter(fragmentLength));
219 
220         TokenStream tokenStream = getAnalyzer().tokenStream(
221             field, new UnsyncStringReader(s));
222 
223         try {
224             String snippet = highlighter.getBestFragments(
225                 tokenStream, s, maxNumFragments, fragmentSuffix);
226 
227             if (Validator.isNotNull(snippet) &&
228                 !StringUtil.endsWith(snippet, fragmentSuffix)) {
229 
230                 snippet = snippet + fragmentSuffix;
231             }
232 
233             return snippet;
234         }
235         catch (InvalidTokenOffsetsException itoe) {
236             throw new IOException(itoe.getMessage());
237         }
238     }
239 
240     public void updateDocument(long companyId, Term term, Document document)
241         throws IOException {
242 
243         IndexAccessor indexAccessor = _getIndexAccessor(companyId);
244 
245         indexAccessor.updateDocument(term, document);
246     }
247 
248     public void shutdown() {
249         for (IndexAccessor indexAccessor : _indexAccessorMap.values()) {
250             indexAccessor.close();
251         }
252     }
253 
254     private LuceneHelperImpl() {
255         String analyzerName = PropsUtil.get(PropsKeys.LUCENE_ANALYZER);
256 
257         if (Validator.isNotNull(analyzerName)) {
258             try {
259                 _analyzerClass = Class.forName(analyzerName);
260             }
261             catch (Exception e) {
262                 _log.error(e);
263             }
264         }
265     }
266 
267     private IndexAccessor _getIndexAccessor(long companyId) {
268         IndexAccessor indexAccessor = _indexAccessorMap.get(companyId);
269 
270         if (indexAccessor == null) {
271             synchronized (this) {
272                 indexAccessor = _indexAccessorMap.get(companyId);
273 
274                 if (indexAccessor == null) {
275                     indexAccessor = new IndexAccessorImpl(companyId);
276 
277                     _indexAccessorMap.put(companyId, indexAccessor);
278                 }
279             }
280         }
281 
282         return indexAccessor;
283     }
284 
285     private static Log _log = LogFactoryUtil.getLog(LuceneHelperImpl.class);
286 
287     private Class<?> _analyzerClass = WhitespaceAnalyzer.class;
288     private Map<Long, IndexAccessor> _indexAccessorMap =
289         new ConcurrentHashMap<Long, IndexAccessor>();
290 
291 }