001
014
015 package com.liferay.portal.search.lucene;
016
017 import com.liferay.portal.kernel.io.unsync.UnsyncStringReader;
018 import com.liferay.portal.kernel.log.Log;
019 import com.liferay.portal.kernel.log.LogFactoryUtil;
020 import com.liferay.portal.kernel.search.Field;
021 import com.liferay.portal.kernel.util.CharPool;
022 import com.liferay.portal.kernel.util.PropsKeys;
023 import com.liferay.portal.kernel.util.StringPool;
024 import com.liferay.portal.kernel.util.StringUtil;
025 import com.liferay.portal.kernel.util.Validator;
026 import com.liferay.portal.util.PropsUtil;
027 import com.liferay.util.lucene.KeywordsUtil;
028
029 import java.io.IOException;
030
031 import java.util.HashSet;
032 import java.util.Map;
033 import java.util.Set;
034 import java.util.concurrent.ConcurrentHashMap;
035
036 import org.apache.lucene.analysis.Analyzer;
037 import org.apache.lucene.analysis.TokenStream;
038 import org.apache.lucene.analysis.WhitespaceAnalyzer;
039 import org.apache.lucene.document.Document;
040 import org.apache.lucene.index.Term;
041 import org.apache.lucene.queryParser.ParseException;
042 import org.apache.lucene.queryParser.QueryParser;
043 import org.apache.lucene.search.BooleanClause;
044 import org.apache.lucene.search.BooleanQuery;
045 import org.apache.lucene.search.IndexSearcher;
046 import org.apache.lucene.search.Query;
047 import org.apache.lucene.search.TermQuery;
048 import org.apache.lucene.search.WildcardQuery;
049 import org.apache.lucene.search.highlight.Highlighter;
050 import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
051 import org.apache.lucene.search.highlight.QueryScorer;
052 import org.apache.lucene.search.highlight.QueryTermExtractor;
053 import org.apache.lucene.search.highlight.SimpleFragmenter;
054 import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
055 import org.apache.lucene.search.highlight.WeightedTerm;
056
057
062 public class LuceneHelperImpl implements LuceneHelper {
063
064 public void addDocument(long companyId, Document document)
065 throws IOException {
066
067 IndexAccessor indexAccessor = _getIndexAccessor(companyId);
068
069 indexAccessor.addDocument(document);
070 }
071
072 public void addExactTerm(
073 BooleanQuery booleanQuery, String field, String value) {
074
075
076
077 Query query = new TermQuery(new Term(field, value));
078
079 booleanQuery.add(query, BooleanClause.Occur.SHOULD);
080 }
081
082 public void addRequiredTerm(
083 BooleanQuery booleanQuery, String field, String value, boolean like) {
084
085 if (like) {
086 value = StringUtil.replace(
087 value, CharPool.PERCENT, CharPool.STAR);
088
089 value = value.toLowerCase();
090
091 WildcardQuery wildcardQuery = new WildcardQuery(
092 new Term(field, value));
093
094 booleanQuery.add(wildcardQuery, BooleanClause.Occur.MUST);
095 }
096 else {
097
098
099 Term term = new Term(field, value);
100 TermQuery termQuery = new TermQuery(term);
101
102 booleanQuery.add(termQuery, BooleanClause.Occur.MUST);
103 }
104 }
105
106 public void addTerm(
107 BooleanQuery booleanQuery, String field, String value, boolean like)
108 throws ParseException {
109
110 if (Validator.isNull(value)) {
111 return;
112 }
113
114 if (like) {
115 value = StringUtil.replace(
116 value, StringPool.PERCENT, StringPool.BLANK);
117
118 value = value.toLowerCase();
119
120 Term term = new Term(
121 field, StringPool.STAR.concat(value).concat(StringPool.STAR));
122
123 WildcardQuery wildcardQuery = new WildcardQuery(term);
124
125 booleanQuery.add(wildcardQuery, BooleanClause.Occur.SHOULD);
126 }
127 else {
128 QueryParser queryParser = new QueryParser(field, getAnalyzer());
129
130 try {
131 Query query = queryParser.parse(value);
132
133 booleanQuery.add(query, BooleanClause.Occur.SHOULD);
134 }
135 catch (ParseException pe) {
136 if (_log.isDebugEnabled()) {
137 _log.debug(
138 "ParseException thrown, reverting to literal search",
139 pe);
140 }
141
142 value = KeywordsUtil.escape(value);
143
144 Query query = queryParser.parse(value);
145
146 booleanQuery.add(query, BooleanClause.Occur.SHOULD);
147 }
148 }
149 }
150
151 public void delete(long companyId) {
152 IndexAccessor indexAccessor = _getIndexAccessor(companyId);
153
154 indexAccessor.delete();
155 }
156
157 public void deleteDocuments(long companyId, Term term) throws IOException {
158 IndexAccessor indexAccessor = _getIndexAccessor(companyId);
159
160 indexAccessor.deleteDocuments(term);
161 }
162
163 public Analyzer getAnalyzer() {
164 try {
165 return (Analyzer)_analyzerClass.newInstance();
166 }
167 catch (Exception e) {
168 throw new RuntimeException(e);
169 }
170 }
171
172 public String[] getQueryTerms(Query query) {
173 String[] fieldNames = new String[] {
174 Field.CONTENT, Field.DESCRIPTION, Field.PROPERTIES, Field.TITLE,
175 Field.USER_NAME
176 };
177
178 WeightedTerm[] weightedTerms = null;
179
180 for (String fieldName : fieldNames) {
181 weightedTerms = QueryTermExtractor.getTerms(
182 query, false, fieldName);
183
184 if (weightedTerms.length > 0) {
185 break;
186 }
187 }
188
189 Set<String> queryTerms = new HashSet<String>();
190
191 for (WeightedTerm weightedTerm : weightedTerms) {
192 queryTerms.add(weightedTerm.getTerm());
193 }
194
195 return queryTerms.toArray(new String[queryTerms.size()]);
196 }
197
198 public IndexSearcher getSearcher(long companyId, boolean readOnly)
199 throws IOException {
200
201 IndexAccessor indexAccessor = _getIndexAccessor(companyId);
202
203 return new IndexSearcher(indexAccessor.getLuceneDir(), readOnly);
204 }
205
206 public String getSnippet(
207 Query query, String field, String s, int maxNumFragments,
208 int fragmentLength, String fragmentSuffix, String preTag,
209 String postTag)
210 throws IOException {
211
212 SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(
213 preTag, postTag);
214
215 QueryScorer queryScorer = new QueryScorer(query, field);
216
217 Highlighter highlighter = new Highlighter(
218 simpleHTMLFormatter, queryScorer);
219
220 highlighter.setTextFragmenter(new SimpleFragmenter(fragmentLength));
221
222 TokenStream tokenStream = getAnalyzer().tokenStream(
223 field, new UnsyncStringReader(s));
224
225 try {
226 String snippet = highlighter.getBestFragments(
227 tokenStream, s, maxNumFragments, fragmentSuffix);
228
229 if (Validator.isNotNull(snippet) &&
230 !StringUtil.endsWith(snippet, fragmentSuffix)) {
231
232 snippet = snippet + fragmentSuffix;
233 }
234
235 return snippet;
236 }
237 catch (InvalidTokenOffsetsException itoe) {
238 throw new IOException(itoe.getMessage());
239 }
240 }
241
242 public void updateDocument(long companyId, Term term, Document document)
243 throws IOException {
244
245 IndexAccessor indexAccessor = _getIndexAccessor(companyId);
246
247 indexAccessor.updateDocument(term, document);
248 }
249
250 public void shutdown() {
251 for (IndexAccessor indexAccessor : _indexAccessorMap.values()) {
252 indexAccessor.close();
253 }
254 }
255
256 private LuceneHelperImpl() {
257 String analyzerName = PropsUtil.get(PropsKeys.LUCENE_ANALYZER);
258
259 if (Validator.isNotNull(analyzerName)) {
260 try {
261 _analyzerClass = Class.forName(analyzerName);
262 }
263 catch (Exception e) {
264 _log.error(e);
265 }
266 }
267 }
268
269 private IndexAccessor _getIndexAccessor(long companyId) {
270 IndexAccessor indexAccessor = _indexAccessorMap.get(companyId);
271
272 if (indexAccessor == null) {
273 synchronized (this) {
274 indexAccessor = _indexAccessorMap.get(companyId);
275
276 if (indexAccessor == null) {
277 indexAccessor = new IndexAccessorImpl(companyId);
278
279 _indexAccessorMap.put(companyId, indexAccessor);
280 }
281 }
282 }
283
284 return indexAccessor;
285 }
286
287 private static Log _log = LogFactoryUtil.getLog(LuceneHelperImpl.class);
288
289 private Class<?> _analyzerClass = WhitespaceAnalyzer.class;
290 private Map<Long, IndexAccessor> _indexAccessorMap =
291 new ConcurrentHashMap<Long, IndexAccessor>();
292
293 }