001    /**
002     * Copyright (c) 2000-2010 Liferay, Inc. All rights reserved.
003     *
004     * The contents of this file are subject to the terms of the Liferay Enterprise
005     * Subscription License ("License"). You may not use this file except in
006     * compliance with the License. You can obtain a copy of the License by
007     * contacting Liferay, Inc. See the License for the specific language governing
008     * permissions and limitations under the License, including but not limited to
009     * distribution rights of the Software.
010     *
011     *
012     *
013     */
014    
015    package com.liferay.portal.search.lucene;
016    
017    import com.liferay.portal.kernel.io.unsync.UnsyncStringReader;
018    import com.liferay.portal.kernel.log.Log;
019    import com.liferay.portal.kernel.log.LogFactoryUtil;
020    import com.liferay.portal.kernel.search.Field;
021    import com.liferay.portal.kernel.util.CharPool;
022    import com.liferay.portal.kernel.util.PropsKeys;
023    import com.liferay.portal.kernel.util.StringPool;
024    import com.liferay.portal.kernel.util.StringUtil;
025    import com.liferay.portal.kernel.util.Validator;
026    import com.liferay.portal.util.PropsUtil;
027    import com.liferay.util.lucene.KeywordsUtil;
028    
029    import java.io.IOException;
030    
031    import java.util.HashSet;
032    import java.util.Map;
033    import java.util.Set;
034    import java.util.concurrent.ConcurrentHashMap;
035    
036    import org.apache.lucene.analysis.Analyzer;
037    import org.apache.lucene.analysis.TokenStream;
038    import org.apache.lucene.analysis.WhitespaceAnalyzer;
039    import org.apache.lucene.document.Document;
040    import org.apache.lucene.index.Term;
041    import org.apache.lucene.queryParser.ParseException;
042    import org.apache.lucene.queryParser.QueryParser;
043    import org.apache.lucene.search.BooleanClause;
044    import org.apache.lucene.search.BooleanQuery;
045    import org.apache.lucene.search.IndexSearcher;
046    import org.apache.lucene.search.Query;
047    import org.apache.lucene.search.TermQuery;
048    import org.apache.lucene.search.WildcardQuery;
049    import org.apache.lucene.search.highlight.Highlighter;
050    import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
051    import org.apache.lucene.search.highlight.QueryScorer;
052    import org.apache.lucene.search.highlight.QueryTermExtractor;
053    import org.apache.lucene.search.highlight.SimpleFragmenter;
054    import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
055    import org.apache.lucene.search.highlight.WeightedTerm;
056    
057    /**
058     * @author Brian Wing Shun Chan
059     * @author Harry Mark
060     * @author Bruno Farache
061     */
062    public class LuceneHelperImpl implements LuceneHelper {
063    
064            public void addDocument(long companyId, Document document)
065                    throws IOException {
066    
067                    IndexAccessor indexAccessor = _getIndexAccessor(companyId);
068    
069                    indexAccessor.addDocument(document);
070            }
071    
072            public void addExactTerm(
073                    BooleanQuery booleanQuery, String field, String value) {
074    
075                    //text = KeywordsUtil.escape(value);
076    
077                    Query query = new TermQuery(new Term(field, value));
078    
079                    booleanQuery.add(query, BooleanClause.Occur.SHOULD);
080            }
081    
082            public void addRequiredTerm(
083                    BooleanQuery booleanQuery, String field, String value, boolean like) {
084    
085                    if (like) {
086                            value = StringUtil.replace(
087                                    value, CharPool.PERCENT, CharPool.STAR);
088    
089                            value = value.toLowerCase();
090    
091                            WildcardQuery wildcardQuery = new WildcardQuery(
092                                    new Term(field, value));
093    
094                            booleanQuery.add(wildcardQuery, BooleanClause.Occur.MUST);
095                    }
096                    else {
097                            //text = KeywordsUtil.escape(value);
098    
099                            Term term = new Term(field, value);
100                            TermQuery termQuery = new TermQuery(term);
101    
102                            booleanQuery.add(termQuery, BooleanClause.Occur.MUST);
103                    }
104            }
105    
106            public void addTerm(
107                            BooleanQuery booleanQuery, String field, String value, boolean like)
108                    throws ParseException {
109    
110                    if (Validator.isNull(value)) {
111                            return;
112                    }
113    
114                    if (like) {
115                            value = StringUtil.replace(
116                                    value, StringPool.PERCENT, StringPool.BLANK);
117    
118                            value = value.toLowerCase();
119    
120                            Term term = new Term(
121                                    field, StringPool.STAR.concat(value).concat(StringPool.STAR));
122    
123                            WildcardQuery wildcardQuery = new WildcardQuery(term);
124    
125                            booleanQuery.add(wildcardQuery, BooleanClause.Occur.SHOULD);
126                    }
127                    else {
128                            QueryParser queryParser = new QueryParser(field, getAnalyzer());
129    
130                            try {
131                                    Query query = queryParser.parse(value);
132    
133                                    booleanQuery.add(query, BooleanClause.Occur.SHOULD);
134                            }
135                            catch (ParseException pe) {
136                                    if (_log.isDebugEnabled()) {
137                                            _log.debug(
138                                                    "ParseException thrown, reverting to literal search",
139                                                    pe);
140                                    }
141    
142                                    value = KeywordsUtil.escape(value);
143    
144                                    Query query = queryParser.parse(value);
145    
146                                    booleanQuery.add(query, BooleanClause.Occur.SHOULD);
147                            }
148                    }
149            }
150    
151            public void delete(long companyId) {
152                    IndexAccessor indexAccessor = _getIndexAccessor(companyId);
153    
154                    indexAccessor.delete();
155            }
156    
157            public void deleteDocuments(long companyId, Term term) throws IOException {
158                    IndexAccessor indexAccessor = _getIndexAccessor(companyId);
159    
160                    indexAccessor.deleteDocuments(term);
161            }
162    
163            public Analyzer getAnalyzer() {
164                    try {
165                            return (Analyzer)_analyzerClass.newInstance();
166                    }
167                    catch (Exception e) {
168                            throw new RuntimeException(e);
169                    }
170            }
171    
172            public String[] getQueryTerms(Query query) {
173                    String[] fieldNames = new String[] {
174                            Field.CONTENT, Field.DESCRIPTION, Field.PROPERTIES, Field.TITLE,
175                            Field.USER_NAME
176                    };
177    
178                    WeightedTerm[] weightedTerms = null;
179    
180                    for (String fieldName : fieldNames) {
181                            weightedTerms = QueryTermExtractor.getTerms(
182                                    query, false, fieldName);
183    
184                            if (weightedTerms.length > 0) {
185                                    break;
186                            }
187                    }
188    
189                    Set<String> queryTerms = new HashSet<String>();
190    
191                    for (WeightedTerm weightedTerm : weightedTerms) {
192                            queryTerms.add(weightedTerm.getTerm());
193                    }
194    
195                    return queryTerms.toArray(new String[queryTerms.size()]);
196            }
197    
198            public IndexSearcher getSearcher(long companyId, boolean readOnly)
199                    throws IOException {
200    
201                    IndexAccessor indexAccessor = _getIndexAccessor(companyId);
202    
203                    return new IndexSearcher(indexAccessor.getLuceneDir(), readOnly);
204            }
205    
206            public String getSnippet(
207                            Query query, String field, String s, int maxNumFragments,
208                            int fragmentLength, String fragmentSuffix, String preTag,
209                            String postTag)
210                    throws IOException {
211    
212                    SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(
213                            preTag, postTag);
214    
215                    QueryScorer queryScorer = new QueryScorer(query, field);
216    
217                    Highlighter highlighter = new Highlighter(
218                            simpleHTMLFormatter, queryScorer);
219    
220                    highlighter.setTextFragmenter(new SimpleFragmenter(fragmentLength));
221    
222                    TokenStream tokenStream = getAnalyzer().tokenStream(
223                            field, new UnsyncStringReader(s));
224    
225                    try {
226                            String snippet = highlighter.getBestFragments(
227                                    tokenStream, s, maxNumFragments, fragmentSuffix);
228    
229                            if (Validator.isNotNull(snippet) &&
230                                    !StringUtil.endsWith(snippet, fragmentSuffix)) {
231    
232                                    snippet = snippet + fragmentSuffix;
233                            }
234    
235                            return snippet;
236                    }
237                    catch (InvalidTokenOffsetsException itoe) {
238                            throw new IOException(itoe.getMessage());
239                    }
240            }
241    
242            public void updateDocument(long companyId, Term term, Document document)
243                    throws IOException {
244    
245                    IndexAccessor indexAccessor = _getIndexAccessor(companyId);
246    
247                    indexAccessor.updateDocument(term, document);
248            }
249    
250            public void shutdown() {
251                    for (IndexAccessor indexAccessor : _indexAccessorMap.values()) {
252                            indexAccessor.close();
253                    }
254            }
255    
256            private LuceneHelperImpl() {
257                    String analyzerName = PropsUtil.get(PropsKeys.LUCENE_ANALYZER);
258    
259                    if (Validator.isNotNull(analyzerName)) {
260                            try {
261                                    _analyzerClass = Class.forName(analyzerName);
262                            }
263                            catch (Exception e) {
264                                    _log.error(e);
265                            }
266                    }
267            }
268    
269            private IndexAccessor _getIndexAccessor(long companyId) {
270                    IndexAccessor indexAccessor = _indexAccessorMap.get(companyId);
271    
272                    if (indexAccessor == null) {
273                            synchronized (this) {
274                                    indexAccessor = _indexAccessorMap.get(companyId);
275    
276                                    if (indexAccessor == null) {
277                                            indexAccessor = new IndexAccessorImpl(companyId);
278    
279                                            _indexAccessorMap.put(companyId, indexAccessor);
280                                    }
281                            }
282                    }
283    
284                    return indexAccessor;
285            }
286    
287            private static Log _log = LogFactoryUtil.getLog(LuceneHelperImpl.class);
288    
289            private Class<?> _analyzerClass = WhitespaceAnalyzer.class;
290            private Map<Long, IndexAccessor> _indexAccessorMap =
291                    new ConcurrentHashMap<Long, IndexAccessor>();
292    
293    }