001    /**
002     * Copyright (c) 2000-2010 Liferay, Inc. All rights reserved.
003     *
004     * The contents of this file are subject to the terms of the Liferay Enterprise
005     * Subscription License ("License"). You may not use this file except in
006     * compliance with the License. You can obtain a copy of the License by
007     * contacting Liferay, Inc. See the License for the specific language governing
008     * permissions and limitations under the License, including but not limited to
009     * distribution rights of the Software.
010     *
011     *
012     *
013     */
014    
015    package com.liferay.portal.search.lucene;
016    
017    import com.liferay.portal.kernel.dao.orm.QueryUtil;
018    import com.liferay.portal.kernel.log.Log;
019    import com.liferay.portal.kernel.log.LogFactoryUtil;
020    import com.liferay.portal.kernel.search.Document;
021    import com.liferay.portal.kernel.search.DocumentImpl;
022    import com.liferay.portal.kernel.search.Field;
023    import com.liferay.portal.kernel.search.Hits;
024    import com.liferay.portal.kernel.search.HitsImpl;
025    import com.liferay.portal.kernel.search.IndexSearcher;
026    import com.liferay.portal.kernel.search.Query;
027    import com.liferay.portal.kernel.search.SearchException;
028    import com.liferay.portal.kernel.search.Sort;
029    import com.liferay.portal.kernel.util.StringUtil;
030    import com.liferay.portal.kernel.util.Time;
031    import com.liferay.portal.kernel.util.Validator;
032    
033    import java.io.IOException;
034    
035    import java.util.List;
036    
037    import org.apache.lucene.queryParser.ParseException;
038    import org.apache.lucene.search.BooleanQuery;
039    import org.apache.lucene.search.SortField;
040    
041    /**
042     * @author Bruno Farache
043     */
044    public class LuceneIndexSearcherImpl implements IndexSearcher {
045    
046            public Hits search(
047                            long companyId, Query query, Sort[] sorts, int start, int end)
048                    throws SearchException {
049    
050                    if (_log.isDebugEnabled()) {
051                            _log.debug("Query " + query);
052                    }
053    
054                    Hits hits = null;
055    
056                    org.apache.lucene.search.IndexSearcher searcher = null;
057                    org.apache.lucene.search.Sort luceneSort = null;
058    
059                    try {
060                            searcher = LuceneHelperUtil.getSearcher(companyId, true);
061    
062                            if (sorts != null) {
063                                    searcher.setDefaultFieldSortScoring(true, true);
064    
065                                    SortField[] sortFields = new SortField[sorts.length];
066    
067                                    for (int i = 0; i < sorts.length; i++) {
068                                            Sort sort = sorts[i];
069    
070                                            sortFields[i] = new SortField(
071                                                    sort.getFieldName(), sort.getType(), sort.isReverse());
072                                    }
073    
074                                    luceneSort = new org.apache.lucene.search.Sort(sortFields);
075                            }
076    
077                            long startTime = System.currentTimeMillis();
078    
079                            org.apache.lucene.search.Hits luceneHits = searcher.search(
080                                    QueryTranslator.translate(query), luceneSort);
081    
082                            long endTime = System.currentTimeMillis();
083    
084                            float searchTime = (float)(endTime - startTime) / Time.SECOND;
085    
086                            hits = subset(luceneHits, query, startTime, searchTime, start, end);
087                    }
088                    catch (BooleanQuery.TooManyClauses tmc) {
089                            int maxClauseCount = BooleanQuery.getMaxClauseCount();
090    
091                            BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
092    
093                            try {
094                                    long startTime = System.currentTimeMillis();
095    
096                                    org.apache.lucene.search.Hits luceneHits = searcher.search(
097                                            QueryTranslator.translate(query), luceneSort);
098    
099                                    long endTime = System.currentTimeMillis();
100    
101                                    float searchTime = (float)(endTime - startTime) / Time.SECOND;
102    
103                                    hits = subset(
104                                            luceneHits, query, startTime, searchTime, start, end);
105                            }
106                            catch (Exception e) {
107                                    throw new SearchException(e);
108                            }
109                            finally {
110                                    BooleanQuery.setMaxClauseCount(maxClauseCount);
111                            }
112                    }
113                    catch (ParseException pe) {
114                            _log.error("Query: " + query, pe);
115    
116                            return new HitsImpl();
117                    }
118                    catch (Exception e) {
119                            throw new SearchException(e);
120                    }
121                    finally {
122                            try {
123                                    if (searcher != null) {
124                                            searcher.close();
125                                    }
126                            }
127                            catch (IOException ioe) {
128                                    throw new SearchException(ioe);
129                            }
130                    }
131    
132                    if (_log.isDebugEnabled()) {
133                            _log.debug(
134                                    "Search found " + hits.getLength() + " results in " +
135                                            hits.getSearchTime() + "ms");
136                    }
137    
138                    return hits;
139            }
140    
141            protected DocumentImpl getDocument(
142                    org.apache.lucene.document.Document oldDoc) {
143    
144                    DocumentImpl newDoc = new DocumentImpl();
145    
146                    List<org.apache.lucene.document.Field> oldFields = oldDoc.getFields();
147    
148                    for (org.apache.lucene.document.Field oldField : oldFields) {
149                            String[] values = oldDoc.getValues(oldField.name());
150    
151                            if ((values != null) && (values.length > 1)) {
152                                    Field newField = new Field(
153                                            oldField.name(), values, oldField.isTokenized());
154    
155                                    newDoc.add(newField);
156                            }
157                            else {
158                                    Field newField = new Field(
159                                            oldField.name(), oldField.stringValue(),
160                                            oldField.isTokenized());
161    
162                                    newDoc.add(newField);
163                            }
164                    }
165    
166                    return newDoc;
167            }
168    
169            protected String[] getQueryTerms(Query query) {
170                    String[] queryTerms = new String[0];
171    
172                    try {
173                            queryTerms = LuceneHelperUtil.getQueryTerms(
174                                    QueryTranslator.translate(query));
175                    }
176                    catch (ParseException pe) {
177                            _log.error("Query: " + query, pe);
178                    }
179    
180                    return queryTerms;
181            }
182    
183            protected String getSnippet(
184                            org.apache.lucene.document.Document doc, Query query, String field)
185                    throws IOException {
186    
187                    String[] values = doc.getValues(field);
188    
189                    String snippet = null;
190    
191                    if (Validator.isNull(values)) {
192                            return snippet;
193                    }
194    
195                    String s = StringUtil.merge(values);
196    
197                    try {
198                            snippet = LuceneHelperUtil.getSnippet(
199                                    QueryTranslator.translate(query), field, s);
200                    }
201                    catch (ParseException pe) {
202                            _log.error("Query: " + query, pe);
203                    }
204    
205                    return snippet;
206            }
207    
208            protected Hits subset(
209                            org.apache.lucene.search.Hits luceneHits, Query query,
210                            long startTime, float searchTime, int start, int end)
211                    throws IOException {
212    
213                    int length = luceneHits.length();
214    
215                    if ((start == QueryUtil.ALL_POS) && (end == QueryUtil.ALL_POS)) {
216                            start = 0;
217                            end = length;
218                    }
219    
220                    String[] queryTerms = getQueryTerms(query);
221    
222                    Hits subset = new HitsImpl();
223    
224                    if ((start > - 1) && (start <= end)) {
225                            if (end > length) {
226                                    end = length;
227                            }
228    
229                            int subsetTotal = end - start;
230    
231                            Document[] subsetDocs = new DocumentImpl[subsetTotal];
232                            String[] subsetSnippets = new String[subsetTotal];
233                            float[] subsetScores = new float[subsetTotal];
234    
235                            int j = 0;
236    
237                            for (int i = start; i < end; i++, j++) {
238                                    org.apache.lucene.document.Document doc = luceneHits.doc(i);
239    
240                                    subsetDocs[j] = getDocument(doc);
241                                    subsetSnippets[j] = getSnippet(doc, query, Field.CONTENT);
242                                    subsetScores[j] = luceneHits.score(i);
243                            }
244    
245                            subset.setStart(startTime);
246                            subset.setSearchTime(searchTime);
247                            subset.setQueryTerms(queryTerms);
248                            subset.setDocs(subsetDocs);
249                            subset.setLength(length);
250                            subset.setSnippets(subsetSnippets);
251                            subset.setScores(subsetScores);
252                    }
253    
254                    return subset;
255            }
256    
257            private static Log _log = LogFactoryUtil.getLog(
258                    LuceneIndexSearcherImpl.class);
259    
260    }