File Source: searchindex.java
/*
P/P * Method: net.sourceforge.pebble.index.SearchIndex__static_init
*
* Postconditions:
* init'ed(log)
*/
1 /*
2 * Copyright (c) 2003-2006, Simon Brown
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * - Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
14 * distribution.
15 *
16 * - Neither the name of Pebble nor the names of its contributors may
17 * be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32 package net.sourceforge.pebble.index;
33
34 import net.sourceforge.pebble.domain.*;
35 import net.sourceforge.pebble.search.SearchException;
36 import net.sourceforge.pebble.search.SearchHit;
37 import net.sourceforge.pebble.search.SearchResults;
38 import org.apache.commons.logging.Log;
39 import org.apache.commons.logging.LogFactory;
/*
P/P * Method: void net.sourceforge.pebble.index.SearchIndex(Blog)
*
* Postconditions:
* this.blog == blog
* init'ed(this.blog)
*/
40 import org.apache.lucene.analysis.Analyzer;
41 import org.apache.lucene.document.DateField;
42 import org.apache.lucene.document.Document;
43 import org.apache.lucene.document.Field;
44 import org.apache.lucene.index.IndexReader;
45 import org.apache.lucene.index.IndexWriter;
46 import org.apache.lucene.index.Term;
47 import org.apache.lucene.queryParser.ParseException;
48 import org.apache.lucene.queryParser.QueryParser;
49 import org.apache.lucene.search.Hits;
50 import org.apache.lucene.search.IndexSearcher;
51 import org.apache.lucene.search.Query;
52 import org.apache.lucene.search.Searcher;
53
54 import java.io.File;
55 import java.io.IOException;
56 import java.util.Iterator;
57 import java.util.List;
58 import java.util.Collection;
59
60 /**
61 * Wraps up the functionality to index blog entries. This is really just
62 * a convenient wrapper around Lucene.
63 *
64 * @author Simon Brown
65 */
66 public class SearchIndex {
67
/** Logger shared by all instances of this class. */
private static final Log log = LogFactory.getLog(SearchIndex.class);

/**
 * The blog whose search index this object manages. The same object is used
 * as the monitor that serialises the index operations in this class.
 */
private final Blog blog;

/**
 * Creates a search index wrapper for the given blog.
 *
 * @param blog the blog whose search index directory and analyzer settings
 *             are used by every operation on this object
 */
public SearchIndex(Blog blog) {
  this.blog = blog;
}
76
77 /**
78 * Clears the index.
79 */
80 public void clear() {
81 File searchDirectory = new File(blog.getSearchIndexDirectory());
82 if (!searchDirectory.exists()) {
83 searchDirectory.mkdirs();
84 }
85
86 synchronized (blog) {
87 try {
/*
P/P * Method: void clear()
*
* Preconditions:
* this.blog != null
* (soft) this.blog.properties != null
*
* Presumptions:
* org.apache.commons.logging.LogFactory:getLog(...)@69 != null
*
* Test Vectors:
* java.io.File:exists(...)@82: {1}, {0}
*/
88 Analyzer analyzer = getAnalyzer();
89 IndexWriter writer = new IndexWriter(searchDirectory, analyzer, true);
90 writer.close();
91 } catch (Exception e) {
92 log.error(e.getMessage(), e);
93 }
94 }
95 }
96
97 /**
98 * Allows a collection of blog entries to be indexed.
99 */
100 public void indexBlogEntries(Collection<BlogEntry> blogEntries) {
101 synchronized (blog) {
102 try {
/*
P/P * Method: void indexBlogEntries(Collection)
*
* Preconditions:
* (soft) blogEntries != null
* (soft) net.sourceforge.pebble.domain.State__static_init.new State(State__static_init#1).name != null
* (soft) net.sourceforge.pebble.domain.State__static_init.new State(State__static_init#5).name != null
* (soft) net/sourceforge/pebble/domain/BlogManager.instance != null
* (soft) init'ed(net/sourceforge/pebble/domain/BlogManager.instance.multiBlog)
* (soft) this.blog != null
* (soft) this.blog.properties != null
*
* Presumptions:
* blogEntry.blog.rootCategory@106 != null
* blogEntry.blog@106 != null
* blogEntry.comments@106 != null
* blogEntry.state@106 != null
* blogEntry.tagsAsList@106 != null
* ...
*
* Test Vectors:
* java.util.Iterator:hasNext(...)@106: {1}, {0}
*/
103 Analyzer analyzer = getAnalyzer();
104 IndexWriter writer = new IndexWriter(blog.getSearchIndexDirectory(), analyzer, false);
105
106 for (BlogEntry blogEntry : blogEntries) {
107 index(blogEntry, writer);
108 }
109
110 writer.close();
111 } catch (Exception e) {
112 log.error(e.getMessage(), e);
113 }
114 }
115 }
116
117 /**
118 * Allows a collection of static pages to be indexed.
119 */
120 public void indexStaticPages(Collection<StaticPage> staticPages) {
121 synchronized (blog) {
122 try {
/*
P/P * Method: void indexStaticPages(Collection)
*
* Preconditions:
* (soft) net/sourceforge/pebble/domain/BlogManager.instance != null
* (soft) init'ed(net/sourceforge/pebble/domain/BlogManager.instance.multiBlog)
* (soft) staticPages != null
* (soft) this.blog != null
* (soft) this.blog.properties != null
*
* Presumptions:
* java.util.Iterator:next(...)@126 != null
* org.apache.commons.logging.LogFactory:getLog(...)@69 != null
* staticPage.blog@126 != null
*
* Test Vectors:
* java.util.Iterator:hasNext(...)@126: {1}, {0}
*/
123 Analyzer analyzer = getAnalyzer();
124 IndexWriter writer = new IndexWriter(blog.getSearchIndexDirectory(), analyzer, false);
125
126 for (StaticPage staticPage : staticPages) {
127 index(staticPage, writer);
128 }
129
130 writer.close();
131 } catch (Exception e) {
132 log.error(e.getMessage(), e);
133 }
134 }
135 }
136
137 /**
138 * Allows a single blog entry to be (re)indexed. If the entry is already
139 * indexed, this method deletes the previous index before adding the new
140 * one.
141 *
142 * @param blogEntry the BlogEntry instance to index
143 */
144 public void index(BlogEntry blogEntry) {
145 try {
146 synchronized (blog) {
147 // first delete the blog entry from the index (if it was there)
148 unindex(blogEntry);
149
/*
P/P * Method: void index(BlogEntry)
*
* Preconditions:
* (soft) blogEntry != null
* (soft) init'ed(blogEntry.author)
* (soft) blogEntry.blog != null
* (soft) init'ed(blogEntry.blog.id)
* (soft) blogEntry.blog.rootCategory != null
* (soft) init'ed(blogEntry.blog.rootCategory...parent)
* (soft) init'ed(blogEntry.blog.rootCategory...tagsAsList)
* (soft) init'ed(blogEntry.blog.rootCategory.parent)
* (soft) init'ed(blogEntry.blog.rootCategory.tagsAsList)
* (soft) init'ed(blogEntry.body)
* ...
*
* Presumptions:
* org.apache.commons.logging.LogFactory:getLog(...)@69 != null
*
* Postconditions:
* possibly_updated(blogEntry.permalink)
*/
150 Analyzer analyzer = getAnalyzer();
151 IndexWriter writer = new IndexWriter(blog.getSearchIndexDirectory(), analyzer, false);
152 index(blogEntry, writer);
153 writer.close();
154 }
155 } catch (Exception e) {
156 log.error(e.getMessage(), e);
157 }
158 }
159
160 /**
161 * Allows a single static page to be (re)indexed. If the page is already
162 * indexed, this method deletes the previous index before adding the new
163 * one.
164 *
165 * @param staticPage the StaticPage instance to index
166 */
167 public void index(StaticPage staticPage) {
168 try {
169 synchronized (blog) {
170 // first delete the static page from the index (if it was there)
171 unindex(staticPage);
172
/*
P/P * Method: void index(StaticPage)
*
* Preconditions:
* (soft) net/sourceforge/pebble/domain/BlogManager.instance != null
* (soft) init'ed(net/sourceforge/pebble/domain/BlogManager.instance.multiBlog)
* (soft) staticPage != null
* (soft) init'ed(staticPage.author)
* (soft) staticPage.blog != null
* (soft) init'ed(staticPage.blog.id)
* (soft) init'ed(staticPage.body)
* (soft) init'ed(staticPage.date)
* (soft) init'ed(staticPage.excerpt)
* (soft) init'ed(staticPage.id)
* ...
*
* Presumptions:
* org.apache.commons.logging.LogFactory:getLog(...)@69 != null
*
* Postconditions:
* possibly_updated(staticPage.permalink)
*/
173 Analyzer analyzer = getAnalyzer();
174 IndexWriter writer = new IndexWriter(blog.getSearchIndexDirectory(), analyzer, false);
175 index(staticPage, writer);
176 writer.close();
177 }
178 } catch (Exception e) {
179 log.error(e.getMessage(), e);
180 }
181 }
182
183 /**
184 * Gets the Analyzer implementation to use.
185 *
186 * @return an Analyzer instance
187 * @throws Exception
188 */
189 private Analyzer getAnalyzer() throws Exception {
/*
P/P * Method: Analyzer getAnalyzer()
*
* Preconditions:
* this.blog != null
* this.blog.properties != null
*
* Presumptions:
* java.lang.Class:forName(...)@190 != null
*
* Postconditions:
* return_value != null
*/
190 Class c = Class.forName(blog.getLuceneAnalyzer());
191 return (Analyzer)c.newInstance();
192 }
193
194 /**
195 * Removes the index for a single blog entry to be removed.
196 *
197 * @param blogEntry the BlogEntry instance to be removed
198 */
199 public void unindex(BlogEntry blogEntry) {
200 try {
201 synchronized (blog) {
202 log.debug("Attempting to delete index for " + blogEntry.getTitle());
/*
P/P * Method: void unindex(BlogEntry)
*
* Preconditions:
* (soft) blogEntry != null
* (soft) init'ed(blogEntry.id)
* (soft) init'ed(blogEntry.title)
* (soft) this.blog != null
*
* Presumptions:
* org.apache.commons.logging.LogFactory:getLog(...)@69 != null
* org.apache.lucene.index.IndexReader:open(...)@203 != null
*/
203 IndexReader reader = IndexReader.open(blog.getSearchIndexDirectory());
204 Term term = new Term("id", blogEntry.getId());
205 log.debug("Deleted " + reader.delete(term) + " document(s) from the index");
206 reader.close();
207 }
208 } catch (Exception e) {
209 log.error(e.getMessage(), e);
210 }
211 }
212
213 /**
214 * Removes the index for a single blog entry to be removed.
215 *
216 * @param staticPage the StaticPage instance to be removed
217 */
218 public void unindex(StaticPage staticPage) {
219 try {
220 synchronized (blog) {
221 log.debug("Attempting to delete index for " + staticPage.getTitle());
/*
P/P * Method: void unindex(StaticPage)
*
* Preconditions:
* (soft) staticPage != null
* (soft) init'ed(staticPage.id)
* (soft) init'ed(staticPage.title)
* (soft) this.blog != null
*
* Presumptions:
* org.apache.commons.logging.LogFactory:getLog(...)@69 != null
* org.apache.lucene.index.IndexReader:open(...)@222 != null
*/
222 IndexReader reader = IndexReader.open(blog.getSearchIndexDirectory());
223 Term term = new Term("id", staticPage.getId());
224 log.debug("Deleted " + reader.delete(term) + " document(s) from the index");
225 reader.close();
226 }
227 } catch (Exception e) {
228 log.error(e.getMessage(), e);
229 }
230 }
231
232 /**
233 * Helper method to index an individual blog entry.
234 *
235 * @param blogEntry the BlogEntry instance to index
236 * @param writer the IndexWriter to index with
237 */
238 private void index(BlogEntry blogEntry, IndexWriter writer) {
/*
P/P * Method: void index(BlogEntry, IndexWriter)
*
* Preconditions:
* blogEntry != null
* blogEntry.state != null
* (soft) init'ed(blogEntry.author)
* (soft) blogEntry.blog != null
* (soft) init'ed(blogEntry.blog.id)
* (soft) blogEntry.blog.rootCategory != null
* (soft) init'ed(blogEntry.blog.rootCategory...parent)
* (soft) init'ed(blogEntry.blog.rootCategory...tagsAsList)
* (soft) init'ed(blogEntry.blog.rootCategory.parent)
* (soft) init'ed(blogEntry.blog.rootCategory.tagsAsList)
* ...
*
* Presumptions:
* comment.state@287 != null
* java.util.Iterator:next(...)@276 != null
* java.util.Iterator:next(...)@280 != null
* java.util.Iterator:next(...)@287 != null
* java.util.Iterator:next(...)@295 != null
* ...
*
* Postconditions:
* possibly_updated(blogEntry.permalink)
*
* Test Vectors:
* blogEntry.author: Addr_Set{null}, Inverse{null}
* blogEntry.body: Addr_Set{null}, Inverse{null}
* blogEntry.title: Addr_Set{null}, Inverse{null}
* java.util.Iterator:hasNext(...)@276: {1}, {0}
* java.util.Iterator:hasNext(...)@280: {1}, {0}
* java.util.Iterator:hasNext(...)@286: {1}, {0}
* java.util.Iterator:hasNext(...)@294: {1}, {0}
* net.sourceforge.pebble.util.StringUtils:truncate(...)@101: Addr_Set{null}, Inverse{null}
*/
239 if (!blogEntry.isPublished()) {
240 return;
241 }
242
243 try {
244 log.debug("Indexing " + blogEntry.getTitle());
245 Document document = new Document();
246 document.add(Field.Keyword("id", blogEntry.getId()));
247 if (blogEntry.getTitle() != null) {
248 document.add(Field.Text("title", blogEntry.getTitle()));
249 } else {
250 document.add(Field.Text("title", ""));
251 }
252 document.add(Field.Keyword("permalink", blogEntry.getPermalink()));
253 document.add(Field.UnIndexed("date", DateField.dateToString(blogEntry.getDate())));
254 if (blogEntry.getBody() != null) {
255 document.add(Field.UnStored("body", blogEntry.getBody()));
256 } else {
257 document.add(Field.UnStored("body", ""));
258 }
259 if (blogEntry.getTruncatedContent() != null) {
260 document.add(Field.Text("truncatedBody", blogEntry.getTruncatedContent()));
261 } else {
262 document.add(Field.Text("truncatedBody", ""));
263 }
264
265 if (blogEntry.getAuthor() != null) {
266 document.add(Field.Text("author", blogEntry.getAuthor()));
267 }
268
269 // build up one large string with all searchable content
270 // i.e. entry title, entry body and all response bodies
271 StringBuffer searchableContent = new StringBuffer();
272 searchableContent.append(blogEntry.getTitle());
273 searchableContent.append(" ");
274 searchableContent.append(blogEntry.getBody());
275
276 for (Category category : blogEntry.getCategories()) {
277 document.add(Field.Text("category", category.getId()));
278 }
279
280 for (Tag tag : blogEntry.getAllTags()) {
281 document.add(Field.Text("tag", tag.getName()));
282 }
283
284 searchableContent.append(" ");
285 Iterator it = blogEntry.getComments().iterator();
286 while (it.hasNext()) {
287 Comment comment = (Comment)it.next();
288 if (comment.isApproved()) {
289 searchableContent.append(comment.getBody());
290 searchableContent.append(" ");
291 }
292 }
293 it = blogEntry.getTrackBacks().iterator();
294 while (it.hasNext()) {
295 TrackBack trackBack = (TrackBack)it.next();
296 if (trackBack.isApproved()) {
297 searchableContent.append(trackBack.getExcerpt());
298 searchableContent.append(" ");
299 }
300 }
301
302 // join the title and body together to make searching on them both easier
303 document.add(Field.UnStored("blogEntry", searchableContent.toString()));
304
305 writer.addDocument(document);
306 } catch (Exception e) {
307 log.error(e.getMessage(), e);
308 }
309 }
310 /**
311 * Helper method to index an individual blog entry.
312 *
313 * @param staticPage the Page instance instance to index
314 * @param writer the IndexWriter to index with
315 */
316 private void index(StaticPage staticPage, IndexWriter writer) {
317 try {
/*
P/P * Method: void index(StaticPage, IndexWriter)
*
* Preconditions:
* (soft) net/sourceforge/pebble/domain/BlogManager.instance != null
* (soft) init'ed(net/sourceforge/pebble/domain/BlogManager.instance.multiBlog)
* (soft) staticPage != null
* (soft) init'ed(staticPage.author)
* (soft) staticPage.blog != null
* (soft) init'ed(staticPage.blog.id)
* (soft) init'ed(staticPage.body)
* (soft) init'ed(staticPage.date)
* (soft) init'ed(staticPage.excerpt)
* (soft) init'ed(staticPage.id)
* ...
*
* Presumptions:
* org.apache.commons.logging.LogFactory:getLog(...)@69 != null
*
* Postconditions:
* possibly_updated(staticPage.permalink)
*
* Test Vectors:
* staticPage.author: Addr_Set{null}, Inverse{null}
* staticPage.body: Addr_Set{null}, Inverse{null}
* staticPage.title: Addr_Set{null}, Inverse{null}
* net.sourceforge.pebble.util.StringUtils:truncate(...)@101: Addr_Set{null}, Inverse{null}
*/
318 log.debug("Indexing " + staticPage.getTitle());
319 Document document = new Document();
320 document.add(Field.Keyword("id", staticPage.getId()));
321 if (staticPage.getTitle() != null) {
322 document.add(Field.Text("title", staticPage.getTitle()));
323 } else {
324 document.add(Field.Text("title", ""));
325 }
326 document.add(Field.Keyword("permalink", staticPage.getPermalink()));
327 document.add(Field.UnIndexed("date", DateField.dateToString(staticPage.getDate())));
328 if (staticPage.getBody() != null) {
329 document.add(Field.UnStored("body", staticPage.getBody()));
330 } else {
331 document.add(Field.UnStored("body", ""));
332 }
333 if (staticPage.getTruncatedContent() != null) {
334 document.add(Field.Text("truncatedBody", staticPage.getTruncatedContent()));
335 } else {
336 document.add(Field.Text("truncatedBody", ""));
337 }
338
339 if (staticPage.getAuthor() != null) {
340 document.add(Field.Text("author", staticPage.getAuthor()));
341 }
342
343 // build up one large string with all searchable content
344 // i.e. entry title, entry body and all response bodies
345 StringBuffer searchableContent = new StringBuffer();
346 searchableContent.append(staticPage.getTitle());
347 searchableContent.append(" ");
348 searchableContent.append(staticPage.getBody());
349
350 // join the title and body together to make searching on them both easier
351 document.add(Field.UnStored("blogEntry", searchableContent.toString()));
352
353 writer.addDocument(document);
354 } catch (Exception e) {
355 log.error(e.getMessage(), e);
356 }
357 }
358
359 public SearchResults search(String queryString) throws SearchException {
360
361 log.debug("Performing search : " + queryString);
362
363 SearchResults searchResults = new SearchResults();
364 searchResults.setQuery(queryString);
365
366 if (queryString != null && queryString.length() > 0) {
/*
P/P * Method: SearchResults search(String)
*
* Preconditions:
* (soft) this.blog != null
* (soft) this.blog.properties != null
*
* Presumptions:
* org.apache.commons.logging.LogFactory:getLog(...)@69 != null
* org.apache.lucene.search.Hits:doc(...)@375 != null
* org.apache.lucene.search.Searcher:search(...)@372 != null
*
* Postconditions:
* return_value == &new SearchResults(search#2)
* new SearchResults(search#2) num objects == 1
*
* Test Vectors:
* queryString: Addr_Set{null}, Inverse{null}
* java.lang.String:length(...)@366: {0}, {1..232-1}
*/
367 Searcher searcher = null;
368
369 try {
370 searcher = new IndexSearcher(blog.getSearchIndexDirectory());
371 Query query = QueryParser.parse(queryString, "blogEntry", getAnalyzer());
372 Hits hits = searcher.search(query);
373
374 for (int i = 0; i < hits.length(); i++) {
375 Document doc = hits.doc(i);
376 SearchHit result = new SearchHit(
377 blog,
378 doc.get("id"),
379 doc.get("permalink"),
380 doc.get("title"),
381 doc.get("truncatedBody"),
382 DateField.stringToDate(doc.get("date")),
383 hits.score(i));
384 searchResults.add(result);
385 }
386 } catch (ParseException pe) {
387 pe.printStackTrace();
388 searchResults.setMessage("Sorry, but there was an error. Please try another search");
389 } catch (Exception e) {
390 e.printStackTrace();
391 throw new SearchException(e.getMessage());
392 } finally {
+ 393 if (searcher != null) {
394 try {
395 searcher.close();
396 } catch (IOException e) {
397 // can't do much now! ;-)
398 }
399 }
400 }
401 }
402
403 return searchResults;
404 }
405
406 }
407
SofCheck Inspector Build Version : 2.22510
| searchindex.java |
2010-Jun-25 19:40:32 |
| searchindex.class |
2010-Jul-19 20:23:38 |