Skip to content

Commit b1541ce

Browse files
committed
1 parent f8d25df commit b1541ce

5 files changed

Lines changed: 340 additions & 3 deletions

File tree

CHANGES.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ $Id$
3131
others. (A conjunctive BooleanQuery is a BooleanQuery where all
3232
clauses are required.) (cutting)
3333

34+
4. Added new class ParallelMultiSearcher. Combined with
35+
RemoteSearchable this makes it easy to implement distributed
36+
search systems. (Jean-Francois Halleux via cutting)
37+
3438

3539
1.3 final
3640

src/java/org/apache/lucene/search/MultiSearcher.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,10 @@ public MultiSearcher(Searchable[] searchables) throws IOException {
8080
}
8181
starts[searchables.length] = maxDoc;
8282
}
83+
84+
protected int[] getStarts() {
85+
return starts;
86+
}
8387

8488
/** Frees resources associated with this <code>Searcher</code>. */
8589
public void close() throws IOException {
Lines changed: 248 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,248 @@
1+
package org.apache.lucene.search;
2+
3+
/* ====================================================================
4+
* The Apache Software License, Version 1.1
5+
*
6+
* Copyright (c) 2004 The Apache Software Foundation. All rights
7+
* reserved.
8+
*
9+
* Redistribution and use in source and binary forms, with or without
10+
* modification, are permitted provided that the following conditions
11+
* are met:
12+
*
13+
* 1. Redistributions of source code must retain the above copyright
14+
* notice, this list of conditions and the following disclaimer.
15+
*
16+
* 2. Redistributions in binary form must reproduce the above copyright
17+
* notice, this list of conditions and the following disclaimer in
18+
* the documentation and/or other materials provided with the
19+
* distribution.
20+
*
21+
* 3. The end-user documentation included with the redistribution,
22+
* if any, must include the following acknowledgment:
23+
* "This product includes software developed by the
24+
* Apache Software Foundation (http://www.apache.org/)."
25+
* Alternately, this acknowledgment may appear in the software itself,
26+
* if and wherever such third-party acknowledgments normally appear.
27+
*
28+
* 4. The names "Apache" and "Apache Software Foundation" and
29+
* "Apache Lucene" must not be used to endorse or promote products
30+
* derived from this software without prior written permission. For
31+
* written permission, please contact [email protected].
32+
*
33+
* 5. Products derived from this software may not be called "Apache",
34+
* "Apache Lucene", nor may "Apache" appear in their name, without
35+
* prior written permission of the Apache Software Foundation.
36+
*
37+
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
38+
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
39+
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
40+
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
41+
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42+
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43+
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
44+
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
45+
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
46+
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
47+
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48+
* SUCH DAMAGE.
49+
* ====================================================================
50+
*
51+
* This software consists of voluntary contributions made by many
52+
* individuals on behalf of the Apache Software Foundation. For more
53+
* information on the Apache Software Foundation, please see
54+
* <http://www.apache.org/>.
55+
*/
56+
57+
import java.io.IOException;
58+
59+
import org.apache.lucene.index.Term;
60+
61+
/** Implements parallel search over a set of <code>Searchables</code>.
62+
*
63+
* <p>Applications usually need only call the inherited {@link #search(Query)}
64+
* or {@link #search(Query,Filter)} methods.
65+
*/
66+
public class ParallelMultiSearcher extends MultiSearcher {
67+
68+
private Searchable[] searchables;
69+
private int[] starts;
70+
71+
/** Creates a searcher which searches <i>searchables</i>. */
72+
public ParallelMultiSearcher(Searchable[] searchables) throws IOException {
73+
super(searchables);
74+
this.searchables=searchables;
75+
this.starts=getStarts();
76+
}
77+
78+
/**
79+
* TODO: parallelize this one too
80+
*/
81+
public int docFreq(Term term) throws IOException {
82+
int docFreq = 0;
83+
for (int i = 0; i < searchables.length; i++)
84+
docFreq += searchables[i].docFreq(term);
85+
return docFreq;
86+
}
87+
88+
/**
89+
* A search implementation which spans a new thread for each
90+
* Searchable, waits for each search to complete and merge
91+
* the results back together.
92+
*/
93+
public TopDocs search(Query query, Filter filter, int nDocs)
94+
throws IOException {
95+
HitQueue hq = new HitQueue(nDocs);
96+
int totalHits = 0;
97+
MultiSearcherThread[] msta =
98+
new MultiSearcherThread[searchables.length];
99+
for (int i = 0; i < searchables.length; i++) { // search each searcher
100+
// Assume not too many searchables and cost of creating a thread is by far inferior to a search
101+
msta[i] =
102+
new MultiSearcherThread(
103+
searchables[i],
104+
query,
105+
filter,
106+
nDocs,
107+
hq,
108+
i,
109+
starts,
110+
"MultiSearcher thread #" + (i + 1));
111+
msta[i].start();
112+
}
113+
114+
for (int i = 0; i < searchables.length; i++) {
115+
try {
116+
msta[i].join();
117+
} catch (InterruptedException ie) {
118+
; // TODO: what should we do with this???
119+
}
120+
IOException ioe = msta[i].getIOException();
121+
if (ioe == null) {
122+
totalHits += msta[i].hits();
123+
} else {
124+
// if one search produced an IOException, rethrow it
125+
throw ioe;
126+
}
127+
}
128+
129+
ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
130+
for (int i = hq.size() - 1; i >= 0; i--) // put docs in array
131+
scoreDocs[i] = (ScoreDoc) hq.pop();
132+
133+
return new TopDocs(totalHits, scoreDocs);
134+
}
135+
136+
/** Lower-level search API.
137+
*
138+
* <p>{@link HitCollector#collect(int,float)} is called for every non-zero
139+
* scoring document.
140+
*
141+
* <p>Applications should only use this if they need <i>all</i> of the
142+
* matching documents. The high-level search API ({@link
143+
* Searcher#search(Query)}) is usually more efficient, as it skips
144+
* non-high-scoring hits.
145+
*
146+
* @param query to match documents
147+
* @param filter if non-null, a bitset used to eliminate some documents
148+
* @param results to receive hits
149+
*
150+
* TODO: parallelize this one too
151+
*/
152+
public void search(Query query, Filter filter, final HitCollector results)
153+
throws IOException {
154+
for (int i = 0; i < searchables.length; i++) {
155+
156+
final int start = starts[i];
157+
158+
searchables[i].search(query, filter, new HitCollector() {
159+
public void collect(int doc, float score) {
160+
results.collect(doc + start, score);
161+
}
162+
});
163+
164+
}
165+
}
166+
167+
/*
168+
* TODO: this one could be parallelized too
169+
* @see org.apache.lucene.search.Searchable#rewrite(org.apache.lucene.search.Query)
170+
*/
171+
public Query rewrite(Query original) throws IOException {
172+
Query[] queries = new Query[searchables.length];
173+
for (int i = 0; i < searchables.length; i++) {
174+
queries[i] = searchables[i].rewrite(original);
175+
}
176+
return original.combine(queries);
177+
}
178+
179+
}
180+
181+
/**
182+
* A thread subclass for searching a single searchable
183+
*/
184+
class MultiSearcherThread extends Thread {
185+
186+
private Searchable searchable;
187+
private Query query;
188+
private Filter filter;
189+
private int nDocs;
190+
private int hits;
191+
private TopDocs docs;
192+
private int i;
193+
private HitQueue hq;
194+
private int[] starts;
195+
private IOException ioe;
196+
197+
public MultiSearcherThread(
198+
Searchable searchable,
199+
Query query,
200+
Filter filter,
201+
int nDocs,
202+
HitQueue hq,
203+
int i,
204+
int[] starts,
205+
String name) {
206+
super(name);
207+
this.searchable = searchable;
208+
this.query = query;
209+
this.filter = filter;
210+
this.nDocs = nDocs;
211+
this.hq = hq;
212+
this.i = i;
213+
this.starts = starts;
214+
}
215+
216+
public void run() {
217+
try {
218+
docs = searchable.search(query, filter, nDocs);
219+
}
220+
// Store the IOException for later use by the caller of this thread
221+
catch (IOException ioe) {
222+
this.ioe = ioe;
223+
}
224+
if (ioe == null) {
225+
ScoreDoc[] scoreDocs = docs.scoreDocs;
226+
for (int j = 0;
227+
j < scoreDocs.length;
228+
j++) { // merge scoreDocs into hq
229+
ScoreDoc scoreDoc = scoreDocs[j];
230+
scoreDoc.doc += starts[i]; // convert doc
231+
//it would be so nice if we had a thread-safe insert
232+
synchronized (hq) {
233+
if (!hq.insert(scoreDoc))
234+
break;
235+
} // no more scores > minScore
236+
}
237+
}
238+
}
239+
240+
public int hits() {
241+
return docs.totalHits;
242+
}
243+
244+
public IOException getIOException() {
245+
return ioe;
246+
}
247+
248+
}

src/test/org/apache/lucene/search/TestMultiSearcher.java

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,14 @@ public TestMultiSearcher(String name)
8181
super(name);
8282
}
8383

84+
/**
85+
* Return a new instance of the concrete MultiSearcher class
86+
* used in this test
87+
*/
88+
protected MultiSearcher getMultiSearcherInstance(Searcher[] searchers) throws IOException {
89+
return new MultiSearcher(searchers);
90+
}
91+
8492
public void testEmptyIndex()
8593
throws Exception
8694
{
@@ -134,7 +142,7 @@ public void testEmptyIndex()
134142
searchers[0] = new IndexSearcher(indexStoreB);
135143
searchers[1] = new IndexSearcher(indexStoreA);
136144
// creating the multiSearcher
137-
Searcher mSearcher = new MultiSearcher(searchers);
145+
Searcher mSearcher = getMultiSearcherInstance(searchers);
138146
// performing the search
139147
Hits hits = mSearcher.search(query);
140148

@@ -171,7 +179,7 @@ public void testEmptyIndex()
171179
searchers2[0] = new IndexSearcher(indexStoreB);
172180
searchers2[1] = new IndexSearcher(indexStoreA);
173181
// creating the multiSearcher
174-
Searcher mSearcher2 = new MultiSearcher(searchers2);
182+
Searcher mSearcher2 = getMultiSearcherInstance(searchers2);
175183
// performing the same search
176184
Hits hits2 = mSearcher2.search(query);
177185

@@ -213,7 +221,7 @@ public void testEmptyIndex()
213221
searchers3[0] = new IndexSearcher(indexStoreB);
214222
searchers3[1] = new IndexSearcher(indexStoreA);
215223
// creating the multiSearcher
216-
Searcher mSearcher3 = new MultiSearcher(searchers3);
224+
Searcher mSearcher3 = getMultiSearcherInstance(searchers3);
217225
// performing the same search
218226
Hits hits3 = mSearcher3.search(query);
219227

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
package org.apache.lucene.search;
2+
3+
/* ====================================================================
4+
* The Apache Software License, Version 1.1
5+
*
6+
* Copyright (c) 2001 The Apache Software Foundation. All rights
7+
* reserved.
8+
*
9+
* Redistribution and use in source and binary forms, with or without
10+
* modification, are permitted provided that the following conditions
11+
* are met:
12+
*
13+
* 1. Redistributions of source code must retain the above copyright
14+
* notice, this list of conditions and the following disclaimer.
15+
*
16+
* 2. Redistributions in binary form must reproduce the above copyright
17+
* notice, this list of conditions and the following disclaimer in
18+
* the documentation and/or other materials provided with the
19+
* distribution.
20+
*
21+
* 3. The end-user documentation included with the redistribution,
22+
* if any, must include the following acknowledgment:
23+
* "This product includes software developed by the
24+
* Apache Software Foundation (http://www.apache.org/)."
25+
* Alternately, this acknowledgment may appear in the software itself,
26+
* if and wherever such third-party acknowledgments normally appear.
27+
*
28+
* 4. The names "Apache" and "Apache Software Foundation" and
29+
* "Apache Lucene" must not be used to endorse or promote products
30+
* derived from this software without prior written permission. For
31+
* written permission, please contact [email protected].
32+
*
33+
* 5. Products derived from this software may not be called "Apache",
34+
* "Apache Lucene", nor may "Apache" appear in their name, without
35+
* prior written permission of the Apache Software Foundation.
36+
*
37+
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
38+
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
39+
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
40+
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
41+
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42+
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43+
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
44+
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
45+
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
46+
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
47+
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48+
* SUCH DAMAGE.
49+
* ====================================================================
50+
*
51+
* This software consists of voluntary contributions made by many
52+
* individuals on behalf of the Apache Software Foundation. For more
53+
* information on the Apache Software Foundation, please see
54+
* <http://www.apache.org/>.
55+
*/
56+
57+
import java.io.IOException;
58+
59+
/**
60+
* Unit tests for the ParallelMultiSearcher
61+
*/
62+
public class TestParallelMultiSearcher extends TestMultiSearcher {
63+
64+
public TestParallelMultiSearcher(String name) {
65+
super(name);
66+
}
67+
68+
protected MultiSearcher getMultiSearcherInstance(Searcher[] searchers)
69+
throws IOException {
70+
return new ParallelMultiSearcher(searchers);
71+
}
72+
73+
}

0 commit comments

Comments
 (0)