Added ParallelMultiSearcher.

cutting · cutting · commit b1541ce02737 · 2004-01-20T18:37:09.000Z
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150172 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/CHANGES.txt b/CHANGES.txt
@@ -31,6 +31,10 @@ $Id$
     others.  (A conjunctive BooleanQuery is a BooleanQuery where all
     clauses are required.)  (cutting)
 
+ 4. Added new class ParallelMultiSearcher.  Combined with
+    RemoteSearchable this makes it easy to implement distributed
+    search systems.  (Jean-Francois Halleux via cutting)
+
 
 1.3 final
 
diff --git a/src/java/org/apache/lucene/search/MultiSearcher.java b/src/java/org/apache/lucene/search/MultiSearcher.java
@@ -80,6 +80,10 @@ public MultiSearcher(Searchable[] searchables) throws IOException {
     }
     starts[searchables.length] = maxDoc;
   }
+  
+  protected int[] getStarts() {
+  	return starts;
+  }
 
   /** Frees resources associated with this <code>Searcher</code>. */
   public void close() throws IOException {
diff --git a/src/java/org/apache/lucene/search/ParallelMultiSearcher.java b/src/java/org/apache/lucene/search/ParallelMultiSearcher.java
@@ -0,0 +1,248 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2004 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.Term;
+
+/** Implements parallel search over a set of <code>Searchables</code>.
+ *
+ * <p>Applications usually need only call the inherited {@link #search(Query)}
+ * or {@link #search(Query,Filter)} methods.
+ */
+public class ParallelMultiSearcher extends MultiSearcher {
+
+	private Searchable[] searchables;
+	private int[] starts;
+	
+	/** Creates a searcher which searches <i>searchables</i>. */
+	public ParallelMultiSearcher(Searchable[] searchables) throws IOException {
+		super(searchables);
+		this.searchables=searchables;
+		this.starts=getStarts();
+	}
+
+	/**
+	 * TODO: parallelize this one too
+	 */
+	public int docFreq(Term term) throws IOException {
+		int docFreq = 0;
+		for (int i = 0; i < searchables.length; i++)
+			docFreq += searchables[i].docFreq(term);
+		return docFreq;
+	}
+
+	/**
+	* A search implementation which spans a new thread for each
+	* Searchable, waits for each search to complete and merge
+	* the results back together.
+	*/
+	public TopDocs search(Query query, Filter filter, int nDocs)
+		throws IOException {
+		HitQueue hq = new HitQueue(nDocs);
+		int totalHits = 0;
+		MultiSearcherThread[] msta =
+			new MultiSearcherThread[searchables.length];
+		for (int i = 0; i < searchables.length; i++) { // search each searcher
+			// Assume not too many searchables and cost of creating a thread is by far inferior to a search
+			msta[i] =
+				new MultiSearcherThread(
+					searchables[i],
+					query,
+					filter,
+					nDocs,
+					hq,
+					i,
+					starts,
+					"MultiSearcher thread #" + (i + 1));
+			msta[i].start();
+		}
+
+		for (int i = 0; i < searchables.length; i++) {
+			try {
+				msta[i].join();
+			} catch (InterruptedException ie) {
+				; // TODO: what should we do with this???
+			}
+			IOException ioe = msta[i].getIOException();
+			if (ioe == null) {
+				totalHits += msta[i].hits();
+			} else {
+				// if one search produced an IOException, rethrow it
+				throw ioe;
+			}
+		}
+
+		ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
+		for (int i = hq.size() - 1; i >= 0; i--) // put docs in array
+			scoreDocs[i] = (ScoreDoc) hq.pop();
+
+		return new TopDocs(totalHits, scoreDocs);
+	}
+
+	/** Lower-level search API.
+	 *
+	 * <p>{@link HitCollector#collect(int,float)} is called for every non-zero
+	 * scoring document.
+	 *
+	 * <p>Applications should only use this if they need <i>all</i> of the
+	 * matching documents.  The high-level search API ({@link
+	 * Searcher#search(Query)}) is usually more efficient, as it skips
+	 * non-high-scoring hits.
+	 *
+	 * @param query to match documents
+	 * @param filter if non-null, a bitset used to eliminate some documents
+	 * @param results to receive hits
+	 * 
+	 * TODO: parallelize this one too
+	 */
+	public void search(Query query, Filter filter, final HitCollector results)
+		throws IOException {
+		for (int i = 0; i < searchables.length; i++) {
+
+			final int start = starts[i];
+
+			searchables[i].search(query, filter, new HitCollector() {
+				public void collect(int doc, float score) {
+					results.collect(doc + start, score);
+				}
+			});
+
+		}
+	}
+
+	/*
+	 * TODO: this one could be parallelized too
+	 * @see org.apache.lucene.search.Searchable#rewrite(org.apache.lucene.search.Query)
+	 */
+	public Query rewrite(Query original) throws IOException {
+		Query[] queries = new Query[searchables.length];
+		for (int i = 0; i < searchables.length; i++) {
+			queries[i] = searchables[i].rewrite(original);
+		}
+		return original.combine(queries);
+	}
+
+}
+
+/**
+ * A thread subclass for searching a single searchable 
+ */
+class MultiSearcherThread extends Thread {
+
+	private Searchable searchable;
+	private Query query;
+	private Filter filter;
+	private int nDocs;
+	private int hits;
+	private TopDocs docs;
+	private int i;
+	private HitQueue hq;
+	private int[] starts;
+	private IOException ioe;
+
+	public MultiSearcherThread(
+		Searchable searchable,
+		Query query,
+		Filter filter,
+		int nDocs,
+		HitQueue hq,
+		int i,
+		int[] starts,
+		String name) {
+		super(name);
+		this.searchable = searchable;
+		this.query = query;
+		this.filter = filter;
+		this.nDocs = nDocs;
+		this.hq = hq;
+		this.i = i;
+		this.starts = starts;
+	}
+
+	public void run() {
+		try {
+			docs = searchable.search(query, filter, nDocs);
+		}
+		// Store the IOException for later use by the caller of this thread
+		catch (IOException ioe) {
+			this.ioe = ioe;
+		}
+		if (ioe == null) {
+			ScoreDoc[] scoreDocs = docs.scoreDocs;
+			for (int j = 0;
+				j < scoreDocs.length;
+				j++) { // merge scoreDocs into hq
+				ScoreDoc scoreDoc = scoreDocs[j];
+				scoreDoc.doc += starts[i]; // convert doc 
+				//it would be so nice if we had a thread-safe insert 
+				synchronized (hq) {
+					if (!hq.insert(scoreDoc))
+						break;
+				} // no more scores > minScore
+			}
+		}
+	}
+
+	public int hits() {
+		return docs.totalHits;
+	}
+
+	public IOException getIOException() {
+		return ioe;
+	}
+
+}
diff --git a/src/test/org/apache/lucene/search/TestMultiSearcher.java b/src/test/org/apache/lucene/search/TestMultiSearcher.java
@@ -81,6 +81,14 @@ public TestMultiSearcher(String name)
         super(name);
     }
 
+	/**
+	 * Return a new instance of the concrete MultiSearcher class
+	 * used in this test
+	 */
+	protected MultiSearcher getMultiSearcherInstance(Searcher[] searchers) throws IOException {
+		return new MultiSearcher(searchers);
+	}
+	
     public void testEmptyIndex()
         throws Exception
     {
@@ -134,7 +142,7 @@ public void testEmptyIndex()
         searchers[0] = new IndexSearcher(indexStoreB);
         searchers[1] = new IndexSearcher(indexStoreA);
         // creating the multiSearcher
-        Searcher mSearcher = new MultiSearcher(searchers);
+        Searcher mSearcher = getMultiSearcherInstance(searchers);
         // performing the search
         Hits hits = mSearcher.search(query);
 
@@ -171,7 +179,7 @@ public void testEmptyIndex()
         searchers2[0] = new IndexSearcher(indexStoreB);
         searchers2[1] = new IndexSearcher(indexStoreA);
         // creating the mulitSearcher
-        Searcher mSearcher2 = new MultiSearcher(searchers2);
+        Searcher mSearcher2 = getMultiSearcherInstance(searchers2);
         // performing the same search
         Hits hits2 = mSearcher2.search(query);
 
@@ -213,7 +221,7 @@ public void testEmptyIndex()
         searchers3[0] = new IndexSearcher(indexStoreB);
         searchers3[1] = new IndexSearcher(indexStoreA);
         // creating the mulitSearcher
-        Searcher mSearcher3 = new MultiSearcher(searchers3);
+        Searcher mSearcher3 = getMultiSearcherInstance(searchers3);
         // performing the same search
         Hits hits3 = mSearcher3.search(query);
 
diff --git a/src/test/org/apache/lucene/search/TestParallelMultiSearcher.java b/src/test/org/apache/lucene/search/TestParallelMultiSearcher.java
@@ -0,0 +1,73 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+ 
+import java.io.IOException;
+
+/**
+ * Unit tests for the ParallelMultiSearcher 
+ */
+public class TestParallelMultiSearcher extends TestMultiSearcher {
+
+	public TestParallelMultiSearcher(String name) {
+		super(name);
+	}
+
+	protected MultiSearcher getMultiSearcherInstance(Searcher[] searchers)
+		throws IOException {
+		return new ParallelMultiSearcher(searchers);
+	}
+
+}

Original file line number	Diff line number	Diff line change
`@@ -80,6 +80,10 @@ public MultiSearcher(Searchable[] searchables) throws IOException {`
`80`	`80`	`}`
`81`	`81`	`starts[searchables.length] = maxDoc;`
`82`	`82`	`}`
	`83`	`+`
	`84`	`+ protected int[] getStarts() {`
	`85`	`+ return starts;`
	`86`	`+ }`
`83`	`87`
`84`	`88`	`/** Frees resources associated with this <code>Searcher</code>. */`
`85`	`89`	`public void close() throws IOException {`