package org.apache.lucene.index; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import java.io.File; import java.io.IOException; import java.io.PrintStream; /** *
[Note that as of 2.1, all but one of the * methods in this class are available via {@link * IndexWriter}. The one method that is not available is * {@link #deleteDocument(int)}.]
* * A class to modify an index, i.e. to delete and add documents. This * class hides {@link IndexReader} and {@link IndexWriter} so that you * do not need to care about implementation details such as that adding * documents is done via IndexWriter and deletion is done via IndexReader. * *Note that you cannot create more than one IndexModifier object
* on the same directory at the same time.
*
*
Example usage: *
Analyzer analyzer = new StandardAnalyzer();
 * // create an index in /tmp/index, overwriting an existing one:
 * IndexModifier indexModifier = new IndexModifier("/tmp/index", analyzer, true);
 * Document doc = new Document();
 * indexModifier.addDocument(doc);
 * int deleted = indexModifier.deleteDocuments(new Term("id", "1"));
 * indexModifier.flush();
 * indexModifier.close();
 *
Not all methods of IndexReader and IndexWriter are offered by this
* class. If you need access to additional methods, either use those classes
* directly or implement your own class that extends IndexModifier.
*
*
Although an instance of this class can be used from more than one * thread, you will not get the best performance. You might want to use * IndexReader and IndexWriter directly for that (but you will need to * care about synchronization yourself then). * *
While you can freely mix calls to add() and delete() using this class,
 * you should batch your calls for best performance. For example, if you
* want to update 20 documents, you should first delete all those documents,
* then add all the new documents.
*
* @author Daniel Naber
*/
public class IndexModifier {

  // Invariant while open: exactly one of indexWriter/indexReader is non-null.
  // Adds go through the writer, deletes through the reader; createIndexWriter()
  // and createIndexReader() switch between the two modes.
  protected IndexWriter indexWriter = null;
  protected IndexReader indexReader = null;

  // Index location; also used as the lock object for every operation.
  protected Directory directory = null;
  // Analyzer used whenever the IndexWriter is (re)created.
  protected Analyzer analyzer = null;
  // Set to false by close(); guarded by assureOpen().
  protected boolean open = false;

  // Cached IndexWriter settings (initialized to the Lucene defaults); these are
  // reapplied to every new writer created by createIndexWriter().
  protected PrintStream infoStream = null;
  protected boolean useCompoundFile = true;
  protected int maxBufferedDocs = IndexWriter.DEFAULT_MAX_BUFFERED_DOCS;
  protected int maxFieldLength = IndexWriter.DEFAULT_MAX_FIELD_LENGTH;
  protected int mergeFactor = IndexWriter.DEFAULT_MERGE_FACTOR;
  /**
   * Open an index with write access.
   *
   * @param directory the index directory
   * @param analyzer the analyzer to use for adding new documents
   * @param create <code>true</code> to create the index or overwrite the
   *   existing one; <code>false</code> to append to the existing index
   * @throws IOException if the index cannot be created or opened
   */
  public IndexModifier(Directory directory, Analyzer analyzer, boolean create) throws IOException {
    init(directory, analyzer, create);
  }
  /**
   * Open an index with write access.
   *
   * @param dirName the path of the index directory
   * @param analyzer the analyzer to use for adding new documents
   * @param create <code>true</code> to create the index or overwrite the
   *   existing one; <code>false</code> to append to the existing index
   * @throws IOException if the directory cannot be opened or created
   */
  public IndexModifier(String dirName, Analyzer analyzer, boolean create) throws IOException {
    Directory dir = FSDirectory.getDirectory(dirName);
    init(dir, analyzer, create);
  }
  /**
   * Open an index with write access.
   *
   * @param file the index directory
   * @param analyzer the analyzer to use for adding new documents
   * @param create <code>true</code> to create the index or overwrite the
   *   existing one; <code>false</code> to append to the existing index
   * @throws IOException if the directory cannot be opened or created
   */
  public IndexModifier(File file, Analyzer analyzer, boolean create) throws IOException {
    Directory dir = FSDirectory.getDirectory(file);
    init(dir, analyzer, create);
  }
  /**
   * Initialize an IndexWriter.
   *
   * @param directory the index directory; becomes the lock object that all
   *   subsequent operations synchronize on
   * @param analyzer the analyzer used for this and any later writer
   * @param create whether to create/overwrite the index
   * @throws IOException if the IndexWriter cannot be opened
   */
  protected void init(Directory directory, Analyzer analyzer, boolean create) throws IOException {
    // The directory field must be assigned before we can synchronize on it.
    this.directory = directory;
    synchronized(this.directory) {
      this.analyzer = analyzer;
      indexWriter = new IndexWriter(directory, analyzer, create);
      open = true;
    }
  }
/**
* Throw an IllegalStateException if the index is closed.
* @throws IllegalStateException
*/
protected void assureOpen() {
if (!open) {
throw new IllegalStateException("Index is closed");
}
}
/**
* Close the IndexReader and open an IndexWriter.
* @throws IOException
*/
protected void createIndexWriter() throws IOException {
if (indexWriter == null) {
if (indexReader != null) {
indexReader.close();
indexReader = null;
}
indexWriter = new IndexWriter(directory, analyzer, false);
indexWriter.setInfoStream(infoStream);
indexWriter.setUseCompoundFile(useCompoundFile);
indexWriter.setMaxBufferedDocs(maxBufferedDocs);
indexWriter.setMaxFieldLength(maxFieldLength);
indexWriter.setMergeFactor(mergeFactor);
}
}
/**
* Close the IndexWriter and open an IndexReader.
* @throws IOException
*/
protected void createIndexReader() throws IOException {
if (indexReader == null) {
if (indexWriter != null) {
indexWriter.close();
indexWriter = null;
}
indexReader = IndexReader.open(directory);
}
}
/**
* Make sure all changes are written to disk.
* @throws IOException
*/
public void flush() throws IOException {
synchronized(directory) {
assureOpen();
if (indexWriter != null) {
indexWriter.close();
indexWriter = null;
createIndexWriter();
} else {
indexReader.close();
indexReader = null;
createIndexReader();
}
}
}
/**
* Adds a document to this index, using the provided analyzer instead of the
* one specific in the constructor. If the document contains more than
* {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
* discarded.
* @see IndexWriter#addDocument(Document, Analyzer)
* @throws IllegalStateException if the index is closed
*/
public void addDocument(Document doc, Analyzer docAnalyzer) throws IOException {
synchronized(directory) {
assureOpen();
createIndexWriter();
if (docAnalyzer != null)
indexWriter.addDocument(doc, docAnalyzer);
else
indexWriter.addDocument(doc);
}
}
  /**
   * Adds a document to this index. If the document contains more than
   * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
   * discarded.
   *
   * @param doc the document to add
   * @see IndexWriter#addDocument(Document)
   * @throws IllegalStateException if the index is closed
   * @throws IOException if the document cannot be added
   */
  public void addDocument(Document doc) throws IOException {
    addDocument(doc, null);
  }
  /**
   * Deletes all documents containing <code>term</code>. This is useful if one
   * uses a document field to hold a unique ID string for the document. Then to
   * delete such a document, one merely constructs a term with the appropriate
   * field and the unique ID string as its text and passes it to this method.
   *
   * @param term the term identifying the documents to delete
   * @return the number of documents deleted
   * @see IndexReader#deleteDocuments(Term)
   * @throws IllegalStateException if the index is closed
   * @throws IOException if switching to a reader or deleting fails
   */
  public int deleteDocuments(Term term) throws IOException {
    synchronized(directory) {
      assureOpen();
      // Deletion is a reader operation; switch modes if currently writing.
      createIndexReader();
      return indexReader.deleteDocuments(term);
    }
  }
  /**
   * Deletes the document numbered <code>docNum</code>. Note that document
   * numbers are not stable across writer reopens, so prefer
   * {@link #deleteDocuments(Term)} with a unique-ID field where possible.
   *
   * @param docNum the document number to delete
   * @see IndexReader#deleteDocument(int)
   * @throws IllegalStateException if the index is closed
   * @throws IOException if switching to a reader or deleting fails
   */
  public void deleteDocument(int docNum) throws IOException {
    synchronized(directory) {
      assureOpen();
      createIndexReader();
      indexReader.deleteDocument(docNum);
    }
  }
/**
* Returns the number of documents currently in this index.
* @see IndexWriter#docCount()
* @see IndexReader#numDocs()
* @throws IllegalStateException if the index is closed
*/
public int docCount() {
synchronized(directory) {
assureOpen();
if (indexWriter != null) {
return indexWriter.docCount();
} else {
return indexReader.numDocs();
}
}
}
  /**
   * Merges all segments together into a single segment, optimizing an index
   * for search.
   *
   * @see IndexWriter#optimize()
   * @throws IllegalStateException if the index is closed
   * @throws IOException if the merge fails
   */
  public void optimize() throws IOException {
    synchronized(directory) {
      assureOpen();
      // Optimization is a writer operation; close any open reader first.
      createIndexWriter();
      indexWriter.optimize();
    }
  }
  /**
   * If non-null, information about merges and a message when
   * {@link #getMaxFieldLength()} is reached will be printed to this.
   * <p>Example: <code>index.setInfoStream(System.err);</code>
   *
   * @see IndexWriter#setInfoStream(PrintStream)
   * @throws IllegalStateException if the index is closed
   */
  public void setInfoStream(PrintStream infoStream) {
    synchronized(directory) {
      assureOpen();
      // Apply to the live writer (if any) first, then cache the value so
      // createIndexWriter() reapplies it to future writers.
      if (indexWriter != null) {
        indexWriter.setInfoStream(infoStream);
      }
      this.infoStream = infoStream;
    }
  }

  /**
   * Returns the info stream in use, opening a writer if necessary.
   *
   * @throws IOException if the writer cannot be (re)opened
   * @see IndexModifier#setInfoStream(PrintStream)
   */
  public PrintStream getInfoStream() throws IOException {
    synchronized(directory) {
      assureOpen();
      createIndexWriter();
      return indexWriter.getInfoStream();
    }
  }

  /**
   * Setting to turn on usage of a compound file. When on, multiple files
   * for each segment are merged into a single file once the segment creation
   * is finished. This is done regardless of what directory is in use.
   *
   * @see IndexWriter#setUseCompoundFile(boolean)
   * @throws IllegalStateException if the index is closed
   */
  public void setUseCompoundFile(boolean useCompoundFile) {
    synchronized(directory) {
      assureOpen();
      if (indexWriter != null) {
        indexWriter.setUseCompoundFile(useCompoundFile);
      }
      this.useCompoundFile = useCompoundFile;
    }
  }

  /**
   * Returns whether the compound file format is in use, opening a writer if
   * necessary.
   *
   * @throws IOException if the writer cannot be (re)opened
   * @see IndexModifier#setUseCompoundFile(boolean)
   */
  public boolean getUseCompoundFile() throws IOException {
    synchronized(directory) {
      assureOpen();
      createIndexWriter();
      return indexWriter.getUseCompoundFile();
    }
  }

  /**
   * The maximum number of terms that will be indexed for a single field in a
   * document. This limits the amount of memory required for indexing, so that
   * collections with very large files will not crash the indexing process by
   * running out of memory.
   * <p>Note that this effectively truncates large documents, excluding from
   * the index terms that occur further in the document. If you know your
   * source documents are large, be sure to set this value high enough to
   * accommodate the expected size. If you set it to Integer.MAX_VALUE, then
   * the only limit is your memory, but you should anticipate an
   * OutOfMemoryError.
   * <p>By default, no more than 10,000 terms will be indexed for a field.
   *
   * @see IndexWriter#setMaxFieldLength(int)
   * @throws IllegalStateException if the index is closed
   */
  public void setMaxFieldLength(int maxFieldLength) {
    synchronized(directory) {
      assureOpen();
      if (indexWriter != null) {
        indexWriter.setMaxFieldLength(maxFieldLength);
      }
      this.maxFieldLength = maxFieldLength;
    }
  }

  /**
   * Returns the maximum field length, opening a writer if necessary.
   *
   * @throws IOException if the writer cannot be (re)opened
   * @see IndexModifier#setMaxFieldLength(int)
   */
  public int getMaxFieldLength() throws IOException {
    synchronized(directory) {
      assureOpen();
      createIndexWriter();
      return indexWriter.getMaxFieldLength();
    }
  }

  /**
   * Determines the minimal number of documents required before the buffered
   * in-memory documents are merged and a new Segment is created.
   * Since Documents are merged in a {@link org.apache.lucene.store.RAMDirectory},
   * large value gives faster indexing. At the same time, mergeFactor limits
   * the number of files open in a FSDirectory.
   * <p>The default value is 10.
   *
   * @see IndexWriter#setMaxBufferedDocs(int)
   * @throws IllegalStateException if the index is closed
   * @throws IllegalArgumentException if maxBufferedDocs is smaller than 2
   */
  public void setMaxBufferedDocs(int maxBufferedDocs) {
    synchronized(directory) {
      assureOpen();
      // Forward to the writer before caching: the writer validates the value
      // and may throw IllegalArgumentException, leaving the cache unchanged.
      if (indexWriter != null) {
        indexWriter.setMaxBufferedDocs(maxBufferedDocs);
      }
      this.maxBufferedDocs = maxBufferedDocs;
    }
  }

  /**
   * Returns the max buffered docs setting, opening a writer if necessary.
   *
   * @throws IOException if the writer cannot be (re)opened
   * @see IndexModifier#setMaxBufferedDocs(int)
   */
  public int getMaxBufferedDocs() throws IOException {
    synchronized(directory) {
      assureOpen();
      createIndexWriter();
      return indexWriter.getMaxBufferedDocs();
    }
  }

  /**
   * Determines how often segment indices are merged by addDocument(). With
   * smaller values, less RAM is used while indexing, and searches on
   * unoptimized indices are faster, but indexing speed is slower. With larger
   * values, more RAM is used during indexing, and while searches on
   * unoptimized indices are slower, indexing is faster. Thus larger values
   * (&gt; 10) are best for batch index creation, and smaller values (&lt; 10)
   * for indices that are interactively maintained.
   * <p>This must never be less than 2. The default value is 10.
   *
   * @see IndexWriter#setMergeFactor(int)
   * @throws IllegalStateException if the index is closed
   */
  public void setMergeFactor(int mergeFactor) {
    synchronized(directory) {
      assureOpen();
      if (indexWriter != null) {
        indexWriter.setMergeFactor(mergeFactor);
      }
      this.mergeFactor = mergeFactor;
    }
  }

  /**
   * Returns the merge factor, opening a writer if necessary.
   *
   * @throws IOException if the writer cannot be (re)opened
   * @see IndexModifier#setMergeFactor(int)
   */
  public int getMergeFactor() throws IOException {
    synchronized(directory) {
      assureOpen();
      createIndexWriter();
      return indexWriter.getMergeFactor();
    }
  }

  /**
   * Close this index, writing all pending changes to disk.
   *
   * @throws IllegalStateException if the index has been closed before already
   * @throws IOException if closing the underlying writer or reader fails
   */
  public void close() throws IOException {
    synchronized(directory) {
      if (!open)
        throw new IllegalStateException("Index is closed already");
      // Exactly one of the two accessors is open; close it.
      if (indexWriter != null) {
        indexWriter.close();
        indexWriter = null;
      } else {
        indexReader.close();
        indexReader = null;
      }
      open = false;
    }
  }

  public String toString() {
    return "Index@" + directory;
  }

  /*
  // used as an example in the javadoc:
  public static void main(String[] args) throws IOException {
    Analyzer analyzer = new StandardAnalyzer();
    // create an index in /tmp/index, overwriting an existing one:
    IndexModifier indexModifier = new IndexModifier("/tmp/index", analyzer, true);
    Document doc = new Document();
    doc.add(new Fieldable("id", "1", Fieldable.Store.YES, Fieldable.Index.UN_TOKENIZED));
    doc.add(new Fieldable("body", "a simple test", Fieldable.Store.YES, Fieldable.Index.TOKENIZED));
    indexModifier.addDocument(doc);
    int deleted = indexModifier.delete(new Term("id", "1"));
    System.out.println("Deleted " + deleted + " document");
    indexModifier.flush();
    System.out.println(indexModifier.docCount() + " docs in index");
    indexModifier.close();
  }*/

}