package org.apache.lucene.index; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import java.io.File; import java.io.IOException; import java.io.PrintStream; /** *

[Note that as of 2.1, all but one of the * methods in this class are available via {@link * IndexWriter}. The one method that is not available is * {@link #deleteDocument(int)}.]

* * A class to modify an index, i.e. to delete and add documents. This * class hides {@link IndexReader} and {@link IndexWriter} so that you * do not need to care about implementation details such as that adding * documents is done via IndexWriter and deletion is done via IndexReader. * *

Note that you cannot create more than one IndexModifier object * on the same directory at the same time. * *

 * <p>Example usage:
 * <pre>
 *     Analyzer analyzer = new StandardAnalyzer();
 *     // create an index in /tmp/index, overwriting an existing one:
 *     IndexModifier indexModifier = new IndexModifier("/tmp/index", analyzer, true);
 *     Document doc = new Document();
 *     doc.add(new Field("id", "1", Field.Store.YES, Field.Index.UN_TOKENIZED));
 *     doc.add(new Field("body", "a simple test", Field.Store.YES, Field.Index.TOKENIZED));
 *     indexModifier.addDocument(doc);
 *     int deleted = indexModifier.deleteDocuments(new Term("id", "1"));
 *     System.out.println("Deleted " + deleted + " document");
 *     indexModifier.flush();
 *     System.out.println(indexModifier.docCount() + " docs in index");
 *     indexModifier.close();
 * </pre>
 *

Not all methods of IndexReader and IndexWriter are offered by this * class. If you need access to additional methods, either use those classes * directly or implement your own class that extends IndexModifier. * *

Although an instance of this class can be used from more than one * thread, you will not get the best performance. You might want to use * IndexReader and IndexWriter directly for that (but you will need to * care about synchronization yourself then). * *

While you can freely mix calls to add() and delete() using this class, * you should batch you calls for best performance. For example, if you * want to update 20 documents, you should first delete all those documents, * then add all the new documents. * * @author Daniel Naber */ public class IndexModifier { protected IndexWriter indexWriter = null; protected IndexReader indexReader = null; protected Directory directory = null; protected Analyzer analyzer = null; protected boolean open = false; // Lucene defaults: protected PrintStream infoStream = null; protected boolean useCompoundFile = true; protected int maxBufferedDocs = IndexWriter.DEFAULT_MAX_BUFFERED_DOCS; protected int maxFieldLength = IndexWriter.DEFAULT_MAX_FIELD_LENGTH; protected int mergeFactor = IndexWriter.DEFAULT_MERGE_FACTOR; /** * Open an index with write access. * * @param directory the index directory * @param analyzer the analyzer to use for adding new documents * @param create true to create the index or overwrite the existing one; * false to append to the existing index */ public IndexModifier(Directory directory, Analyzer analyzer, boolean create) throws IOException { init(directory, analyzer, create); } /** * Open an index with write access. * * @param dirName the index directory * @param analyzer the analyzer to use for adding new documents * @param create true to create the index or overwrite the existing one; * false to append to the existing index */ public IndexModifier(String dirName, Analyzer analyzer, boolean create) throws IOException { Directory dir = FSDirectory.getDirectory(dirName); init(dir, analyzer, create); } /** * Open an index with write access. 
* * @param file the index directory * @param analyzer the analyzer to use for adding new documents * @param create true to create the index or overwrite the existing one; * false to append to the existing index */ public IndexModifier(File file, Analyzer analyzer, boolean create) throws IOException { Directory dir = FSDirectory.getDirectory(file); init(dir, analyzer, create); } /** * Initialize an IndexWriter. * @throws IOException */ protected void init(Directory directory, Analyzer analyzer, boolean create) throws IOException { this.directory = directory; synchronized(this.directory) { this.analyzer = analyzer; indexWriter = new IndexWriter(directory, analyzer, create); open = true; } } /** * Throw an IllegalStateException if the index is closed. * @throws IllegalStateException */ protected void assureOpen() { if (!open) { throw new IllegalStateException("Index is closed"); } } /** * Close the IndexReader and open an IndexWriter. * @throws IOException */ protected void createIndexWriter() throws IOException { if (indexWriter == null) { if (indexReader != null) { indexReader.close(); indexReader = null; } indexWriter = new IndexWriter(directory, analyzer, false); indexWriter.setInfoStream(infoStream); indexWriter.setUseCompoundFile(useCompoundFile); indexWriter.setMaxBufferedDocs(maxBufferedDocs); indexWriter.setMaxFieldLength(maxFieldLength); indexWriter.setMergeFactor(mergeFactor); } } /** * Close the IndexWriter and open an IndexReader. * @throws IOException */ protected void createIndexReader() throws IOException { if (indexReader == null) { if (indexWriter != null) { indexWriter.close(); indexWriter = null; } indexReader = IndexReader.open(directory); } } /** * Make sure all changes are written to disk. 
* @throws IOException */ public void flush() throws IOException { synchronized(directory) { assureOpen(); if (indexWriter != null) { indexWriter.close(); indexWriter = null; createIndexWriter(); } else { indexReader.close(); indexReader = null; createIndexReader(); } } } /** * Adds a document to this index, using the provided analyzer instead of the * one specific in the constructor. If the document contains more than * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are * discarded. * @see IndexWriter#addDocument(Document, Analyzer) * @throws IllegalStateException if the index is closed */ public void addDocument(Document doc, Analyzer docAnalyzer) throws IOException { synchronized(directory) { assureOpen(); createIndexWriter(); if (docAnalyzer != null) indexWriter.addDocument(doc, docAnalyzer); else indexWriter.addDocument(doc); } } /** * Adds a document to this index. If the document contains more than * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are * discarded. * @see IndexWriter#addDocument(Document) * @throws IllegalStateException if the index is closed */ public void addDocument(Document doc) throws IOException { addDocument(doc, null); } /** * Deletes all documents containing term. * This is useful if one uses a document field to hold a unique ID string for * the document. Then to delete such a document, one merely constructs a * term with the appropriate field and the unique ID string as its text and * passes it to this method. Returns the number of documents deleted. * @return the number of documents deleted * @see IndexReader#deleteDocuments(Term) * @throws IllegalStateException if the index is closed */ public int deleteDocuments(Term term) throws IOException { synchronized(directory) { assureOpen(); createIndexReader(); return indexReader.deleteDocuments(term); } } /** * Deletes the document numbered docNum. 
* @see IndexReader#deleteDocument(int) * @throws IllegalStateException if the index is closed */ public void deleteDocument(int docNum) throws IOException { synchronized(directory) { assureOpen(); createIndexReader(); indexReader.deleteDocument(docNum); } } /** * Returns the number of documents currently in this index. * @see IndexWriter#docCount() * @see IndexReader#numDocs() * @throws IllegalStateException if the index is closed */ public int docCount() { synchronized(directory) { assureOpen(); if (indexWriter != null) { return indexWriter.docCount(); } else { return indexReader.numDocs(); } } } /** * Merges all segments together into a single segment, optimizing an index * for search. * @see IndexWriter#optimize() * @throws IllegalStateException if the index is closed */ public void optimize() throws IOException { synchronized(directory) { assureOpen(); createIndexWriter(); indexWriter.optimize(); } } /** * If non-null, information about merges and a message when * {@link #getMaxFieldLength()} is reached will be printed to this. *

Example: index.setInfoStream(System.err); * @see IndexWriter#setInfoStream(PrintStream) * @throws IllegalStateException if the index is closed */ public void setInfoStream(PrintStream infoStream) { synchronized(directory) { assureOpen(); if (indexWriter != null) { indexWriter.setInfoStream(infoStream); } this.infoStream = infoStream; } } /** * @throws IOException * @see IndexModifier#setInfoStream(PrintStream) */ public PrintStream getInfoStream() throws IOException { synchronized(directory) { assureOpen(); createIndexWriter(); return indexWriter.getInfoStream(); } } /** * Setting to turn on usage of a compound file. When on, multiple files * for each segment are merged into a single file once the segment creation * is finished. This is done regardless of what directory is in use. * @see IndexWriter#setUseCompoundFile(boolean) * @throws IllegalStateException if the index is closed */ public void setUseCompoundFile(boolean useCompoundFile) { synchronized(directory) { assureOpen(); if (indexWriter != null) { indexWriter.setUseCompoundFile(useCompoundFile); } this.useCompoundFile = useCompoundFile; } } /** * @throws IOException * @see IndexModifier#setUseCompoundFile(boolean) */ public boolean getUseCompoundFile() throws IOException { synchronized(directory) { assureOpen(); createIndexWriter(); return indexWriter.getUseCompoundFile(); } } /** * The maximum number of terms that will be indexed for a single field in a * document. This limits the amount of memory required for indexing, so that * collections with very large files will not crash the indexing process by * running out of memory.

* Note that this effectively truncates large documents, excluding from the * index terms that occur further in the document. If you know your source * documents are large, be sure to set this value high enough to accomodate * the expected size. If you set it to Integer.MAX_VALUE, then the only limit * is your memory, but you should anticipate an OutOfMemoryError.

* By default, no more than 10,000 terms will be indexed for a field. * @see IndexWriter#setMaxFieldLength(int) * @throws IllegalStateException if the index is closed */ public void setMaxFieldLength(int maxFieldLength) { synchronized(directory) { assureOpen(); if (indexWriter != null) { indexWriter.setMaxFieldLength(maxFieldLength); } this.maxFieldLength = maxFieldLength; } } /** * @throws IOException * @see IndexModifier#setMaxFieldLength(int) */ public int getMaxFieldLength() throws IOException { synchronized(directory) { assureOpen(); createIndexWriter(); return indexWriter.getMaxFieldLength(); } } /** * Determines the minimal number of documents required before the buffered * in-memory documents are merging and a new Segment is created. * Since Documents are merged in a {@link org.apache.lucene.store.RAMDirectory}, * large value gives faster indexing. At the same time, mergeFactor limits * the number of files open in a FSDirectory. * *

The default value is 10. * * @see IndexWriter#setMaxBufferedDocs(int) * @throws IllegalStateException if the index is closed * @throws IllegalArgumentException if maxBufferedDocs is smaller than 2 */ public void setMaxBufferedDocs(int maxBufferedDocs) { synchronized(directory) { assureOpen(); if (indexWriter != null) { indexWriter.setMaxBufferedDocs(maxBufferedDocs); } this.maxBufferedDocs = maxBufferedDocs; } } /** * @throws IOException * @see IndexModifier#setMaxBufferedDocs(int) */ public int getMaxBufferedDocs() throws IOException { synchronized(directory) { assureOpen(); createIndexWriter(); return indexWriter.getMaxBufferedDocs(); } } /** * Determines how often segment indices are merged by addDocument(). With * smaller values, less RAM is used while indexing, and searches on * unoptimized indices are faster, but indexing speed is slower. With larger * values, more RAM is used during indexing, and while searches on unoptimized * indices are slower, indexing is faster. Thus larger values (> 10) are best * for batch index creation, and smaller values (< 10) for indices that are * interactively maintained. *

This must never be less than 2. The default value is 10. * * @see IndexWriter#setMergeFactor(int) * @throws IllegalStateException if the index is closed */ public void setMergeFactor(int mergeFactor) { synchronized(directory) { assureOpen(); if (indexWriter != null) { indexWriter.setMergeFactor(mergeFactor); } this.mergeFactor = mergeFactor; } } /** * @throws IOException * @see IndexModifier#setMergeFactor(int) */ public int getMergeFactor() throws IOException { synchronized(directory) { assureOpen(); createIndexWriter(); return indexWriter.getMergeFactor(); } } /** * Close this index, writing all pending changes to disk. * * @throws IllegalStateException if the index has been closed before already */ public void close() throws IOException { synchronized(directory) { if (!open) throw new IllegalStateException("Index is closed already"); if (indexWriter != null) { indexWriter.close(); indexWriter = null; } else { indexReader.close(); indexReader = null; } open = false; } } public String toString() { return "Index@" + directory; } /* // used as an example in the javadoc: public static void main(String[] args) throws IOException { Analyzer analyzer = new StandardAnalyzer(); // create an index in /tmp/index, overwriting an existing one: IndexModifier indexModifier = new IndexModifier("/tmp/index", analyzer, true); Document doc = new Document(); doc.add(new Fieldable("id", "1", Fieldable.Store.YES, Fieldable.Index.UN_TOKENIZED)); doc.add(new Fieldable("body", "a simple test", Fieldable.Store.YES, Fieldable.Index.TOKENIZED)); indexModifier.addDocument(doc); int deleted = indexModifier.delete(new Term("id", "1")); System.out.println("Deleted " + deleted + " document"); indexModifier.flush(); System.out.println(indexModifier.docCount() + " docs in index"); indexModifier.close(); }*/ }