lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexToolsImpl.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.lucene.luke.models.tools;

 import java.io.BufferedWriter;
 import java.io.IOException;
 import java.io.PrintStream;
 import java.nio.charset.Charset;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.util.List;
 import java.util.Locale;
 import java.util.Objects;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.CheckIndex;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.MultiTerms;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.luke.models.LukeException;
 import org.apache.lucene.luke.models.LukeModel;
 import org.apache.lucene.luke.models.util.IndexUtils;
 import org.apache.lucene.luke.models.util.twentynewsgroups.Message;
 import org.apache.lucene.luke.models.util.twentynewsgroups.MessageFilesParser;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;

 /**
  * Default implementation of {@link IndexTools}.
  *
  * <p>Every write operation opens a short-lived {@link IndexWriter} through {@link IndexUtils} and
  * closes it before returning. I/O failures are wrapped in {@link LukeException}.
  */
 public final class IndexToolsImpl extends LukeModel implements IndexTools {

   /** Passed to {@link IndexUtils} whenever a writer is created. */
   private final boolean useCompound;

   /** Passed to {@link IndexUtils} whenever a writer is created. */
   private final boolean keepAllCommits;

   /**
    * Constructs an IndexToolsImpl that holds given {@link Directory}.
    *
    * @param dir - the index directory
    * @param useCompound - if true, compound file format is used
    * @param keepAllCommits - if true, all commit points are reserved
    */
   public IndexToolsImpl(Directory dir, boolean useCompound, boolean keepAllCommits) {
     super(dir);
     this.useCompound = useCompound;
     this.keepAllCommits = keepAllCommits;
   }

   /**
    * Constructs an IndexToolsImpl that holds given {@link IndexReader}.
    *
    * @param reader - the index reader
    * @param useCompound - if true, compound file format is used
    * @param keepAllCommits - if true, all commit points are reserved
    */
   public IndexToolsImpl(IndexReader reader, boolean useCompound, boolean keepAllCommits) {
     super(reader);
     this.useCompound = useCompound;
     this.keepAllCommits = keepAllCommits;
   }

   /**
    * Optimizes the index that backs the current reader.
    *
    * @param expunge - forwarded unchanged to {@code IndexUtils.optimizeIndex}
    * @param maxNumSegments - forwarded unchanged to {@code IndexUtils.optimizeIndex}
    * @param ps - stream handed to {@code IndexUtils.createWriter}
    * @throws LukeException - if the current reader is not a {@link DirectoryReader} or an I/O error
    *     occurs
    */
   @Override
   public void optimize(boolean expunge, int maxNumSegments, PrintStream ps) {
     if (!(reader instanceof DirectoryReader)) {
       throw new LukeException("Current reader is not a DirectoryReader.");
     }

     Directory directory = ((DirectoryReader) reader).directory();
     try (IndexWriter writer = IndexUtils.createWriter(directory, null, useCompound, keepAllCommits, ps)) {
       IndexUtils.optimizeIndex(writer, expunge, maxNumSegments);
     } catch (IOException e) {
       throw new LukeException("Failed to optimize index", e);
     }
   }

   /**
    * Checks the index, preferring the held directory and falling back to the directory of the
    * current reader when it is a {@link DirectoryReader}.
    *
    * @param ps - stream handed to {@code IndexUtils.checkIndex}
    * @return the status returned by {@code IndexUtils.checkIndex}
    * @throws LukeException - if no directory is available or the check fails for any reason
    */
   @Override
   public CheckIndex.Status checkIndex(PrintStream ps) {
     try {
       if (dir != null) {
         return IndexUtils.checkIndex(dir, ps);
       }
       if (reader instanceof DirectoryReader) {
         return IndexUtils.checkIndex(((DirectoryReader) reader).directory(), ps);
       }
       // Caught just below, so callers only ever see a LukeException.
       throw new IllegalStateException("Directory is not set.");
     } catch (Exception e) {
       throw new LukeException("Failed to check index.", e);
     }
   }

   /**
    * Tries to repair the index in the held directory.
    *
    * @param st - index status forwarded to {@code IndexUtils.tryRepairIndex}
    * @param ps - stream forwarded to {@code IndexUtils.tryRepairIndex}
    * @throws LukeException - if the directory is not set or the repair fails for any reason
    */
   @Override
   public void repairIndex(CheckIndex.Status st, PrintStream ps) {
     try {
       if (dir == null) {
         // Caught just below, so callers only ever see a LukeException.
         throw new IllegalStateException("Directory is not set.");
       }
       IndexUtils.tryRepairIndex(dir, st, ps);
     } catch (Exception e) {
       throw new LukeException("Failed to repair index.", e);
     }
   }

   /**
    * Adds a document to the index that backs the current reader and commits the change.
    *
    * @param doc - the document to add
    * @param analyzer - the analyzer handed to the writer; must not be null
    * @throws NullPointerException - if {@code analyzer} is null
    * @throws LukeException - if the current reader is not a {@link DirectoryReader} or an I/O error
    *     occurs
    */
   @Override
   public void addDocument(Document doc, Analyzer analyzer) {
     Objects.requireNonNull(analyzer);

     if (!(reader instanceof DirectoryReader)) {
       throw new LukeException("Current reader is not an instance of DirectoryReader.");
     }

     Directory directory = ((DirectoryReader) reader).directory();
     try (IndexWriter writer = IndexUtils.createWriter(directory, analyzer, useCompound, keepAllCommits)) {
       writer.addDocument(doc);
       writer.commit();
     } catch (IOException e) {
       throw new LukeException("Failed to add document", e);
     }
   }

   /**
    * Deletes the documents matching the query from the index that backs the current reader and
    * commits the change.
    *
    * @param query - selects the documents to delete; must not be null
    * @throws NullPointerException - if {@code query} is null
    * @throws LukeException - if the current reader is not a {@link DirectoryReader} or an I/O error
    *     occurs
    */
   @Override
   public void deleteDocuments(Query query) {
     Objects.requireNonNull(query);

     if (!(reader instanceof DirectoryReader)) {
       throw new LukeException("Current reader is not an instance of DirectoryReader.");
     }

     Directory directory = ((DirectoryReader) reader).directory();
     try (IndexWriter writer = IndexUtils.createWriter(directory, null, useCompound, keepAllCommits)) {
       writer.deleteDocuments(query);
       writer.commit();
     } catch (IOException e) {
       // The message used to be a copy of the one in addDocument.
       throw new LukeException("Failed to delete documents", e);
     }
   }

   /** Creates a new, empty index in the held directory. */
   @Override
   public void createNewIndex() {
     createNewIndex(null);
   }

   /**
    * Creates a new index in the held directory, optionally filled with the messages parsed from
    * the given data directory.
    *
    * @param dataDir - path of the directory parsed by {@link MessageFilesParser}; if null, the
    *     index is created without documents
    * @throws IllegalStateException - if the directory is not set or already contains files
    * @throws LukeException - if an I/O error occurs, including while closing the writer
    */
   @Override
   public void createNewIndex(String dataDir) {
     try {
       if (dir == null) {
         throw new IllegalStateException("Directory is not set.");
       }
       if (dir.listAll().length > 0) {
         throw new IllegalStateException("Directory is not empty.");
       }

       // try-with-resources, like the other write operations, so that a failure while closing
       // the writer is reported instead of being silently dropped.
       try (IndexWriter writer =
           IndexUtils.createWriter(dir, Message.createLuceneAnalyzer(), useCompound, keepAllCommits)) {
         if (dataDir != null) {
           MessageFilesParser parser = new MessageFilesParser(Paths.get(dataDir));
           List<Message> messages = parser.parseAll();
           for (Message message : messages) {
             writer.addDocument(message.toLuceneDoc());
           }
         }
         writer.commit();
       }
     } catch (IOException e) {
       throw new LukeException("Cannot create new index.", e);
     }
   }

   /**
    * Writes every term of a field, together with its document frequency, to a new UTF-8 text file.
    *
    * <p>The file is named {@code terms_<field>_<current time in millis>.out} and holds one
    * {@code <term><delimiter><docFreq>} entry per line. The loop stops early when the current
    * thread is interrupted; the path of the file written so far is still returned.
    *
    * @param destDir - directory in which the file is created
    * @param field - name of the field whose terms are exported
    * @param delimiter - text placed between a term and its document frequency
    * @return the path of the written file
    * @throws LukeException - if the field has no terms or an I/O error occurs
    */
   public String exportTerms(String destDir, String field, String delimiter) {
     String filename = "terms_" + field + "_" + System.currentTimeMillis() + ".out";
     Path path = Paths.get(destDir, filename);
     try {
       Terms terms = MultiTerms.getTerms(reader, field);
       if (terms == null) {
         throw new LukeException(String.format(Locale.US, "Field %s does not contain any terms to be exported", field));
       }
       try (BufferedWriter writer = Files.newBufferedWriter(path, Charset.forName("UTF-8"))) {
         TermsEnum termsEnum = terms.iterator();
         BytesRef term;
         while (!Thread.currentThread().isInterrupted() && (term = termsEnum.next()) != null) {
           writer.write(String.format(Locale.US, "%s%s%d\n", term.utf8ToString(), delimiter, termsEnum.docFreq()));
         }
         return path.toString();
       }
     } catch (IOException e) {
       throw new LukeException("Terms file export for field [" + field + "] to file [" + filename + "] has failed.", e);
     }
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package org.apache.lucene.luke.models.tools;

	import java.io.BufferedWriter;
	import java.io.IOException;
	import java.io.PrintStream;
	import java.nio.charset.Charset;
	import java.nio.file.Files;
	import java.nio.file.Path;
	import java.nio.file.Paths;
	import java.util.List;
	import java.util.Locale;
	import java.util.Objects;

	import org.apache.lucene.analysis.Analyzer;
	import org.apache.lucene.document.Document;
	import org.apache.lucene.index.CheckIndex;
	import org.apache.lucene.index.DirectoryReader;
	import org.apache.lucene.index.IndexReader;
	import org.apache.lucene.index.IndexWriter;
	import org.apache.lucene.index.MultiTerms;
	import org.apache.lucene.index.Terms;
	import org.apache.lucene.index.TermsEnum;
	import org.apache.lucene.luke.models.LukeException;
	import org.apache.lucene.luke.models.LukeModel;
	import org.apache.lucene.luke.models.util.IndexUtils;
	import org.apache.lucene.luke.models.util.twentynewsgroups.Message;
	import org.apache.lucene.luke.models.util.twentynewsgroups.MessageFilesParser;
	import org.apache.lucene.search.Query;
	import org.apache.lucene.store.Directory;
	import org.apache.lucene.util.BytesRef;

	/** Default implementation of {@link IndexTools} */
	public final class IndexToolsImpl extends LukeModel implements IndexTools {

	private final boolean useCompound;

	private final boolean keepAllCommits;

	/**
	* Constructs an IndexToolsImpl that holds given {@link Directory}.
	*
	* @param dir - the index directory
	* @param useCompound - if true, compound file format is used
	* @param keepAllCommits - if true, all commit points are reserved
	*/
	public IndexToolsImpl(Directory dir, boolean useCompound, boolean keepAllCommits) {
	super(dir);
	this.useCompound = useCompound;
	this.keepAllCommits = keepAllCommits;
	}

	/**
	* Constructs an IndexToolsImpl that holds given {@link IndexReader}.
	*
	* @param reader - the index reader
	* @param useCompound - if true, compound file format is used
	* @param keepAllCommits - if true, all commit points are reserved
	*/
	public IndexToolsImpl(IndexReader reader, boolean useCompound, boolean keepAllCommits) {
	super(reader);
	this.useCompound = useCompound;
	this.keepAllCommits = keepAllCommits;
	}

	@Override
	public void optimize(boolean expunge, int maxNumSegments, PrintStream ps) {
	if (reader instanceof DirectoryReader) {
	Directory dir = ((DirectoryReader) reader).directory();
	try (IndexWriter writer = IndexUtils.createWriter(dir, null, useCompound, keepAllCommits, ps)) {
	IndexUtils.optimizeIndex(writer, expunge, maxNumSegments);
	} catch (IOException e) {
	throw new LukeException("Failed to optimize index", e);
	}
	} else {
	throw new LukeException("Current reader is not a DirectoryReader.");
	}
	}

	@Override
	public CheckIndex.Status checkIndex(PrintStream ps) {
	try {
	if (dir != null) {
	return IndexUtils.checkIndex(dir, ps);
	} else if (reader instanceof DirectoryReader) {
	Directory dir = ((DirectoryReader) reader).directory();
	return IndexUtils.checkIndex(dir, ps);
	} else {
	throw new IllegalStateException("Directory is not set.");
	}
	} catch (Exception e) {
	throw new LukeException("Failed to check index.", e);
	}
	}

	@Override
	public void repairIndex(CheckIndex.Status st, PrintStream ps) {
	try {
	if (dir != null) {
	IndexUtils.tryRepairIndex(dir, st, ps);
	} else {
	throw new IllegalStateException("Directory is not set.");
	}
	} catch (Exception e) {
	throw new LukeException("Failed to repair index.", e);
	}
	}

	@Override
	public void addDocument(Document doc, Analyzer analyzer) {
	Objects.requireNonNull(analyzer);

	if (reader instanceof DirectoryReader) {
	Directory dir = ((DirectoryReader) reader).directory();
	try (IndexWriter writer = IndexUtils.createWriter(dir, analyzer, useCompound, keepAllCommits)) {
	writer.addDocument(doc);
	writer.commit();
	} catch (IOException e) {
	throw new LukeException("Failed to add document", e);
	}
	} else {
	throw new LukeException("Current reader is not an instance of DirectoryReader.");
	}
	}

	@Override
	public void deleteDocuments(Query query) {
	Objects.requireNonNull(query);

	if (reader instanceof DirectoryReader) {
	Directory dir = ((DirectoryReader) reader).directory();
	try (IndexWriter writer = IndexUtils.createWriter(dir, null, useCompound, keepAllCommits)) {
	writer.deleteDocuments(query);
	writer.commit();
	} catch (IOException e) {
	throw new LukeException("Failed to add document", e);
	}
	} else {
	throw new LukeException("Current reader is not an instance of DirectoryReader.");
	}
	}

	@Override
	public void createNewIndex() {
	createNewIndex(null);
	}

	@Override
	public void createNewIndex(String dataDir) {
	IndexWriter writer = null;
	try {
	if (dir == null \|\| dir.listAll().length > 0) {
	// Directory is null or not empty
	throw new IllegalStateException();
	}

	writer = IndexUtils.createWriter(dir, Message.createLuceneAnalyzer(), useCompound, keepAllCommits);

	if (Objects.nonNull(dataDir)) {
	Path path = Paths.get(dataDir);
	MessageFilesParser parser = new MessageFilesParser(path);
	List<Message> messages = parser.parseAll();
	for (Message message : messages) {
	writer.addDocument(message.toLuceneDoc());
	}
	}

	writer.commit();
	} catch (IOException e) {
	throw new LukeException("Cannot create new index.", e);
	} finally {
	if (writer != null) {
	try {
	writer.close();
	} catch (IOException e) {}
	}
	}
	}

	public String exportTerms(String destDir, String field, String delimiter) {
	String filename = "terms_" + field + "_" + System.currentTimeMillis() + ".out";
	Path path = Paths.get(destDir, filename);
	try {
	Terms terms = MultiTerms.getTerms(reader, field);
	if (terms == null) {
	throw new LukeException(String.format(Locale.US, "Field %s does not contain any terms to be exported", field));
	}
	try (BufferedWriter writer = Files.newBufferedWriter(path, Charset.forName("UTF-8"))) {
	TermsEnum termsEnum = terms.iterator();
	BytesRef term;
	while (!Thread.currentThread().isInterrupted() && (term = termsEnum.next()) != null) {
	writer.write(String.format(Locale.US, "%s%s%d\n", term.utf8ToString(), delimiter, +termsEnum.docFreq()));
	}
	return path.toString();
	}
	} catch (IOException e) {
	throw new LukeException("Terms file export for field [" + field + "] to file [" + filename + "] has failed.", e);
	}
	}
	}