| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.codecs.simpletext; |
| |
| |
| import java.io.IOException; |
| import java.nio.charset.StandardCharsets; |
| import java.text.ParseException; |
| import java.util.Arrays; |
| import java.util.Collections; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.Map; |
| import java.util.Set; |
| |
| import org.apache.lucene.codecs.SegmentInfoFormat; |
| import org.apache.lucene.index.CorruptIndexException; |
| import org.apache.lucene.index.IndexFileNames; |
| import org.apache.lucene.index.SegmentInfo; |
| import org.apache.lucene.search.Sort; |
| import org.apache.lucene.search.SortField; |
| import org.apache.lucene.search.SortedNumericSelector; |
| import org.apache.lucene.search.SortedNumericSortField; |
| import org.apache.lucene.search.SortedSetSelector; |
| import org.apache.lucene.search.SortedSetSortField; |
| import org.apache.lucene.store.ChecksumIndexInput; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.IOContext; |
| import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.store.IndexOutput; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.BytesRefBuilder; |
| import org.apache.lucene.util.StringHelper; |
| import org.apache.lucene.util.Version; |
| |
| /** |
| * plain text segments file format. |
| * <p> |
| * <b>FOR RECREATIONAL USE ONLY</b> |
| * @lucene.experimental |
| */ |
| public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat { |
| final static BytesRef SI_VERSION = new BytesRef(" version "); |
| final static BytesRef SI_MIN_VERSION = new BytesRef(" min version "); |
| final static BytesRef SI_DOCCOUNT = new BytesRef(" number of documents "); |
| final static BytesRef SI_USECOMPOUND = new BytesRef(" uses compound file "); |
| final static BytesRef SI_NUM_DIAG = new BytesRef(" diagnostics "); |
| final static BytesRef SI_DIAG_KEY = new BytesRef(" key "); |
| final static BytesRef SI_DIAG_VALUE = new BytesRef(" value "); |
| final static BytesRef SI_NUM_ATT = new BytesRef(" attributes "); |
| final static BytesRef SI_ATT_KEY = new BytesRef(" key "); |
| final static BytesRef SI_ATT_VALUE = new BytesRef(" value "); |
| final static BytesRef SI_NUM_FILES = new BytesRef(" files "); |
| final static BytesRef SI_FILE = new BytesRef(" file "); |
| final static BytesRef SI_ID = new BytesRef(" id "); |
| final static BytesRef SI_SORT = new BytesRef(" sort "); |
| final static BytesRef SI_SORT_FIELD = new BytesRef(" field "); |
| final static BytesRef SI_SORT_TYPE = new BytesRef(" type "); |
| final static BytesRef SI_SELECTOR_TYPE = new BytesRef(" selector "); |
| final static BytesRef SI_SORT_REVERSE = new BytesRef(" reverse "); |
| final static BytesRef SI_SORT_MISSING = new BytesRef(" missing "); |
| |
| public static final String SI_EXTENSION = "si"; |
| |
| @Override |
| public SegmentInfo read(Directory directory, String segmentName, byte[] segmentID, IOContext context) throws IOException { |
| BytesRefBuilder scratch = new BytesRefBuilder(); |
| String segFileName = IndexFileNames.segmentFileName(segmentName, "", SimpleTextSegmentInfoFormat.SI_EXTENSION); |
| try (ChecksumIndexInput input = directory.openChecksumInput(segFileName, context)) { |
| SimpleTextUtil.readLine(input, scratch); |
| assert StringHelper.startsWith(scratch.get(), SI_VERSION); |
| final Version version; |
| try { |
| version = Version.parse(readString(SI_VERSION.length, scratch)); |
| } catch (ParseException pe) { |
| throw new CorruptIndexException("unable to parse version string: " + pe.getMessage(), input, pe); |
| } |
| |
| SimpleTextUtil.readLine(input, scratch); |
| assert StringHelper.startsWith(scratch.get(), SI_MIN_VERSION); |
| Version minVersion; |
| try { |
| String versionString = readString(SI_MIN_VERSION.length, scratch); |
| if (versionString.equals("null")) { |
| minVersion = null; |
| } else { |
| minVersion = Version.parse(versionString); |
| } |
| } catch (ParseException pe) { |
| throw new CorruptIndexException("unable to parse version string: " + pe.getMessage(), input, pe); |
| } |
| |
| SimpleTextUtil.readLine(input, scratch); |
| assert StringHelper.startsWith(scratch.get(), SI_DOCCOUNT); |
| final int docCount = Integer.parseInt(readString(SI_DOCCOUNT.length, scratch)); |
| |
| SimpleTextUtil.readLine(input, scratch); |
| assert StringHelper.startsWith(scratch.get(), SI_USECOMPOUND); |
| final boolean isCompoundFile = Boolean.parseBoolean(readString(SI_USECOMPOUND.length, scratch)); |
| |
| SimpleTextUtil.readLine(input, scratch); |
| assert StringHelper.startsWith(scratch.get(), SI_NUM_DIAG); |
| int numDiag = Integer.parseInt(readString(SI_NUM_DIAG.length, scratch)); |
| Map<String,String> diagnostics = new HashMap<>(); |
| |
| for (int i = 0; i < numDiag; i++) { |
| SimpleTextUtil.readLine(input, scratch); |
| assert StringHelper.startsWith(scratch.get(), SI_DIAG_KEY); |
| String key = readString(SI_DIAG_KEY.length, scratch); |
| |
| SimpleTextUtil.readLine(input, scratch); |
| assert StringHelper.startsWith(scratch.get(), SI_DIAG_VALUE); |
| String value = readString(SI_DIAG_VALUE.length, scratch); |
| diagnostics.put(key, value); |
| } |
| |
| SimpleTextUtil.readLine(input, scratch); |
| assert StringHelper.startsWith(scratch.get(), SI_NUM_ATT); |
| int numAtt = Integer.parseInt(readString(SI_NUM_ATT.length, scratch)); |
| Map<String,String> attributes = new HashMap<>(numAtt); |
| |
| for (int i = 0; i < numAtt; i++) { |
| SimpleTextUtil.readLine(input, scratch); |
| assert StringHelper.startsWith(scratch.get(), SI_ATT_KEY); |
| String key = readString(SI_ATT_KEY.length, scratch); |
| |
| SimpleTextUtil.readLine(input, scratch); |
| assert StringHelper.startsWith(scratch.get(), SI_ATT_VALUE); |
| String value = readString(SI_ATT_VALUE.length, scratch); |
| attributes.put(key, value); |
| } |
| |
| SimpleTextUtil.readLine(input, scratch); |
| assert StringHelper.startsWith(scratch.get(), SI_NUM_FILES); |
| int numFiles = Integer.parseInt(readString(SI_NUM_FILES.length, scratch)); |
| Set<String> files = new HashSet<>(); |
| |
| for (int i = 0; i < numFiles; i++) { |
| SimpleTextUtil.readLine(input, scratch); |
| assert StringHelper.startsWith(scratch.get(), SI_FILE); |
| String fileName = readString(SI_FILE.length, scratch); |
| files.add(fileName); |
| } |
| |
| SimpleTextUtil.readLine(input, scratch); |
| assert StringHelper.startsWith(scratch.get(), SI_ID); |
| final byte[] id = Arrays.copyOfRange(scratch.bytes(), SI_ID.length, scratch.length()); |
| |
| if (!Arrays.equals(segmentID, id)) { |
| throw new CorruptIndexException("file mismatch, expected: " + StringHelper.idToString(segmentID) |
| + ", got: " + StringHelper.idToString(id), input); |
| } |
| |
| SimpleTextUtil.readLine(input, scratch); |
| assert StringHelper.startsWith(scratch.get(), SI_SORT); |
| final int numSortFields = Integer.parseInt(readString(SI_SORT.length, scratch)); |
| SortField[] sortField = new SortField[numSortFields]; |
| for (int i = 0; i < numSortFields; ++i) { |
| SimpleTextUtil.readLine(input, scratch); |
| assert StringHelper.startsWith(scratch.get(), SI_SORT_FIELD); |
| final String field = readString(SI_SORT_FIELD.length, scratch); |
| |
| SimpleTextUtil.readLine(input, scratch); |
| assert StringHelper.startsWith(scratch.get(), SI_SORT_TYPE); |
| final String typeAsString = readString(SI_SORT_TYPE.length, scratch); |
| |
| final SortField.Type type; |
| SortedSetSelector.Type selectorSet = null; |
| SortedNumericSelector.Type selectorNumeric = null; |
| switch (typeAsString) { |
| case "string": |
| type = SortField.Type.STRING; |
| break; |
| case "long": |
| type = SortField.Type.LONG; |
| break; |
| case "int": |
| type = SortField.Type.INT; |
| break; |
| case "double": |
| type = SortField.Type.DOUBLE; |
| break; |
| case "float": |
| type = SortField.Type.FLOAT; |
| break; |
| case "multi_valued_string": |
| type = SortField.Type.STRING; |
| selectorSet = readSetSelector(input, scratch); |
| break; |
| case "multi_valued_long": |
| type = SortField.Type.LONG; |
| selectorNumeric = readNumericSelector(input, scratch); |
| break; |
| case "multi_valued_int": |
| type = SortField.Type.INT; |
| selectorNumeric = readNumericSelector(input, scratch); |
| break; |
| case "multi_valued_double": |
| type = SortField.Type.DOUBLE; |
| selectorNumeric = readNumericSelector(input, scratch); |
| break; |
| case "multi_valued_float": |
| type = SortField.Type.FLOAT; |
| selectorNumeric = readNumericSelector(input, scratch); |
| break; |
| default: |
| throw new CorruptIndexException("unable to parse sort type string: " + typeAsString, input); |
| } |
| |
| SimpleTextUtil.readLine(input, scratch); |
| assert StringHelper.startsWith(scratch.get(), SI_SORT_REVERSE); |
| final boolean reverse = Boolean.parseBoolean(readString(SI_SORT_REVERSE.length, scratch)); |
| |
| SimpleTextUtil.readLine(input, scratch); |
| assert StringHelper.startsWith(scratch.get(), SI_SORT_MISSING); |
| final String missingLastAsString = readString(SI_SORT_MISSING.length, scratch); |
| final Object missingValue; |
| switch (type) { |
| case STRING: |
| switch (missingLastAsString) { |
| case "null": |
| missingValue = null; |
| break; |
| case "first": |
| missingValue = SortField.STRING_FIRST; |
| break; |
| case "last": |
| missingValue = SortField.STRING_LAST; |
| break; |
| default: |
| throw new CorruptIndexException("unable to parse missing string: " + typeAsString, input); |
| } |
| break; |
| case LONG: |
| switch (missingLastAsString) { |
| case "null": |
| missingValue = null; |
| break; |
| default: |
| missingValue = Long.parseLong(missingLastAsString); |
| break; |
| } |
| break; |
| case INT: |
| switch (missingLastAsString) { |
| case "null": |
| missingValue = null; |
| break; |
| default: |
| missingValue = Integer.parseInt(missingLastAsString); |
| break; |
| } |
| break; |
| case DOUBLE: |
| switch (missingLastAsString) { |
| case "null": |
| missingValue = null; |
| break; |
| default: |
| missingValue = Double.parseDouble(missingLastAsString); |
| break; |
| } |
| break; |
| case FLOAT: |
| switch (missingLastAsString) { |
| case "null": |
| missingValue = null; |
| break; |
| default: |
| missingValue = Float.parseFloat(missingLastAsString); |
| break; |
| } |
| break; |
| default: |
| throw new AssertionError(); |
| } |
| if (selectorSet != null) { |
| sortField[i] = new SortedSetSortField(field, reverse); |
| } else if (selectorNumeric != null) { |
| sortField[i] = new SortedNumericSortField(field, type, reverse); |
| } else { |
| sortField[i] = new SortField(field, type, reverse); |
| } |
| if (missingValue != null) { |
| sortField[i].setMissingValue(missingValue); |
| } |
| } |
| Sort indexSort = sortField.length == 0 ? null : new Sort(sortField); |
| |
| SimpleTextUtil.checkFooter(input); |
| |
| SegmentInfo info = new SegmentInfo(directory, version, minVersion, segmentName, docCount, |
| isCompoundFile, null, Collections.unmodifiableMap(diagnostics), |
| id, Collections.unmodifiableMap(attributes), indexSort); |
| info.setFiles(files); |
| return info; |
| } |
| } |
| |
| private String readString(int offset, BytesRefBuilder scratch) { |
| return new String(scratch.bytes(), offset, scratch.length()-offset, StandardCharsets.UTF_8); |
| } |
| |
| private SortedSetSelector.Type readSetSelector(IndexInput input, BytesRefBuilder scratch) throws IOException { |
| SimpleTextUtil.readLine(input, scratch); |
| assert StringHelper.startsWith(scratch.get(), SI_SELECTOR_TYPE); |
| final String selectorAsString = readString(SI_SELECTOR_TYPE.length, scratch); |
| switch (selectorAsString) { |
| case "min": |
| return SortedSetSelector.Type.MIN; |
| case "middle_min": |
| return SortedSetSelector.Type.MIDDLE_MIN; |
| case "middle_max": |
| return SortedSetSelector.Type.MIDDLE_MAX; |
| case "max": |
| return SortedSetSelector.Type.MAX; |
| default: |
| throw new CorruptIndexException("unable to parse SortedSetSelector type: " + selectorAsString, input); |
| } |
| } |
| |
| private SortedNumericSelector.Type readNumericSelector(IndexInput input, BytesRefBuilder scratch) throws IOException { |
| SimpleTextUtil.readLine(input, scratch); |
| assert StringHelper.startsWith(scratch.get(), SI_SELECTOR_TYPE); |
| final String selectorAsString = readString(SI_SELECTOR_TYPE.length, scratch); |
| switch (selectorAsString) { |
| case "min": |
| return SortedNumericSelector.Type.MIN; |
| case "max": |
| return SortedNumericSelector.Type.MAX; |
| default: |
| throw new CorruptIndexException("unable to parse SortedNumericSelector type: " + selectorAsString, input); |
| } |
| } |
| |
| @Override |
| public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException { |
| |
| String segFileName = IndexFileNames.segmentFileName(si.name, "", SimpleTextSegmentInfoFormat.SI_EXTENSION); |
| |
| try (IndexOutput output = dir.createOutput(segFileName, ioContext)) { |
| // Only add the file once we've successfully created it, else IFD assert can trip: |
| si.addFile(segFileName); |
| BytesRefBuilder scratch = new BytesRefBuilder(); |
| |
| SimpleTextUtil.write(output, SI_VERSION); |
| SimpleTextUtil.write(output, si.getVersion().toString(), scratch); |
| SimpleTextUtil.writeNewline(output); |
| |
| SimpleTextUtil.write(output, SI_MIN_VERSION); |
| if (si.getMinVersion() == null) { |
| SimpleTextUtil.write(output, "null", scratch); |
| } else { |
| SimpleTextUtil.write(output, si.getMinVersion().toString(), scratch); |
| } |
| SimpleTextUtil.writeNewline(output); |
| |
| SimpleTextUtil.write(output, SI_DOCCOUNT); |
| SimpleTextUtil.write(output, Integer.toString(si.maxDoc()), scratch); |
| SimpleTextUtil.writeNewline(output); |
| |
| SimpleTextUtil.write(output, SI_USECOMPOUND); |
| SimpleTextUtil.write(output, Boolean.toString(si.getUseCompoundFile()), scratch); |
| SimpleTextUtil.writeNewline(output); |
| |
| Map<String,String> diagnostics = si.getDiagnostics(); |
| int numDiagnostics = diagnostics == null ? 0 : diagnostics.size(); |
| SimpleTextUtil.write(output, SI_NUM_DIAG); |
| SimpleTextUtil.write(output, Integer.toString(numDiagnostics), scratch); |
| SimpleTextUtil.writeNewline(output); |
| |
| if (numDiagnostics > 0) { |
| for (Map.Entry<String,String> diagEntry : diagnostics.entrySet()) { |
| SimpleTextUtil.write(output, SI_DIAG_KEY); |
| SimpleTextUtil.write(output, diagEntry.getKey(), scratch); |
| SimpleTextUtil.writeNewline(output); |
| |
| SimpleTextUtil.write(output, SI_DIAG_VALUE); |
| SimpleTextUtil.write(output, diagEntry.getValue(), scratch); |
| SimpleTextUtil.writeNewline(output); |
| } |
| } |
| |
| Map<String,String> attributes = si.getAttributes(); |
| SimpleTextUtil.write(output, SI_NUM_ATT); |
| SimpleTextUtil.write(output, Integer.toString(attributes.size()), scratch); |
| SimpleTextUtil.writeNewline(output); |
| |
| for (Map.Entry<String,String> attEntry : attributes.entrySet()) { |
| SimpleTextUtil.write(output, SI_ATT_KEY); |
| SimpleTextUtil.write(output, attEntry.getKey(), scratch); |
| SimpleTextUtil.writeNewline(output); |
| |
| SimpleTextUtil.write(output, SI_ATT_VALUE); |
| SimpleTextUtil.write(output, attEntry.getValue(), scratch); |
| SimpleTextUtil.writeNewline(output); |
| } |
| |
| Set<String> files = si.files(); |
| int numFiles = files == null ? 0 : files.size(); |
| SimpleTextUtil.write(output, SI_NUM_FILES); |
| SimpleTextUtil.write(output, Integer.toString(numFiles), scratch); |
| SimpleTextUtil.writeNewline(output); |
| |
| if (numFiles > 0) { |
| for(String fileName : files) { |
| SimpleTextUtil.write(output, SI_FILE); |
| SimpleTextUtil.write(output, fileName, scratch); |
| SimpleTextUtil.writeNewline(output); |
| } |
| } |
| |
| SimpleTextUtil.write(output, SI_ID); |
| SimpleTextUtil.write(output, new BytesRef(si.getId())); |
| SimpleTextUtil.writeNewline(output); |
| |
| Sort indexSort = si.getIndexSort(); |
| SimpleTextUtil.write(output, SI_SORT); |
| final int numSortFields = indexSort == null ? 0 : indexSort.getSort().length; |
| SimpleTextUtil.write(output, Integer.toString(numSortFields), scratch); |
| SimpleTextUtil.writeNewline(output); |
| for (int i = 0; i < numSortFields; ++i) { |
| final SortField sortField = indexSort.getSort()[i]; |
| |
| SimpleTextUtil.write(output, SI_SORT_FIELD); |
| SimpleTextUtil.write(output, sortField.getField(), scratch); |
| SimpleTextUtil.writeNewline(output); |
| |
| SimpleTextUtil.write(output, SI_SORT_TYPE); |
| final String sortTypeString; |
| final SortField.Type sortType; |
| final boolean multiValued; |
| if (sortField instanceof SortedSetSortField) { |
| sortType = SortField.Type.STRING; |
| multiValued = true; |
| } else if (sortField instanceof SortedNumericSortField) { |
| sortType = ((SortedNumericSortField) sortField).getNumericType(); |
| multiValued = true; |
| } else { |
| sortType = sortField.getType(); |
| multiValued = false; |
| } |
| switch (sortType) { |
| case STRING: |
| if (multiValued) { |
| sortTypeString = "multi_valued_string"; |
| } else { |
| sortTypeString = "string"; |
| } |
| break; |
| case LONG: |
| if (multiValued) { |
| sortTypeString = "multi_valued_long"; |
| } else { |
| sortTypeString = "long"; |
| } |
| break; |
| case INT: |
| if (multiValued) { |
| sortTypeString = "multi_valued_int"; |
| } else { |
| sortTypeString = "int"; |
| } |
| break; |
| case DOUBLE: |
| if (multiValued) { |
| sortTypeString = "multi_valued_double"; |
| } else { |
| sortTypeString = "double"; |
| } |
| break; |
| case FLOAT: |
| if (multiValued) { |
| sortTypeString = "multi_valued_float"; |
| } else { |
| sortTypeString = "float"; |
| } |
| break; |
| default: |
| throw new IllegalStateException("Unexpected sort type: " + sortField.getType()); |
| } |
| SimpleTextUtil.write(output, sortTypeString, scratch); |
| SimpleTextUtil.writeNewline(output); |
| |
| if (sortField instanceof SortedSetSortField) { |
| SortedSetSelector.Type selector = ((SortedSetSortField) sortField).getSelector(); |
| final String selectorString; |
| if (selector == SortedSetSelector.Type.MIN) { |
| selectorString = "min"; |
| } else if (selector == SortedSetSelector.Type.MIDDLE_MIN) { |
| selectorString = "middle_min"; |
| } else if (selector == SortedSetSelector.Type.MIDDLE_MAX) { |
| selectorString = "middle_max"; |
| } else if (selector == SortedSetSelector.Type.MAX) { |
| selectorString = "max"; |
| } else { |
| throw new IllegalStateException("Unexpected SortedSetSelector type selector: " + selector); |
| } |
| SimpleTextUtil.write(output, SI_SELECTOR_TYPE); |
| SimpleTextUtil.write(output, selectorString, scratch); |
| SimpleTextUtil.writeNewline(output); |
| } else if (sortField instanceof SortedNumericSortField) { |
| SortedNumericSelector.Type selector = ((SortedNumericSortField) sortField).getSelector(); |
| final String selectorString; |
| if (selector == SortedNumericSelector.Type.MIN) { |
| selectorString = "min"; |
| } else if (selector == SortedNumericSelector.Type.MAX) { |
| selectorString = "max"; |
| } else { |
| throw new IllegalStateException("Unexpected SortedNumericSelector type selector: " + selector); |
| } |
| SimpleTextUtil.write(output, SI_SELECTOR_TYPE); |
| SimpleTextUtil.write(output, selectorString, scratch); |
| SimpleTextUtil.writeNewline(output); |
| } |
| |
| SimpleTextUtil.write(output, SI_SORT_REVERSE); |
| SimpleTextUtil.write(output, Boolean.toString(sortField.getReverse()), scratch); |
| SimpleTextUtil.writeNewline(output); |
| |
| SimpleTextUtil.write(output, SI_SORT_MISSING); |
| final Object missingValue = sortField.getMissingValue(); |
| final String missing; |
| if (missingValue == null) { |
| missing = "null"; |
| } else if (missingValue == SortField.STRING_FIRST) { |
| missing = "first"; |
| } else if (missingValue == SortField.STRING_LAST) { |
| missing = "last"; |
| } else { |
| missing = missingValue.toString(); |
| } |
| SimpleTextUtil.write(output, missing, scratch); |
| SimpleTextUtil.writeNewline(output); |
| } |
| |
| SimpleTextUtil.writeChecksum(output, scratch); |
| } |
| } |
| } |