| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.facet.taxonomy.writercache; |
| |
| import org.apache.lucene.facet.taxonomy.FacetLabel; |
| import org.apache.lucene.util.Accountable; |
| import org.apache.lucene.util.ArrayUtil; |
| import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator; |
| import org.apache.lucene.util.ByteBlockPool; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.BytesRefBuilder; |
| import org.apache.lucene.util.BytesRefHash; |
| import org.apache.lucene.util.Counter; |
| import org.apache.lucene.util.RamUsageEstimator; |
| import org.apache.lucene.util.UnicodeUtil; |
| |
| /** A "cache" that never frees memory, and stores labels in a BytesRefHash (utf-8 encoding). */ |
| public final class UTF8TaxonomyWriterCache implements TaxonomyWriterCache, Accountable { |
| private final ThreadLocal<BytesRefBuilder> bytes = new ThreadLocal<BytesRefBuilder>() { |
| @Override |
| protected BytesRefBuilder initialValue() { |
| return new BytesRefBuilder(); |
| } |
| }; |
| |
| private final Counter bytesUsed = Counter.newCounter(); |
| private final BytesRefHash map = new BytesRefHash(new ByteBlockPool(new DirectTrackingAllocator(bytesUsed))); |
| |
| private final static int PAGE_BITS = 16; |
| private final static int PAGE_SIZE = 1 << PAGE_BITS; |
| private final static int PAGE_MASK = PAGE_SIZE - 1; |
| |
| private volatile int[][] ordinals; |
| |
| // How many labels we are storing: |
| private int count; |
| |
| // How many pages in ordinals we've allocated: |
| private int pageCount; |
| |
| /** Sole constructor. */ |
| public UTF8TaxonomyWriterCache() { |
| ordinals = new int[1][]; |
| ordinals[0] = new int[PAGE_SIZE]; |
| } |
| |
| @Override |
| public int get(FacetLabel label) { |
| BytesRef bytes = toBytes(label); |
| int id; |
| synchronized (this) { |
| id = map.find(bytes); |
| } |
| if (id == -1) { |
| return LabelToOrdinal.INVALID_ORDINAL; |
| } |
| int page = id >>> PAGE_BITS; |
| int offset = id & PAGE_MASK; |
| return ordinals[page][offset]; |
| } |
| |
| // Called only from assert |
| private boolean assertSameOrdinal(FacetLabel label, int id, int ord) { |
| id = -id - 1; |
| int page = id >>> PAGE_BITS; |
| int offset = id & PAGE_MASK; |
| int oldOrd = ordinals[page][offset]; |
| if (oldOrd != ord) { |
| throw new IllegalArgumentException("label " + label + " was already cached, with old ord=" + oldOrd + " versus new ord=" + ord); |
| } |
| return true; |
| } |
| |
| @Override |
| public boolean put(FacetLabel label, int ord) { |
| BytesRef bytes = toBytes(label); |
| int id; |
| synchronized (this) { |
| id = map.add(bytes); |
| if (id < 0) { |
| assert assertSameOrdinal(label, id, ord); |
| return false; |
| } |
| assert id == count; |
| int page = id >>> PAGE_BITS; |
| int offset = id & PAGE_MASK; |
| if (page == pageCount) { |
| if (page == ordinals.length) { |
| int[][] newOrdinals = new int[ArrayUtil.oversize(page+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)][]; |
| System.arraycopy(ordinals, 0, newOrdinals, 0, ordinals.length); |
| ordinals = newOrdinals; |
| } |
| ordinals[page] = new int[PAGE_SIZE]; |
| pageCount++; |
| } |
| ordinals[page][offset] = ord; |
| count++; |
| |
| // we never prune from the cache |
| return false; |
| } |
| } |
| |
| @Override |
| public boolean isFull() { |
| // we are never full |
| return false; |
| } |
| |
| @Override |
| public synchronized void clear() { |
| map.clear(); |
| map.reinit(); |
| ordinals = new int[1][]; |
| ordinals[0] = new int[PAGE_SIZE]; |
| count = 0; |
| pageCount = 0; |
| assert bytesUsed.get() == 0; |
| } |
| |
| /** How many labels are currently stored in the cache. */ |
| public int size() { |
| return count; |
| } |
| |
| @Override |
| public synchronized long ramBytesUsed() { |
| return bytesUsed.get() + pageCount * PAGE_SIZE * Integer.BYTES; |
| } |
| |
| @Override |
| public void close() { |
| } |
| |
| private static final byte DELIM_CHAR = (byte) 0x1F; |
| |
| private BytesRef toBytes(FacetLabel label) { |
| BytesRefBuilder bytes = this.bytes.get(); |
| bytes.clear(); |
| for (int i = 0; i < label.length; i++) { |
| String part = label.components[i]; |
| if (i > 0) { |
| bytes.append(DELIM_CHAR); |
| } |
| bytes.grow(bytes.length() + UnicodeUtil.maxUTF8Length(part.length())); |
| bytes.setLength(UnicodeUtil.UTF16toUTF8(part, 0, part.length(), bytes.bytes(), bytes.length())); |
| } |
| return bytes.get(); |
| } |
| } |