update subcolumn (#15453)
diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/BPTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/BPTest.java new file mode 100644 index 0000000..36f5a35 --- /dev/null +++ b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/BPTest.java
@@ -0,0 +1,660 @@ +package org.apache.iotdb.tsfile.encoding; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Stream; + +import org.junit.Test; + +import com.csvreader.CsvReader; +import com.csvreader.CsvWriter; + +public class BPTest { + + public static int bitWidth(int value) { + return 32 - Integer.numberOfLeadingZeros(value); + } + + public static void intToBytes(int srcNum, byte[] result, int pos, int width) { + int cnt = pos & 0x07; + int index = pos >> 3; + while (width > 0) { + int m = width + cnt >= 8 ? 8 - cnt : width; + width -= m; + int mask = 1 << (8 - cnt); + cnt += m; + byte y = (byte) (srcNum >>> width); + y = (byte) (y << (8 - cnt)); + mask = ~(mask - (1 << (8 - cnt))); + result[index] = (byte) (result[index] & mask | y); + srcNum = srcNum & ~(-1 << width); + if (cnt == 8) { + index++; + cnt = 0; + } + } + } + + public static int bytesToInt(byte[] result, int pos, int width) { + int ret = 0; + int cnt = pos & 0x07; + int index = pos >> 3; + while (width > 0) { + int m = width + cnt >= 8 ? 
8 - cnt : width; + width -= m; + ret = ret << m; + byte y = (byte) (result[index] & (0xff >> cnt)); + y = (byte) ((y & 0xff) >>> (8 - cnt - m)); + ret = ret | (y & 0xff); + cnt += m; + if (cnt == 8) { + cnt = 0; + index++; + } + } + return ret; + } + + public static void pack8Values(int[] values, int offset, int width, int encode_pos, + byte[] encoded_result) { + int bufIdx = 0; + int valueIdx = offset; + // remaining bits for the current unfinished Integer + int leftBit = 0; + + while (valueIdx < 8 + offset) { + // buffer is used for saving 32 bits as a part of result + int buffer = 0; + // remaining size of bits in the 'buffer' + int leftSize = 32; + + // encode the left bits of current Integer to 'buffer' + if (leftBit > 0) { + buffer |= (values[valueIdx] << (32 - leftBit)); + leftSize -= leftBit; + leftBit = 0; + valueIdx++; + } + + while (leftSize >= width && valueIdx < 8 + offset) { + // encode one Integer to the 'buffer' + buffer |= (values[valueIdx] << (leftSize - width)); + leftSize -= width; + valueIdx++; + } + // If the remaining space of the buffer can not save the bits for one Integer, + if (leftSize > 0 && valueIdx < 8 + offset) { + // put the first 'leftSize' bits of the Integer into remaining space of the + // buffer + buffer |= (values[valueIdx] >>> (width - leftSize)); + leftBit = width - leftSize; + } + + // put the buffer into the final result + for (int j = 0; j < 4; j++) { + encoded_result[encode_pos] = (byte) ((buffer >>> ((3 - j) * 8)) & 0xFF); + encode_pos++; + bufIdx++; + if (bufIdx >= width) { + return; + } + } + } + + } + + public static void unpack8Values(byte[] encoded, int offset, int width, int[] result_list, int result_offset) { + int byteIdx = offset; + long buffer = 0; + // total bits which have read from 'buf' to 'buffer'. i.e., + // number of available bits to be decoded. 
+ int totalBits = 0; + int valueIdx = 0; + + while (valueIdx < 8) { + // If current available bits are not enough to decode one Integer, + // then add next byte from buf to 'buffer' until totalBits >= width + while (totalBits < width) { + buffer = (buffer << 8) | (encoded[byteIdx] & 0xFF); + byteIdx++; + totalBits += 8; + } + + // If current available bits are enough to decode one Integer, + // then decode one Integer one by one until left bits in 'buffer' is + // not enough to decode one Integer. + while (totalBits >= width && valueIdx < 8) { + // result_list.add((int) (buffer >>> (totalBits - width))); + result_list[result_offset + valueIdx] = (int) (buffer >>> (totalBits - width)); + valueIdx++; + totalBits -= width; + buffer = buffer & ((1L << totalBits) - 1); + } + } + } + + public static int bitPacking(int[] numbers, int bit_width, int encode_pos, + byte[] encoded_result, int num_values) { + int block_num = num_values / 8; + int remainder = num_values % 8; + + for (int i = 0; i < block_num; i++) { + pack8Values(numbers, i * 8, bit_width, encode_pos, encoded_result); + encode_pos += bit_width; + } + + encode_pos *= 8; + + for (int i = 0; i < remainder; i++) { + intToBytes(numbers[block_num * 8 + i], encoded_result, encode_pos, bit_width); + encode_pos += bit_width; + } + + return (encode_pos + 7) / 8; + } + + public static int decodeBitPacking( + byte[] encoded, int decode_pos, int bit_width, int num_values, int[] result_list) { + // ArrayList<Integer> result_list = new ArrayList<>(); + // int[] result_list = new int[num_values]; + int block_num = num_values / 8; + int remainder = num_values % 8; + + for (int i = 0; i < block_num; i++) { // bitpacking + unpack8Values(encoded, decode_pos, bit_width, result_list, i * 8); + decode_pos += bit_width; + } + + decode_pos *= 8; + + for (int i = 0; i < remainder; i++) { + result_list[block_num * 8 + i] = bytesToInt(encoded, decode_pos, bit_width); + decode_pos += bit_width; + } + + return (decode_pos + 7) / 8; + } + + 
public static int BPEncoder(int[] list, int encode_pos, byte[] encoded_result) { + int list_length = list.length; + int maxValue = 0; + for (int i = 0; i < list_length; i++) { + if (list[i] > maxValue) { + maxValue = list[i]; + } + } + + int m = bitWidth(maxValue); + // System.out.println("m: " + m); + + // writeBits(encoded_result, startBitPosition, 8, m); + // startBitPosition += 8; + encoded_result[encode_pos] = (byte) m; + encode_pos += 1; + + // bitPacking(list, encoded_result, startBitPosition, m, list_length); + // startBitPosition += m * list_length; + encode_pos = bitPacking(list, m, encode_pos, encoded_result, list_length); + + return encode_pos; + } + + public static int BPDecoder(byte[] encoded_result, int encode_pos, int[] list) { + int list_length = list.length; + + int m = encoded_result[encode_pos]; + encode_pos += 1; + + int[] new_list = new int[list_length]; + encode_pos = decodeBitPacking(encoded_result, encode_pos, m, list_length, new_list); + + for (int i = 0; i < list_length; i++) { + list[i] = new_list[i]; + } + + return encode_pos; + } + + public static int[] getAbsDeltaTsBlock( + int[] ts_block, + int i, + int block_size, + int remaining, + int[] min_delta) { + int[] ts_block_delta = new int[remaining]; + + int value_delta_min = Integer.MAX_VALUE; + int value_delta_max = Integer.MIN_VALUE; + int base = i * block_size; + int end = i * block_size + remaining; + + for (int j = base; j < end; j++) { + int cur = ts_block[j]; + if (cur < value_delta_min) { + value_delta_min = cur; + } + if (cur > value_delta_max) { + value_delta_max = cur; + } + } + + for (int j = base; j < end; j++) { + ts_block_delta[j - base] = ts_block[j] - value_delta_min; + } + + min_delta[0] = value_delta_min; + + return ts_block_delta; + } + + public static int BlockEncoder(int[] data, int block_index, int block_size, int remainder, + int encode_pos, byte[] encoded_result) { + int[] min_delta = new int[3]; + + int[] data_delta = getAbsDeltaTsBlock(data, block_index, 
block_size, + remainder, min_delta); + + encoded_result[encode_pos] = (byte) (min_delta[0] >> 24); + encoded_result[encode_pos + 1] = (byte) (min_delta[0] >> 16); + encoded_result[encode_pos + 2] = (byte) (min_delta[0] >> 8); + encoded_result[encode_pos + 3] = (byte) min_delta[0]; + encode_pos += 4; + + encode_pos = BPEncoder(data_delta, encode_pos, + encoded_result); + + return encode_pos; + } + + public static int BlockDecoder(byte[] encoded_result, int block_index, int block_size, int remainder, + int encode_pos, int[] data) { + int[] min_delta = new int[3]; + + min_delta[0] = ((encoded_result[encode_pos] & 0xFF) << 24) | ((encoded_result[encode_pos + 1] & 0xFF) << 16) | + ((encoded_result[encode_pos + 2] & 0xFF) << 8) | (encoded_result[encode_pos + 3] & 0xFF); + encode_pos += 4; + + int[] block_data = new int[remainder]; + + encode_pos = BPDecoder(encoded_result, encode_pos, + block_data); + + for (int i = 0; i < remainder; i++) { + data[block_index * block_size + i] = block_data[i] + min_delta[0]; + } + + return encode_pos; + } + + public static int Encoder(int[] data, int block_size, byte[] encoded_result) { + int data_length = data.length; + int encode_pos = 0; + + encoded_result[0] = (byte) (data_length >> 24); + encoded_result[1] = (byte) (data_length >> 16); + encoded_result[2] = (byte) (data_length >> 8); + encoded_result[3] = (byte) data_length; + encode_pos += 4; + + encoded_result[4] = (byte) (block_size >> 24); + encoded_result[5] = (byte) (block_size >> 16); + encoded_result[6] = (byte) (block_size >> 8); + encoded_result[7] = (byte) block_size; + encode_pos += 4; + + int num_blocks = data_length / block_size; + + for (int i = 0; i < num_blocks; i++) { + encode_pos = BlockEncoder(data, i, block_size, block_size, encode_pos, encoded_result); + } + + int remainder = data_length % block_size; + + if (remainder <= 3) { + for (int i = 0; i < remainder; i++) { + int value = data[num_blocks * block_size + i]; + encoded_result[encode_pos] = (byte) (value >> 
24); + encoded_result[encode_pos + 1] = (byte) (value >> 16); + encoded_result[encode_pos + 2] = (byte) (value >> 8); + encoded_result[encode_pos + 3] = (byte) value; + encode_pos += 4; + } + } else { + encode_pos = BlockEncoder(data, num_blocks, block_size, remainder, encode_pos, + encoded_result); + } + + return encode_pos; + } + + public static int[] Decoder(byte[] encoded_result) { + int encode_pos = 0; + + int data_length = ((encoded_result[encode_pos] & 0xFF) << 24) | ((encoded_result[encode_pos + 1] & 0xFF) << 16) + | + ((encoded_result[encode_pos + 2] & 0xFF) << 8) | (encoded_result[encode_pos + 3] & 0xFF); + encode_pos += 4; + + int block_size = ((encoded_result[encode_pos] & 0xFF) << 24) | ((encoded_result[encode_pos + 1] & 0xFF) << 16) | + ((encoded_result[encode_pos + 2] & 0xFF) << 8) | (encoded_result[encode_pos + 3] & 0xFF); + encode_pos += 4; + + int num_blocks = data_length / block_size; + + int[] data = new int[data_length]; + + for (int i = 0; i < num_blocks; i++) { + encode_pos = BlockDecoder(encoded_result, i, block_size, block_size, encode_pos, data); + } + + int remainder = data_length % block_size; + + if (remainder <= 3) { + for (int i = 0; i < remainder; i++) { + data[num_blocks * block_size + i] = ((encoded_result[encode_pos] & 0xFF) << 24) | + ((encoded_result[encode_pos + 1] & 0xFF) << 16) | + ((encoded_result[encode_pos + 2] & 0xFF) << 8) | (encoded_result[encode_pos + 3] & 0xFF); + encode_pos += 4; + } + } else { + encode_pos = BlockDecoder(encoded_result, num_blocks, block_size, remainder, + encode_pos, data); + } + + return data; + } + + public static int getDecimalPrecision(String str) { + // 查找小数点的位置 + int decimalIndex = str.indexOf("."); + + // 如果没有小数点,精度为0 + if (decimalIndex == -1) { + return 0; + } + + // 获取小数点后的部分并返回其长度 + return str.substring(decimalIndex + 1).length(); + } + + public static String extractFileName(String path) { + if (path == null || path.isEmpty()) { + return ""; + } + + File file = new File(path); + String 
fileName = file.getName(); + + int dotIndex = fileName.lastIndexOf('.'); + + if (dotIndex == -1 || dotIndex == 0) { + return fileName; + } + + return fileName.substring(0, dotIndex); + } + + @Test + public void testSubcolumn() throws IOException { + String parent_dir = "D:/github/xjz17/subcolumn/"; + + String input_parent_dir = parent_dir + "dataset/"; + + String output_parent_dir = "D:/encoding-subcolumn/result/"; + // String output_parent_dir = parent_dir + "result/"; + + String outputPath = output_parent_dir + "bp.csv"; + + int block_size = 1024; + + int repeatTime = 100; + + // repeatTime = 1; + + List<String> integerDatasets = new ArrayList<>(); + integerDatasets.add("Wine-Tasting"); + + CsvWriter writer = new CsvWriter(outputPath, ',', StandardCharsets.UTF_8); + writer.setRecordDelimiter('\n'); + + String[] head = { + "Dataset", + "Encoding Algorithm", + "Encoding Time", + "Decoding Time", + "Points", + "Compressed Size", + "Compression Ratio" + }; + writer.writeRecord(head); + + File directory = new File(input_parent_dir); + // File[] csvFiles = directory.listFiles(); + File[] csvFiles = directory.listFiles((dir, name) -> name.endsWith(".csv")); + + for (File file : csvFiles) { + String datasetName = extractFileName(file.toString()); + System.out.println(datasetName); + + InputStream inputStream = Files.newInputStream(file.toPath()); + + CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); + ArrayList<Float> data1 = new ArrayList<>(); + + int max_decimal = 0; + while (loader.readRecord()) { + String f_str = loader.getValues()[0]; + if (f_str.isEmpty()) { + continue; + } + int cur_decimal = getDecimalPrecision(f_str); + if (cur_decimal > max_decimal) { + max_decimal = cur_decimal; + } + // String value = loader.getValues()[index]; + data1.add(Float.valueOf(f_str)); + // data2.add(Integer.valueOf(loader.getValues()[1])); + // data.add(Integer.valueOf(value)); + } + inputStream.close(); + int[] data2_arr = new int[data1.size()]; + int max_mul 
= (int) Math.pow(10, max_decimal); + for (int i = 0; i < data1.size(); i++) { + data2_arr[i] = (int) (data1.get(i) * max_mul); + } + + System.out.println(max_decimal); + byte[] encoded_result = new byte[data2_arr.length * 4]; + + long encodeTime = 0; + long decodeTime = 0; + double ratio = 0; + double compressed_size = 0; + + int length = 0; + + long s = System.nanoTime(); + for (int repeat = 0; repeat < repeatTime; repeat++) { + length = Encoder(data2_arr, block_size, encoded_result); + } + + long e = System.nanoTime(); + encodeTime += ((e - s) / repeatTime); + compressed_size += length; + + double ratioTmp; + + if (integerDatasets.contains(datasetName)) { + ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); + } else { + ratioTmp = compressed_size / (double) (data1.size() * Long.BYTES); + } + + ratio += ratioTmp; + + System.out.println("Decode"); + + int[] data2_arr_decoded = new int[data1.size()]; + + s = System.nanoTime(); + + for (int repeat = 0; repeat < repeatTime; repeat++) { + data2_arr_decoded = Decoder(encoded_result); + } + + e = System.nanoTime(); + decodeTime += ((e - s) / repeatTime); + + String[] record = { + datasetName, + "BP", + String.valueOf(encodeTime), + String.valueOf(decodeTime), + String.valueOf(data1.size()), + String.valueOf(compressed_size), + String.valueOf(ratio) + }; + writer.writeRecord(record); + System.out.println(ratio); + } + + writer.close(); + } + + @Test + public void testTransData() throws IOException { + String parent_dir = "D:/github/xjz17/subcolumn/"; + + String output_parent_dir = "D:/encoding-subcolumn/trans_data_result/"; + // String output_parent_dir = parent_dir + "trans_data_result/"; + + String input_parent_dir = parent_dir + "trans_data/"; + + ArrayList<String> input_path_list = new ArrayList<>(); + ArrayList<String> output_path_list = new ArrayList<>(); + ArrayList<String> dataset_name = new ArrayList<>(); + ArrayList<Integer> dataset_block_size = new ArrayList<>(); + + try (Stream<Path> paths = 
Files.walk(Paths.get(input_parent_dir))) { + paths.filter(Files::isDirectory) + .filter(path -> !path.equals(Paths.get(input_parent_dir))) + .forEach(dir -> { + String name = dir.getFileName().toString(); + dataset_name.add(name); + input_path_list.add(dir.toString()); + dataset_block_size.add(1024); + }); + } + + String outputPath = output_parent_dir + "bp.csv"; + CsvWriter writer = new CsvWriter(outputPath, ',', StandardCharsets.UTF_8); + writer.setRecordDelimiter('\n'); + + String[] head = { + "Dataset", + "Encoding Algorithm", + "Encoding Time", + "Decoding Time", + "Points", + "Compressed Size", + "Compression Ratio" + }; + writer.writeRecord(head); + + int repeatTime = 100; + + for (int file_i = 0; file_i < input_path_list.size(); file_i++) { + + String inputPath = input_path_list.get(file_i); + System.out.println(inputPath); + + File file = new File(inputPath); + File[] tempList = file.listFiles(); + + long totalEncodeTime = 0; + long totalDecodeTime = 0; + double totalCompressedSize = 0; + int totalPoints = 0; + + for (File f : tempList) { + String datasetName = extractFileName(f.toString()); + InputStream inputStream = Files.newInputStream(f.toPath()); + + CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); + ArrayList<Integer> data1 = new ArrayList<>(); + ArrayList<Integer> data2 = new ArrayList<>(); + + loader.readHeaders(); + while (loader.readRecord()) { + // String value = loader.getValues()[index]; + data1.add(Integer.valueOf(loader.getValues()[0])); + data2.add(Integer.valueOf(loader.getValues()[1])); + // data.add(Integer.valueOf(value)); + } + inputStream.close(); + int[] data2_arr = new int[data1.size()]; + for (int i = 0; i < data2.size(); i++) { + data2_arr[i] = data2.get(i); + } + byte[] encoded_result = new byte[data2_arr.length * 4]; + long encodeTime = 0; + long decodeTime = 0; + double ratio = 0; + double compressed_size = 0; + + int length = 0; + + long s = System.nanoTime(); + for (int repeat = 0; repeat < repeatTime; 
repeat++) { + length = Encoder(data2_arr, dataset_block_size.get(file_i), encoded_result); + } + + long e = System.nanoTime(); + encodeTime += ((e - s) / repeatTime); + compressed_size += length; + double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); + ratio += ratioTmp; + s = System.nanoTime(); + + int[] data2_arr_decoded = new int[data1.size()]; + + for (int repeat = 0; repeat < repeatTime; repeat++) { + data2_arr_decoded = Decoder(encoded_result); + } + + e = System.nanoTime(); + decodeTime += ((e - s) / repeatTime); + + totalEncodeTime += encodeTime; + totalDecodeTime += decodeTime; + totalCompressedSize += compressed_size; + totalPoints += data1.size(); + + } + + double compressionRatio = totalCompressedSize / (totalPoints * Integer.BYTES); + + String[] record = { + dataset_name.get(file_i), + "BP", + String.valueOf(totalEncodeTime), + String.valueOf(totalDecodeTime), + String.valueOf(totalPoints), + String.valueOf(totalCompressedSize), + String.valueOf(compressionRatio) + }; + + writer.writeRecord(record); + System.out.println(compressionRatio); + } + writer.close(); + } + +}
diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/BUFFTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/BUFFTest.java new file mode 100644 index 0000000..57ad31d --- /dev/null +++ b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/BUFFTest.java
@@ -0,0 +1,755 @@ +package org.apache.iotdb.tsfile.encoding; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Stream; + +import org.junit.Test; + +import com.csvreader.CsvReader; +import com.csvreader.CsvWriter; + +public class BUFFTest { + + public static int bitWidth(int value) { + return 32 - Integer.numberOfLeadingZeros(value); + } + + public static void intToBytes(int srcNum, byte[] result, int pos, int width) { + int cnt = pos & 0x07; + int index = pos >> 3; + while (width > 0) { + int m = width + cnt >= 8 ? 8 - cnt : width; + width -= m; + int mask = 1 << (8 - cnt); + cnt += m; + byte y = (byte) (srcNum >>> width); + y = (byte) (y << (8 - cnt)); + mask = ~(mask - (1 << (8 - cnt))); + result[index] = (byte) (result[index] & mask | y); + srcNum = srcNum & ~(-1 << width); + if (cnt == 8) { + index++; + cnt = 0; + } + } + } + + public static int bytesToInt(byte[] result, int pos, int width) { + int ret = 0; + int cnt = pos & 0x07; + int index = pos >> 3; + while (width > 0) { + int m = width + cnt >= 8 ? 
8 - cnt : width; + width -= m; + ret = ret << m; + byte y = (byte) (result[index] & (0xff >> cnt)); + y = (byte) ((y & 0xff) >>> (8 - cnt - m)); + ret = ret | (y & 0xff); + cnt += m; + if (cnt == 8) { + cnt = 0; + index++; + } + } + return ret; + } + + public static void pack8Values(int[] values, int offset, int width, int encode_pos, + byte[] encoded_result) { + int bufIdx = 0; + int valueIdx = offset; + // remaining bits for the current unfinished Integer + int leftBit = 0; + + while (valueIdx < 8 + offset) { + // buffer is used for saving 32 bits as a part of result + int buffer = 0; + // remaining size of bits in the 'buffer' + int leftSize = 32; + + // encode the left bits of current Integer to 'buffer' + if (leftBit > 0) { + buffer |= (values[valueIdx] << (32 - leftBit)); + leftSize -= leftBit; + leftBit = 0; + valueIdx++; + } + + while (leftSize >= width && valueIdx < 8 + offset) { + // encode one Integer to the 'buffer' + buffer |= (values[valueIdx] << (leftSize - width)); + leftSize -= width; + valueIdx++; + } + // If the remaining space of the buffer can not save the bits for one Integer, + if (leftSize > 0 && valueIdx < 8 + offset) { + // put the first 'leftSize' bits of the Integer into remaining space of the + // buffer + buffer |= (values[valueIdx] >>> (width - leftSize)); + leftBit = width - leftSize; + } + + // put the buffer into the final result + for (int j = 0; j < 4; j++) { + encoded_result[encode_pos] = (byte) ((buffer >>> ((3 - j) * 8)) & 0xFF); + encode_pos++; + bufIdx++; + if (bufIdx >= width) { + return; + } + } + } + + } + + public static void unpack8Values(byte[] encoded, int offset, int width, int[] result_list, int result_offset) { + int byteIdx = offset; + long buffer = 0; + // total bits which have read from 'buf' to 'buffer'. i.e., + // number of available bits to be decoded. 
+ int totalBits = 0; + int valueIdx = 0; + + while (valueIdx < 8) { + // If current available bits are not enough to decode one Integer, + // then add next byte from buf to 'buffer' until totalBits >= width + while (totalBits < width) { + buffer = (buffer << 8) | (encoded[byteIdx] & 0xFF); + byteIdx++; + totalBits += 8; + } + + // If current available bits are enough to decode one Integer, + // then decode one Integer one by one until left bits in 'buffer' is + // not enough to decode one Integer. + while (totalBits >= width && valueIdx < 8) { + // result_list.add((int) (buffer >>> (totalBits - width))); + result_list[result_offset + valueIdx] = (int) (buffer >>> (totalBits - width)); + valueIdx++; + totalBits -= width; + buffer = buffer & ((1L << totalBits) - 1); + } + } + } + + public static int bitPacking(int[] numbers, int bit_width, int encode_pos, + byte[] encoded_result, int num_values) { + int block_num = num_values / 8; + int remainder = num_values % 8; + + for (int i = 0; i < block_num; i++) { + pack8Values(numbers, i * 8, bit_width, encode_pos, encoded_result); + encode_pos += bit_width; + } + + encode_pos *= 8; + + for (int i = 0; i < remainder; i++) { + intToBytes(numbers[block_num * 8 + i], encoded_result, encode_pos, bit_width); + encode_pos += bit_width; + } + + return (encode_pos + 7) / 8; + } + + public static int decodeBitPacking( + byte[] encoded, int decode_pos, int bit_width, int num_values, int[] result_list) { + // ArrayList<Integer> result_list = new ArrayList<>(); + // int[] result_list = new int[num_values]; + int block_num = num_values / 8; + int remainder = num_values % 8; + + for (int i = 0; i < block_num; i++) { // bitpacking + unpack8Values(encoded, decode_pos, bit_width, result_list, i * 8); + decode_pos += bit_width; + } + + decode_pos *= 8; + + for (int i = 0; i < remainder; i++) { + result_list[block_num * 8 + i] = bytesToInt(encoded, decode_pos, bit_width); + decode_pos += bit_width; + } + + return (decode_pos + 7) / 8; + } + + 
public static int[] bits_needed = { 0, 5, 8, 11, 15, 18, 21, 25 }; + + public static int BlockEncoder(float[] data, int block_index, int block_size, int remainder, int max_decimal, + int encode_pos, byte[] encoded_result) { + + int[] sign_bits = new int[remainder]; + int[] integer_parts = new int[remainder]; + int[] decimal_parts = new int[remainder]; + + int min_integer_part = Integer.MAX_VALUE; + int max_integer_part = Integer.MIN_VALUE; + + // for (int i = 0; i < remainder; i++) { + // System.out.print(data[block_index * block_size + i] + " "); + // } + // System.out.println(); + + for (int i = 0; i < remainder; i++) { + float value = data[block_index * block_size + i]; + + if (value < 0) { + sign_bits[i] = 1; + } + + int currentInt = (int) Math.abs(value); + integer_parts[i] = currentInt; + + if (currentInt < min_integer_part) { + min_integer_part = currentInt; + } + + if (currentInt > max_integer_part) { + max_integer_part = currentInt; + } + + int bits = Float.floatToIntBits(value); + + // int sign = (bits >> 31) & 1; + int exponent = (bits >> 23) & 0xFF; + int mantissa = bits & 0x7FFFFF; + + int actualExponent = exponent - 127; + + if (actualExponent >= 0) { + int mask = (1 << (23 - actualExponent)) - 1; + mantissa &= mask; + } else { + mantissa += 1 << 23; + } + + int shift = 23 - actualExponent - bits_needed[max_decimal]; + + if (shift < 0) { + mantissa <<= -shift; + } else { + mantissa >>= shift; + } + + if (exponent == 0) { + mantissa = 0; + } + + decimal_parts[i] = mantissa; + } + + encoded_result[encode_pos] = (byte) (min_integer_part >> 24); + encoded_result[encode_pos + 1] = (byte) (min_integer_part >> 16); + encoded_result[encode_pos + 2] = (byte) (min_integer_part >> 8); + encoded_result[encode_pos + 3] = (byte) min_integer_part; + encode_pos += 4; + + // System.out.println("min_integer_part: " + min_integer_part); + // System.out.println("max_integer_part: " + max_integer_part); + + int bw = bitWidth(max_integer_part - min_integer_part); + + 
encoded_result[encode_pos] = (byte) bw; + encode_pos += 1; + + for (int i = 0; i < remainder; i++) { + integer_parts[i] -= min_integer_part; + } + + // int[] combined = new int[remainder]; + // for (int i = 0; i < remainder; i++) { + // combined[i] = (sign_bits[i] << (bw + bits_needed[max_decimal])) | + // (integer_parts[i] << bits_needed[max_decimal]) | decimal_parts[i]; + // } + + // int totalBitWidth = 1 + bw + bits_needed[max_decimal]; + + // encode_pos = bitPacking(combined, totalBitWidth, encode_pos, encoded_result, + // remainder); + + int totalBitWidth = 1 + bw + bits_needed[max_decimal]; + + int intArrayCount = (totalBitWidth + 7) / 8; + + int[][] combinedArrays = new int[intArrayCount][remainder]; + + for (int i = 0; i < intArrayCount; i++) { + for (int j = 0; j < remainder; j++) { + long combined = (sign_bits[j] << (bw + bits_needed[max_decimal])) + | (integer_parts[j] << bits_needed[max_decimal]) | decimal_parts[j]; + combinedArrays[i][j] = (int) ((combined >> (i * 8)) & 0xFF); + } + } + + for (int i = 0; i < intArrayCount; i++) { + int currentBitWidth = Math.min(8, totalBitWidth - i * 8); + encode_pos = bitPacking(combinedArrays[i], currentBitWidth, encode_pos, encoded_result, remainder); + } + + return encode_pos; + } + + public static int BlockDecoder(byte[] encoded_result, int block_index, int block_size, int remainder, + int max_decimal, int encode_pos, float[] data) { + + int[] sign_bits = new int[remainder]; + int[] integer_parts = new int[remainder]; + int[] decimal_parts = new int[remainder]; + + int min_integer_part = ((encoded_result[encode_pos] & 0xFF) << 24) + | ((encoded_result[encode_pos + 1] & 0xFF) << 16) + | + ((encoded_result[encode_pos + 2] & 0xFF) << 8) | (encoded_result[encode_pos + 3] & 0xFF); + encode_pos += 4; + + // System.out.println("min_integer_part: " + min_integer_part); + + int bw = encoded_result[encode_pos]; + encode_pos += 1; + + // int[] combined = new int[remainder]; + + // encode_pos = 
decodeBitPacking(encoded_result, encode_pos, 1 + bw + + // bits_needed[max_decimal], remainder, combined); + + // for (int i = 0; i < remainder; i++) { + // int value = combined[i]; + // sign_bits[i] = (value >> (bw + bits_needed[max_decimal])) & 1; + // integer_parts[i] = (value >> bits_needed[max_decimal]) & ((1 << bw) - 1); + // integer_parts[i] += min_integer_part; + // decimal_parts[i] = value & ((1 << bits_needed[max_decimal]) - 1); + // } + + int totalBitWidth = 1 + bw + bits_needed[max_decimal]; + + int intArrayCount = (totalBitWidth + 7) / 8; + + int[][] combinedArrays = new int[intArrayCount][remainder]; + + long[] combined = new long[remainder]; + + for (int i = 0; i < intArrayCount; i++) { + int currentBitWidth = Math.min(8, totalBitWidth - i * 8); + encode_pos = decodeBitPacking(encoded_result, encode_pos, currentBitWidth, remainder, combinedArrays[i]); + for (int j = 0; j < remainder; j++) { + combined[j] |= ((long) combinedArrays[i][j]) << (i * 8); + } + } + + for (int i = 0; i < remainder; i++) { + sign_bits[i] = (int) ((combined[i] >> (bw + bits_needed[max_decimal])) & 1); + integer_parts[i] = (int) ((combined[i] >> bits_needed[max_decimal]) & ((1 << bw) - 1)); + integer_parts[i] += min_integer_part; + decimal_parts[i] = (int) (combined[i] & ((1 << bits_needed[max_decimal]) - 1)); + } + + for (int i = 0; i < remainder; i++) { + float decimal = decimal_parts[i]; + for (int j = 0; j < bits_needed[max_decimal]; j++) { + decimal /= 2; + } + float value = (float) (integer_parts[i] + decimal); + value = sign_bits[i] == 1 ? 
-value : value; + data[block_index * block_size + i] = value; + } + + // for (int i = 0; i < remainder; i++) { + // System.out.print(data[block_index * block_size + i] + " "); + // } + // System.out.println(); + + return encode_pos; + } + + public static int Encoder(float[] data, int block_size, int max_decimal, byte[] encoded_result) { + int data_length = data.length; + int encode_pos = 0; + + encoded_result[0] = (byte) (data_length >> 24); + encoded_result[1] = (byte) (data_length >> 16); + encoded_result[2] = (byte) (data_length >> 8); + encoded_result[3] = (byte) data_length; + encode_pos += 4; + + encoded_result[4] = (byte) (block_size >> 24); + encoded_result[5] = (byte) (block_size >> 16); + encoded_result[6] = (byte) (block_size >> 8); + encoded_result[7] = (byte) block_size; + encode_pos += 4; + + encoded_result[8] = (byte) max_decimal; + encode_pos += 1; + + int num_blocks = data_length / block_size; + + int remainder = data_length % block_size; + + for (int i = 0; i < num_blocks; i++) { + encode_pos = BlockEncoder(data, i, block_size, block_size, max_decimal, encode_pos, encoded_result); + } + + if (remainder > 0) { + encode_pos = BlockEncoder(data, num_blocks, block_size, remainder, max_decimal, encode_pos, encoded_result); + } + + // if (remainder <= 3) { + // for (int i = 0; i < remainder; i++) { + // int value = data[num_blocks * block_size + i]; + // encoded_result[encode_pos] = (byte) (value >> 24); + // encoded_result[encode_pos + 1] = (byte) (value >> 16); + // encoded_result[encode_pos + 2] = (byte) (value >> 8); + // encoded_result[encode_pos + 3] = (byte) value; + // encode_pos += 4; + // } + // } else { + // encode_pos = BlockEncoder(data, num_blocks, block_size, remainder, + // max_decimal, encode_pos, + // encoded_result); + // } + + return encode_pos; + } + + public static float[] Decoder(byte[] encoded_result) { + int encode_pos = 0; + + int data_length = ((encoded_result[encode_pos] & 0xFF) << 24) | ((encoded_result[encode_pos + 1] & 
0xFF) << 16) + | + ((encoded_result[encode_pos + 2] & 0xFF) << 8) | (encoded_result[encode_pos + 3] & 0xFF); + encode_pos += 4; + + int block_size = ((encoded_result[encode_pos] & 0xFF) << 24) | ((encoded_result[encode_pos + 1] & 0xFF) << 16) | + ((encoded_result[encode_pos + 2] & 0xFF) << 8) | (encoded_result[encode_pos + 3] & 0xFF); + encode_pos += 4; + + int max_decimal = encoded_result[encode_pos]; + encode_pos += 1; + + int num_blocks = data_length / block_size; + + int remainder = data_length % block_size; + + float[] data = new float[data_length]; + + for (int i = 0; i < num_blocks; i++) { + encode_pos = BlockDecoder(encoded_result, i, block_size, block_size, max_decimal, encode_pos, data); + } + + if (remainder > 0) { + encode_pos = BlockDecoder(encoded_result, num_blocks, block_size, remainder, max_decimal, encode_pos, data); + } + + // if (remainder <= 3) { + // for (int i = 0; i < remainder; i++) { + // data[num_blocks * block_size + i] = ((encoded_result[encode_pos] & 0xFF) << + // 24) | + // ((encoded_result[encode_pos + 1] & 0xFF) << 16) | + // ((encoded_result[encode_pos + 2] & 0xFF) << 8) | (encoded_result[encode_pos + + // 3] & 0xFF); + // encode_pos += 4; + // } + // } else { + // encode_pos = BlockDecoder(encoded_result, num_blocks, block_size, remainder, + // encode_pos, data); + // } + + return data; + } + + public static int getDecimalPrecision(String str) { + // 查找小数点的位置 + int decimalIndex = str.indexOf("."); + + // 如果没有小数点,精度为0 + if (decimalIndex == -1) { + return 0; + } + + // 获取小数点后的部分并返回其长度 + return str.substring(decimalIndex + 1).length(); + } + + public static String extractFileName(String path) { + if (path == null || path.isEmpty()) { + return ""; + } + + File file = new File(path); + String fileName = file.getName(); + + int dotIndex = fileName.lastIndexOf('.'); + + if (dotIndex == -1 || dotIndex == 0) { + return fileName; + } + + return fileName.substring(0, dotIndex); + } + + @Test + public void testSubcolumn() throws IOException { 
+ String parent_dir = "D:/github/xjz17/subcolumn/"; + + String input_parent_dir = parent_dir + "dataset/"; + + String output_parent_dir = "D:/encoding-subcolumn/result/"; + // String output_parent_dir = parent_dir + "result/"; + + String outputPath = output_parent_dir + "buff.csv"; + + int block_size = 1024; + + int repeatTime = 100; + + // repeatTime = 1; + + List<String> integerDatasets = new ArrayList<>(); + integerDatasets.add("Wine-Tasting"); + + CsvWriter writer = new CsvWriter(outputPath, ',', StandardCharsets.UTF_8); + writer.setRecordDelimiter('\n'); + + String[] head = { + "Dataset", + "Encoding Algorithm", + "Encoding Time", + "Decoding Time", + "Points", + "Compressed Size", + "Compression Ratio" + }; + writer.writeRecord(head); + + File directory = new File(input_parent_dir); + // File[] csvFiles = directory.listFiles(); + File[] csvFiles = directory.listFiles((dir, name) -> name.endsWith(".csv")); + + for (File file : csvFiles) { + String datasetName = extractFileName(file.toString()); + System.out.println(datasetName); + + InputStream inputStream = Files.newInputStream(file.toPath()); + + CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); + ArrayList<Float> data1 = new ArrayList<>(); + + int max_decimal = 0; + while (loader.readRecord()) { + String f_str = loader.getValues()[0]; + if (f_str.isEmpty()) { + continue; + } + int cur_decimal = getDecimalPrecision(f_str); + if (cur_decimal > max_decimal) + max_decimal = cur_decimal; + data1.add(Float.valueOf(f_str)); + } + inputStream.close(); + // int[] data2_arr = new int[data1.size()]; + float[] data2_arr = new float[data1.size()]; + + // int max_mul = (int) Math.pow(10, max_decimal); + for (int i = 0; i < data1.size(); i++) { + // data2_arr[i] = (int) (data1.get(i) * max_mul); + data2_arr[i] = data1.get(i); + } + + System.out.println(max_decimal); + byte[] encoded_result = new byte[data2_arr.length * 4]; + + long encodeTime = 0; + long decodeTime = 0; + double ratio = 0; + double 
compressed_size = 0; + + int length = 0; + + long s = System.nanoTime(); + for (int repeat = 0; repeat < repeatTime; repeat++) { + length = Encoder(data2_arr, block_size, max_decimal, encoded_result); + } + + long e = System.nanoTime(); + encodeTime += ((e - s) / repeatTime); + compressed_size += length; + + double ratioTmp; + + if (integerDatasets.contains(datasetName)) { + ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); + } else { + ratioTmp = compressed_size / (double) (data1.size() * Long.BYTES); + } + + ratio += ratioTmp; + + System.out.println("Decode"); + + float[] data2_arr_decoded = new float[data1.size()]; + + s = System.nanoTime(); + + for (int repeat = 0; repeat < repeatTime; repeat++) { + data2_arr_decoded = Decoder(encoded_result); + } + + e = System.nanoTime(); + decodeTime += ((e - s) / repeatTime); + + String[] record = { + datasetName, + "BUFF", + String.valueOf(encodeTime), + String.valueOf(decodeTime), + String.valueOf(data1.size()), + String.valueOf(compressed_size), + String.valueOf(ratio) + }; + writer.writeRecord(record); + System.out.println(ratio); + } + + writer.close(); + } + + @Test + public void testTransData() throws IOException { + String parent_dir = "D:/github/xjz17/subcolumn/"; + + String output_parent_dir = "D:/encoding-subcolumn/trans_data_result/"; + // String output_parent_dir = parent_dir + "trans_data_result/"; + + String input_parent_dir = parent_dir + "trans_data/"; + + ArrayList<String> input_path_list = new ArrayList<>(); + ArrayList<String> output_path_list = new ArrayList<>(); + ArrayList<String> dataset_name = new ArrayList<>(); + ArrayList<Integer> dataset_block_size = new ArrayList<>(); + + try (Stream<Path> paths = Files.walk(Paths.get(input_parent_dir))) { + paths.filter(Files::isDirectory) + .filter(path -> !path.equals(Paths.get(input_parent_dir))) + .forEach(dir -> { + String name = dir.getFileName().toString(); + dataset_name.add(name); + input_path_list.add(dir.toString()); + 
dataset_block_size.add(1024); + }); + } + + String outputPath = output_parent_dir + "buff.csv"; + CsvWriter writer = new CsvWriter(outputPath, ',', StandardCharsets.UTF_8); + writer.setRecordDelimiter('\n'); + + String[] head = { + "Dataset", + "Encoding Algorithm", + "Encoding Time", + "Decoding Time", + "Points", + "Compressed Size", + "Compression Ratio" + }; + writer.writeRecord(head); + + int repeatTime = 100; + + for (int file_i = 0; file_i < input_path_list.size(); file_i++) { + + String inputPath = input_path_list.get(file_i); + System.out.println(inputPath); + + File file = new File(inputPath); + File[] tempList = file.listFiles(); + + long totalEncodeTime = 0; + long totalDecodeTime = 0; + double totalCompressedSize = 0; + int totalPoints = 0; + + for (File f : tempList) { + String datasetName = extractFileName(f.toString()); + InputStream inputStream = Files.newInputStream(f.toPath()); + + CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); + ArrayList<Integer> data1 = new ArrayList<>(); + ArrayList<Integer> data2 = new ArrayList<>(); + + int max_decimal = 0; + loader.readHeaders(); + while (loader.readRecord()) { + // String value = loader.getValues()[index]; + data1.add(Integer.valueOf(loader.getValues()[0])); + data2.add(Integer.valueOf(loader.getValues()[1])); + int cur_decimal = getDecimalPrecision(loader.getValues()[1]); + max_decimal = Math.max(max_decimal, cur_decimal); + // data.add(Integer.valueOf(value)); + } + inputStream.close(); + float[] data2_arr = new float[data1.size()]; + for (int i = 0; i < data2.size(); i++) { + data2_arr[i] = data2.get(i); + } + byte[] encoded_result = new byte[data2_arr.length * 4]; + long encodeTime = 0; + long decodeTime = 0; + double ratio = 0; + double compressed_size = 0; + + int length = 0; + + long s = System.nanoTime(); + for (int repeat = 0; repeat < repeatTime; repeat++) { + length = Encoder(data2_arr, dataset_block_size.get(file_i), max_decimal, encoded_result); + } + + long e = 
System.nanoTime(); + encodeTime += ((e - s) / repeatTime); + compressed_size += length; + double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); + ratio += ratioTmp; + s = System.nanoTime(); + + float[] data2_arr_decoded = new float[data1.size()]; + + for (int repeat = 0; repeat < repeatTime; repeat++) { + data2_arr_decoded = Decoder(encoded_result); + } + + e = System.nanoTime(); + decodeTime += ((e - s) / repeatTime); + + totalEncodeTime += encodeTime; + totalDecodeTime += decodeTime; + totalCompressedSize += compressed_size; + totalPoints += data1.size(); + + } + + double compressionRatio = totalCompressedSize / (totalPoints * Integer.BYTES); + + String[] record = { + dataset_name.get(file_i), + "BUFF", + String.valueOf(totalEncodeTime), + String.valueOf(totalDecodeTime), + String.valueOf(totalPoints), + String.valueOf(totalCompressedSize), + String.valueOf(compressionRatio) + }; + + writer.writeRecord(record); + System.out.println(compressionRatio); + } + writer.close(); + } +}
diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/EncodeTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/EncodeTest.java index db3877a..7d6d8b5 100644 --- a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/EncodeTest.java +++ b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/EncodeTest.java
@@ -21,62 +21,37 @@ import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; import java.util.ArrayList; +import java.util.stream.Stream; public class EncodeTest { public static void main(@NotNull String[] args) throws IOException { - String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/"; -// String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String output_parent_dir = parent_dir + "icde0802/compression_ratio/sota_ratio/"; + String parent_dir = "D:/github/xjz17/subcolumn/"; + + String output_parent_dir = "D:/encoding-subcolumn/trans_data_result/"; + // String output_parent_dir = parent_dir + "trans_data_result/"; + String input_parent_dir = parent_dir + "trans_data/"; + ArrayList<String> input_path_list = new ArrayList<>(); ArrayList<String> output_path_list = new ArrayList<>(); ArrayList<String> dataset_name = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - for (String item : dataset_name) { - input_path_list.add(input_parent_dir + item); + try (Stream<Path> paths = Files.walk(Paths.get(input_parent_dir))) { + paths.filter(Files::isDirectory) + .filter(path -> !path.equals(Paths.get(input_parent_dir))) + .forEach(dir -> { + String name = dir.getFileName().toString(); + dataset_name.add(name); + input_path_list.add(input_parent_dir + name); + output_path_list.add(output_parent_dir + name + "_ratio.csv"); + }); } - output_path_list.add(output_parent_dir + "CS-Sensors_ratio.csv"); // 0 - - 
output_path_list.add(output_parent_dir + "Metro-Traffic_ratio.csv");// 1 - - output_path_list.add(output_parent_dir + "USGS-Earthquakes_ratio.csv");// 2 - - output_path_list.add(output_parent_dir + "YZ-Electricity_ratio.csv"); // 3 - - output_path_list.add(output_parent_dir + "GW-Magnetic_ratio.csv"); //4 - - output_path_list.add(output_parent_dir + "TY-Fuel_ratio.csv");//5 - - output_path_list.add(output_parent_dir + "Cyber-Vehicle_ratio.csv"); //6 - - output_path_list.add(output_parent_dir + "Vehicle-Charge_ratio.csv");//7 - - output_path_list.add(output_parent_dir + "Nifty-Stocks_ratio.csv");//8 - - output_path_list.add(output_parent_dir + "TH-Climate_ratio.csv");//9 - - output_path_list.add(output_parent_dir + "TY-Transport_ratio.csv");//10 - - output_path_list.add(output_parent_dir + "EPM-Education_ratio.csv");//11 - -// for (int file_i = 7; file_i < 8; file_i++) { for (int file_i = 0; file_i < input_path_list.size(); file_i++) { String inputPath = input_path_list.get(file_i); String Output = output_path_list.get(file_i); @@ -90,34 +65,33 @@ // select encoding algorithms TSEncoding[] encodingList = { - TSEncoding.PLAIN , - TSEncoding.TS_2DIFF, - TSEncoding.RLE, - TSEncoding.SPRINTZ, - TSEncoding.GORILLA, - TSEncoding.RLBE, - TSEncoding.CHIMP, -// TSEncoding.BUFF + TSEncoding.PLAIN, + TSEncoding.TS_2DIFF, + TSEncoding.RLE, + TSEncoding.SPRINTZ, + TSEncoding.GORILLA, + TSEncoding.RLBE, + TSEncoding.CHIMP, }; CompressionType[] compressList = { - CompressionType.UNCOMPRESSED, + CompressionType.UNCOMPRESSED, }; CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); String[] head = { - "Input Direction", - "Column Index", - "Encoding Algorithm", - "Compress Algorithm", - "Encoding Time", - "Decoding Time", - "Compress Time", - "Uncompress Time", - "Points", - "Compressed Size", - "Compression Ratio" + "Input Direction", + "Column Index", + "Encoding Algorithm", + "Compress Algorithm", + "Encoding Time", + "Decoding Time", + "Compress Time", + 
"Uncompress Time", + "Points", + "Compressed Size", + "Compression Ratio" }; writer.writeRecord(head); // write header to output file @@ -133,13 +107,13 @@ ArrayList<String> data = new ArrayList<>(); for (int index : columnIndexes) { - if (index == 0){ + if (index == 0) { continue; } int max_precision = 2; - if (file_i == 3){ + if (file_i == 3) { max_precision = 4; - } else if (file_i == 4 ||file_i == 5) { + } else if (file_i == 4 || file_i == 5) { max_precision = 3; } loader.readHeaders(); @@ -148,90 +122,87 @@ String v = loader.getValues()[index]; data.add(v); - if (!v.matches("-?\\d+")){ + if (!v.matches("-?\\d+")) { dataTypeName = "float"; } } inputStream.close(); switch (dataTypeName) { - case "int": - { - TSDataType dataType = TSDataType.INT32; // set TSDataType - ArrayList<Integer> tmp = new ArrayList<>(); - for (String value : data) { - tmp.add(Integer.valueOf(value)); - } - // Iterate over each encoding algorithm - for (TSEncoding encoding : encodingList) { - Encoder encoder = - TSEncodingBuilder.getEncodingBuilder(encoding).getEncoder(dataType); - Decoder decoder = Decoder.getDecoderByType(encoding, dataType); - long encodeTime = 0; - long decodeTime = 0; + case "int": { + TSDataType dataType = TSDataType.INT32; // set TSDataType + ArrayList<Integer> tmp = new ArrayList<>(); + for (String value : data) { + tmp.add(Integer.valueOf(value)); + } + // Iterate over each encoding algorithm + for (TSEncoding encoding : encodingList) { + Encoder encoder = TSEncodingBuilder.getEncodingBuilder(encoding).getEncoder(dataType); + Decoder decoder = Decoder.getDecoderByType(encoding, dataType); + long encodeTime = 0; + long decodeTime = 0; - // Iterate over each compression algorithm - for (CompressionType comp : compressList) { - ICompressor compressor = ICompressor.getCompressor(comp); - IUnCompressor unCompressor = IUnCompressor.getUnCompressor(comp); + // Iterate over each compression algorithm + for (CompressionType comp : compressList) { + ICompressor compressor = 
ICompressor.getCompressor(comp); + IUnCompressor unCompressor = IUnCompressor.getUnCompressor(comp); - double ratio = 0; - double compressed_size = 0; + double ratio = 0; + double compressed_size = 0; - long compressTime = 0; - long uncompressTime = 0; + long compressTime = 0; + long uncompressTime = 0; - // repeat many times to test time - for (int i = 0; i < repeatTime; i++) { - ByteArrayOutputStream buffer = new ByteArrayOutputStream(); + // repeat many times to test time + for (int i = 0; i < repeatTime; i++) { + ByteArrayOutputStream buffer = new ByteArrayOutputStream(); - // test encode time - long s = System.nanoTime(); - for (int val : tmp) { - encoder.encode(val, buffer); - } - - encoder.flush(buffer); - long e = System.nanoTime(); - encodeTime += (e - s); - - // test compress time - byte[] elems = buffer.toByteArray(); - s = System.nanoTime(); - byte[] compressed = compressor.compress(elems); - e = System.nanoTime(); - compressTime += (e - s); - - // test compression ratio and compressed size - compressed_size += compressed.length; - double ratioTmp = - (double) compressed.length / (double) (tmp.size() * Integer.BYTES); - ratio += ratioTmp; - - // test uncompress time - s = System.nanoTime(); - unCompressor.uncompress(compressed); - e = System.nanoTime(); - uncompressTime += (e - s); - - // test decode time - ByteBuffer ebuffer = ByteBuffer.wrap(buffer.toByteArray()); - s = System.nanoTime(); - while (decoder.hasNext(ebuffer)) { - decoder.readInt(ebuffer); - } - e = System.nanoTime(); - decodeTime += (e - s); - - buffer.close(); + // test encode time + long s = System.nanoTime(); + for (int val : tmp) { + encoder.encode(val, buffer); } - ratio /= repeatTime; - compressed_size /= repeatTime; - encodeTime /= repeatTime; - decodeTime /= repeatTime; + encoder.flush(buffer); + long e = System.nanoTime(); + encodeTime += (e - s); - String[] record = { + // test compress time + byte[] elems = buffer.toByteArray(); + s = System.nanoTime(); + byte[] compressed = 
compressor.compress(elems); + e = System.nanoTime(); + compressTime += (e - s); + + // test compression ratio and compressed size + compressed_size += compressed.length; + double ratioTmp = (double) compressed.length / (double) (tmp.size() * Integer.BYTES); + ratio += ratioTmp; + + // test uncompress time + s = System.nanoTime(); + unCompressor.uncompress(compressed); + e = System.nanoTime(); + uncompressTime += (e - s); + + // test decode time + ByteBuffer ebuffer = ByteBuffer.wrap(buffer.toByteArray()); + s = System.nanoTime(); + while (decoder.hasNext(ebuffer)) { + decoder.readInt(ebuffer); + } + e = System.nanoTime(); + decodeTime += (e - s); + + buffer.close(); + } + + ratio /= repeatTime; + compressed_size /= repeatTime; + encodeTime /= repeatTime; + decodeTime /= repeatTime; + + String[] record = { f.toString(), String.valueOf(index), encoding.toString(), @@ -243,98 +214,96 @@ String.valueOf(data.size()), String.valueOf(compressed_size), String.valueOf(ratio) - }; - System.out.println(ratio); - writer.writeRecord(record); - } + }; + System.out.println(ratio); + writer.writeRecord(record); } - tmp.clear(); - break; } + tmp.clear(); + break; + } // write info to file - case "float": - { - System.out.println("get float"); - TSDataType dataType = TSDataType.FLOAT; - ArrayList<Float> tmp = new ArrayList<>(); - data.removeIf(String::isEmpty); - for (String datum : data) { - tmp.add(Float.valueOf(datum)); + case "float": { + System.out.println("get float"); + TSDataType dataType = TSDataType.FLOAT; + ArrayList<Float> tmp = new ArrayList<>(); + data.removeIf(String::isEmpty); + for (String datum : data) { + tmp.add(Float.valueOf(datum)); + } + + // Iterate over each encoding algorithm + for (TSEncoding encoding : encodingList) { + Encoder encoder; + encoder = TSEncodingBuilder.getEncodingBuilder(encoding).getEncoder(dataType); + if (encoding == TSEncoding.TS_2DIFF) { + encoder = new FloatEncoder(TSEncoding.TS_2DIFF, dataType, max_precision); + } else if (encoding == 
TSEncoding.RLE) { + encoder = new FloatEncoder(TSEncoding.RLE, dataType, max_precision); } - // Iterate over each encoding algorithm - for (TSEncoding encoding : encodingList) { - Encoder encoder; - encoder = TSEncodingBuilder.getEncodingBuilder(encoding).getEncoder(dataType); - if (encoding == TSEncoding.TS_2DIFF){ - encoder = new FloatEncoder(TSEncoding.TS_2DIFF, dataType, max_precision); - } else if (encoding == TSEncoding.RLE){ - encoder = new FloatEncoder(TSEncoding.RLE, dataType, max_precision); - } + Decoder decoder = Decoder.getDecoderByType(encoding, dataType); - Decoder decoder = Decoder.getDecoderByType(encoding, dataType); + long encodeTime = 0; + long decodeTime = 0; + // Iterate over each compression algorithm + for (CompressionType comp : compressList) { + ICompressor compressor = ICompressor.getCompressor(comp); + IUnCompressor unCompressor = IUnCompressor.getUnCompressor(comp); + long compressTime = 0; + long uncompressTime = 0; + double ratio = 0; + double compressed_size = 0; - long encodeTime = 0; - long decodeTime = 0; - // Iterate over each compression algorithm - for (CompressionType comp : compressList) { - ICompressor compressor = ICompressor.getCompressor(comp); - IUnCompressor unCompressor = IUnCompressor.getUnCompressor(comp); - long compressTime = 0; - long uncompressTime = 0; - double ratio = 0; - double compressed_size = 0; + // repeat many times to test time + for (int i = 0; i < repeatTime; i++) { + ByteArrayOutputStream buffer = new ByteArrayOutputStream(); - // repeat many times to test time - for (int i = 0; i < repeatTime; i++) { - ByteArrayOutputStream buffer = new ByteArrayOutputStream(); - - // test encode time - long s = System.nanoTime(); - for (float val : tmp) { - encoder.encode(val, buffer); - } - encoder.flush(buffer); - long e = System.nanoTime(); - encodeTime += (e - s); - - // test compress time - byte[] elems = buffer.toByteArray(); - s = System.nanoTime(); - byte[] compressed = compressor.compress(elems); - e = 
System.nanoTime(); - compressTime += (e - s); - - // test compression ratio and compressed size - compressed_size += compressed.length; - double ratioTmp = - (double) compressed.length / (double) (tmp.size() * Float.BYTES); - ratio += ratioTmp; - - // test uncompress time - s = System.nanoTime(); - unCompressor.uncompress(compressed); - e = System.nanoTime(); - uncompressTime += (e - s); - - // test decode time - ByteBuffer ebuffer = ByteBuffer.wrap(buffer.toByteArray()); - while (decoder.hasNext(ebuffer)) { - decoder.readFloat(ebuffer); - } - e = System.nanoTime(); - decodeTime += (e - s); - - buffer.close(); + // test encode time + long s = System.nanoTime(); + for (float val : tmp) { + encoder.encode(val, buffer); } - ratio /= repeatTime; - compressed_size /= repeatTime; - encodeTime /= repeatTime; - decodeTime /= repeatTime; + encoder.flush(buffer); + long e = System.nanoTime(); + encodeTime += (e - s); - // write info to file - String[] record = { + // test compress time + byte[] elems = buffer.toByteArray(); + s = System.nanoTime(); + byte[] compressed = compressor.compress(elems); + e = System.nanoTime(); + compressTime += (e - s); + + // test compression ratio and compressed size + compressed_size += compressed.length; + double ratioTmp = (double) compressed.length / (double) (tmp.size() * Float.BYTES); + ratio += ratioTmp; + + // test uncompress time + s = System.nanoTime(); + unCompressor.uncompress(compressed); + e = System.nanoTime(); + uncompressTime += (e - s); + + // test decode time + ByteBuffer ebuffer = ByteBuffer.wrap(buffer.toByteArray()); + while (decoder.hasNext(ebuffer)) { + decoder.readFloat(ebuffer); + } + e = System.nanoTime(); + decodeTime += (e - s); + + buffer.close(); + } + ratio /= repeatTime; + compressed_size /= repeatTime; + encodeTime /= repeatTime; + decodeTime /= repeatTime; + + // write info to file + String[] record = { f.toString(), String.valueOf(index), encoding.toString(), @@ -346,13 +315,13 @@ String.valueOf(data.size()), 
String.valueOf(compressed_size), String.valueOf(ratio) - }; - System.out.println(ratio); - writer.writeRecord(record); - } + }; + System.out.println(ratio); + writer.writeRecord(record); } - break; } + break; + } } inputStream = Files.newInputStream(f.toPath()); loader = new CsvReader(inputStream, StandardCharsets.UTF_8);
diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/RLEBOSBImproveTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/RLEBOSBImproveTest.java deleted file mode 100644 index 8909dab..0000000 --- a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/RLEBOSBImproveTest.java +++ /dev/null
@@ -1,2430 +0,0 @@ -package org.apache.iotdb.tsfile.encoding; - -import com.csvreader.CsvReader; -import com.csvreader.CsvWriter; -import org.junit.Test; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.util.ArrayList; -import java.util.Arrays; - -import static java.lang.Math.pow; - -public class RLEBOSBImproveTest { - - public static int getBitWith(int num) { - if (num == 0) return 1; - else return 32 - Integer.numberOfLeadingZeros(num); - } - public static int getCount(long long1, int mask) { - return ((int) (long1 & mask)); - } - public static int getUniqueValue(long long1, int left_shift) { - return ((int) ((long1) >> left_shift)); - } - - - public static void int2Bytes(int integer,int encode_pos , byte[] cur_byte) { - cur_byte[encode_pos] = (byte) (integer >> 24); - cur_byte[encode_pos+1] = (byte) (integer >> 16); - cur_byte[encode_pos+2] = (byte) (integer >> 8); - cur_byte[encode_pos+3] = (byte) (integer); - } - - public static void intByte2Bytes(int integer, int encode_pos , byte[] cur_byte) { - cur_byte[encode_pos] = (byte) (integer); - } - - private static void long2intBytes(long integer, int encode_pos , byte[] cur_byte) { - cur_byte[encode_pos] = (byte) (integer >> 24); - cur_byte[encode_pos+1] = (byte) (integer >> 16); - cur_byte[encode_pos+2] = (byte) (integer >> 8); - cur_byte[encode_pos+3] = (byte) (integer); - } - - - public static int bytes2Integer(byte[] encoded, int start, int num) { - int value = 0; - if (num > 4) { - System.out.println("bytes2Integer error"); - return 0; - } - for (int i = 0; i < num; i++) { - value <<= 8; - int b = encoded[i + start] & 0xFF; - value |= b; - } - return value; - } - - private static long bytesLong2Integer(byte[] encoded, int decode_pos) { - long value = 0; - for (int i = 0; i < 4; i++) { - value <<= 8; - int b = encoded[i + decode_pos] & 0xFF; - value |= b; - } - return value; - } - - public 
static void pack8Values(ArrayList<Integer> values, int offset, int width, int encode_pos, byte[] encoded_result) { - int bufIdx = 0; - int valueIdx = offset; - // remaining bits for the current unfinished Integer - int leftBit = 0; - - while (valueIdx < 8 + offset) { - // buffer is used for saving 32 bits as a part of result - int buffer = 0; - // remaining size of bits in the 'buffer' - int leftSize = 32; - - // encode the left bits of current Integer to 'buffer' - if (leftBit > 0) { - buffer |= (values.get(valueIdx) << (32 - leftBit)); - leftSize -= leftBit; - leftBit = 0; - valueIdx++; - } - - while (leftSize >= width && valueIdx < 8 + offset) { - // encode one Integer to the 'buffer' - buffer |= (values.get(valueIdx)<< (leftSize - width)); - leftSize -= width; - valueIdx++; - } - // If the remaining space of the buffer can not save the bits for one Integer, - if (leftSize > 0 && valueIdx < 8 + offset) { - // put the first 'leftSize' bits of the Integer into remaining space of the - // buffer - buffer |= (values.get(valueIdx) >>> (width - leftSize)); - leftBit = width - leftSize; - } - - // put the buffer into the final result - for (int j = 0; j < 4; j++) { - encoded_result[encode_pos] = (byte) ((buffer >>> ((3 - j) * 8)) & 0xFF); - encode_pos ++; - bufIdx++; - if (bufIdx >= width) { - return ; - } - } - } - - } - - public static void unpack8Values(byte[] encoded, int offset,int width, ArrayList<Integer> result_list) { - int byteIdx = offset; - long buffer = 0; - // total bits which have read from 'buf' to 'buffer'. i.e., - // number of available bits to be decoded. 
- int totalBits = 0; - int valueIdx = 0; - - while (valueIdx < 8) { - // If current available bits are not enough to decode one Integer, - // then add next byte from buf to 'buffer' until totalBits >= width - while (totalBits < width) { - buffer = (buffer << 8) | (encoded[byteIdx] & 0xFF); - byteIdx++; - totalBits += 8; - } - - // If current available bits are enough to decode one Integer, - // then decode one Integer one by one until left bits in 'buffer' is - // not enough to decode one Integer. - while (totalBits >= width && valueIdx < 8) { - result_list.add ((int) (buffer >>> (totalBits - width))); - valueIdx++; - totalBits -= width; - buffer = buffer & ((1L << totalBits) - 1); - } - } - } - - public static int bitPacking(ArrayList<Integer> numbers, int start, int bit_width,int encode_pos, byte[] encoded_result) { - int block_num = (numbers.size()-start) / 8; - for(int i=0;i<block_num;i++){ - pack8Values( numbers, start+i*8, bit_width,encode_pos, encoded_result); - encode_pos +=bit_width; - } - - return encode_pos; - - } - - public static ArrayList<Integer> decodeBitPacking( - byte[] encoded, int decode_pos, int bit_width, int block_size) { - ArrayList<Integer> result_list = new ArrayList<>(); - int block_num = (block_size - 1) / 8; - - for (int i = 0; i < block_num; i++) { // bitpacking - unpack8Values( encoded, decode_pos, bit_width, result_list); - decode_pos += bit_width; - } - return result_list; - } - - - public static int[] getAbsDeltaTsBlock( - int[] ts_block, - int i, - int block_size, - int remaining, - int[] min_delta, - ArrayList<Integer> repeat_count) { - int[] ts_block_delta = new int[remaining]; - - int value_delta_min = Integer.MAX_VALUE; - int value_delta_max = Integer.MIN_VALUE; - int base = i*block_size; - int end = i*block_size+remaining; - for (int j = base; j < end; j++) { - - int integer = ts_block[j]; - if (integer < value_delta_min) value_delta_min = integer; - if (integer > value_delta_max) { - value_delta_max = integer; - } - } - int 
pre_delta = ts_block[i*block_size]-value_delta_min; - int pre_count = 1; - - min_delta[0]=(value_delta_min); - int repeat_i = 0; - int ts_block_delta_i = 0; - for (int j = base+1; j < end; j++) { - int delta = ts_block[j]-value_delta_min; - if(delta == pre_delta){ - pre_count ++; - } else { - if(pre_count>7){ - repeat_count.add(repeat_i); - repeat_count.add(pre_count); - ts_block_delta[ts_block_delta_i]=pre_delta; - ts_block_delta_i ++; - } else{ - for (int k = 0; k < pre_count; k++){ - ts_block_delta[ts_block_delta_i] =pre_delta; - ts_block_delta_i++; - } - } - pre_count =1; - repeat_i = j - i*block_size; - } - pre_delta = delta; - - } - for (int j = 0; j < pre_count; j++){ - ts_block_delta[ts_block_delta_i] =pre_delta; - ts_block_delta_i++; - } - min_delta[1]=(ts_block_delta_i); - min_delta[2]=(value_delta_max-value_delta_min); - int[] new_ts_block_delta = new int[ts_block_delta_i]; - System.arraycopy(ts_block_delta, 0, new_ts_block_delta, 0, ts_block_delta_i); - - return new_ts_block_delta; - } - - - - public static int encodeOutlier2Bytes( - ArrayList<Integer> ts_block_delta, - int bit_width, - int encode_pos, byte[] encoded_result) { - - encode_pos = bitPacking(ts_block_delta, 0, bit_width, encode_pos, encoded_result); - - int n_k = ts_block_delta.size(); - int n_k_b = n_k / 8; - long cur_remaining = 0; // encoded int - int cur_number_bits = 0; // the bit width used of encoded int - for (int i = n_k_b * 8; i < n_k; i++) { - long cur_value = ts_block_delta.get(i); - int cur_bit_width = bit_width; // remaining bit width of current value - - if (cur_number_bits + bit_width >= 32) { - cur_remaining <<= (32 - cur_number_bits); - cur_bit_width = bit_width - 32 + cur_number_bits; - cur_remaining += ((cur_value >> cur_bit_width)); - long2intBytes(cur_remaining,encode_pos,encoded_result); - encode_pos += 4; - - cur_remaining = 0; - cur_number_bits = 0; - } - - cur_remaining <<= cur_bit_width; - cur_number_bits += cur_bit_width; - cur_remaining += (((cur_value << (32 - 
cur_bit_width)) & 0xFFFFFFFFL) >> (32 - cur_bit_width)); - } - cur_remaining <<= (32 - cur_number_bits); - long2intBytes(cur_remaining,encode_pos,encoded_result); - encode_pos += 4; - return encode_pos; - - } - - - public static ArrayList<Integer> decodeOutlier2Bytes( - byte[] encoded, - int decode_pos, - int bit_width, - int length, - ArrayList<Integer> encoded_pos_result - ) { - - int n_k_b = length / 8; - int remaining = length - n_k_b * 8; - ArrayList<Integer> result_list = new ArrayList<>(decodeBitPacking(encoded, decode_pos, bit_width, n_k_b * 8 + 1)); - decode_pos += n_k_b * bit_width; - - ArrayList<Long> int_remaining = new ArrayList<>(); - int int_remaining_size = remaining * bit_width / 32 + 1; - for (int j = 0; j < int_remaining_size; j++) { - - int_remaining.add(bytesLong2Integer(encoded, decode_pos)); - decode_pos += 4; - } - - int cur_remaining_bits = 32; // remaining bit width of current value - long cur_number = int_remaining.get(0); - int cur_number_i = 1; - for (int i = n_k_b * 8; i < length; i++) { - if (bit_width < cur_remaining_bits) { - int tmp = (int) (cur_number >> (32 - bit_width)); - result_list.add(tmp); - cur_number <<= bit_width; - cur_number &= 0xFFFFFFFFL; - cur_remaining_bits -= bit_width; - } else { - int tmp = (int) (cur_number >> (32 - cur_remaining_bits)); - int remain_bits = bit_width - cur_remaining_bits; - tmp <<= remain_bits; - - cur_number = int_remaining.get(cur_number_i); - cur_number_i++; - tmp += (cur_number >> (32 - remain_bits)); - result_list.add(tmp); - cur_number <<= remain_bits; - cur_number &= 0xFFFFFFFFL; - cur_remaining_bits = 32 - remain_bits; - } - } - encoded_pos_result.add(decode_pos); - return result_list; - } - - private static int BOSEncodeBits(int[] ts_block_delta, - int init_block_size, - int final_k_start_value, - int final_x_l_plus, - int final_k_end_value, - int final_x_u_minus, - int max_delta_value, - int[] min_delta, - ArrayList<Integer> repeat_count, - int encode_pos, - byte[] cur_byte) { - int 
block_size = ts_block_delta.length; - - ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_left_outlier = new ArrayList<>(); - ArrayList<Integer> final_right_outlier = new ArrayList<>(); - ArrayList<Integer> final_normal = new ArrayList<>(); - int k1 = 0; - int k2 = 0; - ArrayList<Integer> bitmap_outlier = new ArrayList<>(); - int index_bitmap_outlier = 0; - int cur_index_bitmap_outlier_bits = 0; - for (int i = 0; i < block_size; i++) { - int cur_value = ts_block_delta[i]; - if (cur_value < final_k_start_value) { - final_left_outlier.add(cur_value); - final_left_outlier_index.add(i); - if (cur_index_bitmap_outlier_bits % 8 != 7) { - index_bitmap_outlier <<= 2; - index_bitmap_outlier += 3; - cur_index_bitmap_outlier_bits += 2; - } else { - index_bitmap_outlier <<= 1; - index_bitmap_outlier += 1; - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 1; - cur_index_bitmap_outlier_bits = 1; - } - - - k1++; - - - } else if (cur_value >= final_k_end_value) { - final_right_outlier.add(cur_value - final_k_end_value); - final_right_outlier_index.add(i); - if (cur_index_bitmap_outlier_bits % 8 != 7) { - index_bitmap_outlier <<= 2; - index_bitmap_outlier += 2; - cur_index_bitmap_outlier_bits += 2; - } else { - index_bitmap_outlier <<= 1; - index_bitmap_outlier += 1; - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 0; - cur_index_bitmap_outlier_bits = 1; - } - k2++; - - } else { - final_normal.add(cur_value - final_x_l_plus); - index_bitmap_outlier <<= 1; - cur_index_bitmap_outlier_bits += 1; - } - if (cur_index_bitmap_outlier_bits % 8 == 0) { - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 0; - } - } - if (cur_index_bitmap_outlier_bits % 8 != 0) { - - index_bitmap_outlier <<= (8 - cur_index_bitmap_outlier_bits % 8); - - index_bitmap_outlier &= 0xFF; - bitmap_outlier.add(index_bitmap_outlier); - } - int final_alpha 
/**
 * Encodes one block: builds the block's delta array, searches for the pair of
 * lower/upper outlier thresholds with the smallest estimated bit cost, and hands
 * the chosen thresholds to {@code BOSEncodeBits} for serialization.
 *
 * <p>Candidates are evaluated in this order: no outliers, upper outliers only,
 * lower outliers only, lower outliers combined with a precomputed upper split
 * (threshold {@code max_delta_value - 2^gamma + 1}), and lower outliers combined
 * with an upper split at {@code x_l_plus + 2^beta}. A candidate replaces the
 * current best only when its cost is strictly smaller.
 *
 * <p>Every candidate cost has the same shape: an index cost, taken as the smaller of
 * {@code (k1 + k2) * getBitWith(block_size - 1)} and {@code block_size + k1 + k2},
 * plus per-value costs for the normal values and for each outlier side.
 *
 * @param ts_block   source values; forwarded to {@code getAbsDeltaTsBlock}
 * @param block_i    block index; forwarded to {@code getAbsDeltaTsBlock}
 * @param block_size nominal block size; kept as {@code init_block_size}, then the
 *                   local is overwritten with {@code min_delta[1]}
 * @param remaining  forwarded to {@code getAbsDeltaTsBlock}
 * @param encode_pos write offset into {@code cur_byte}
 * @param cur_byte   output buffer
 * @return the write offset returned by {@code BOSEncodeBits}
 */
private static int BOSBlockEncoder(int[] ts_block, int block_i, int block_size,int remaining, int encode_pos , byte[] cur_byte) {

    ArrayList<Integer> repeat_count = new ArrayList<>();
    int init_block_size = block_size;

    // getAbsDeltaTsBlock fills min_delta and repeat_count (helper not visible here).
    // Below, min_delta[1] is used as the effective block size and min_delta[2] as the
    // largest delta; min_delta[0] is only forwarded to the bit writer.
    int[] min_delta = new int[3];
    int[] ts_block_delta = getAbsDeltaTsBlock(ts_block,block_i, init_block_size,remaining, min_delta, repeat_count);
    block_size = min_delta[1];


    int max_delta_value = min_delta[2];
    // Histogram of the deltas: value_count_list is indexed by the delta itself,
    // value_list collects each distinct delta in first-seen order.
    // NOTE(review): indexing by value assumes every delta lies in [0, max_delta_value] - confirm against getAbsDeltaTsBlock.
    int[] value_list = new int[block_size];
    int unique_value_count = 0;
    int[] value_count_list = new int[max_delta_value+1];
    for(int value:ts_block_delta){
        if(value_count_list[value]==0){
            value_count_list[value] = 1;
            value_list[unique_value_count] = value;
            unique_value_count ++;
        }else{
            value_count_list[value] ++;
        }
    }

    // Pack (value, count) into one long: value in the high bits, count in the low
    // left_shift bits, so that sorting the longs orders the entries by value.
    int left_shift = getBitWith(block_size);
    int mask = (1 << left_shift) - 1;
    long[] sorted_value_list = new long[unique_value_count];
    int count = 0;

    for(int i=0;i<unique_value_count;i++){
        int value = value_list[i];

        sorted_value_list[i] = (((long) value) << left_shift) + value_count_list[value];
    }
    Arrays.sort(sorted_value_list);

    // Replace each per-value count by the running total, so the low bits of entry i
    // hold the number of deltas up to and including that value.
    // presumably getCount/getUniqueValue extract the low/high bit fields packed above - verify.
    for(int i=0;i<unique_value_count;i++){
        count += getCount(sorted_value_list[i], mask);
        sorted_value_list[i] = (((long)getUniqueValue(sorted_value_list[i], left_shift) ) << left_shift) + count;
    }


    // Baseline candidate: no outliers on either side.
    int final_k_start_value = -1; // x_l_minus
    int final_x_l_plus = 0; // x_l_plus
    int final_k_end_value = max_delta_value+1; // x_u_plus
    int final_x_u_minus = max_delta_value; // x_u_minus

    // Baseline cost: every value stored with getBitWith(max_delta_value) bits
    // (the expression below reduces to max_delta_value for the baseline thresholds).
    int min_bits = 0;
    min_bits += (getBitWith(final_k_end_value - final_k_start_value - 2 ) * (block_size));

    int cur_k1 = 0;

    int x_l_plus_value = 0; // x_l_plus
    int x_u_minus_value = max_delta_value; // x_u_minus

    // Upper outliers only (cur_k1 stays 0): try a split between every pair of
    // adjacent distinct values.
    for (int end_value_i = 1; end_value_i < unique_value_count; end_value_i++) {

        x_u_minus_value = getUniqueValue(sorted_value_list[end_value_i-1], left_shift);
        int x_u_plus_value = getUniqueValue(sorted_value_list[end_value_i], left_shift);
        int cur_bits = 0;
        // block size minus the running total stored with x_u_minus_value
        int cur_k2 = block_size - getCount(sorted_value_list[end_value_i-1],mask);
        cur_bits += Math.min((cur_k2 + cur_k1) * getBitWith(block_size-1), block_size + cur_k2 + cur_k1);
        if (cur_k1 + cur_k2 != block_size)
            cur_bits += (block_size - cur_k2) * getBitWith(x_u_minus_value - x_l_plus_value); // cur_k1 = 0
        if (cur_k2 != 0)
            cur_bits += cur_k2 * getBitWith(max_delta_value - x_u_plus_value);


        if (cur_bits < min_bits) {
            min_bits = cur_bits;
            final_x_u_minus = x_u_minus_value;
            final_k_end_value = x_u_plus_value;
        }
    }

    int k_start_value = -1; // x_l_minus

    // Precompute, for each gamma, the adjacent pair of distinct values that straddles
    // the threshold max_delta_value - 2^gamma + 1, together with the running total
    // stored with the lower value of the pair. end_i is shared across gammas, so the
    // sorted list is swept from the top only once.
    int gamma_max = getBitWith(max_delta_value);
    int[] gamma_count_list = new int[gamma_max+1];
    int[] x_u_minus_value_list = new int[gamma_max+1];
    int[] x_u_plus_value_list = new int[gamma_max+1];
    int end_i = unique_value_count - 1;
    for(int gamma = 0; gamma <= gamma_max; gamma++) {
        int x_u_plus_pow_beta = max_delta_value - (1<<gamma) + 1;
        for (; end_i > 0; end_i--) {
            x_u_minus_value = getUniqueValue(sorted_value_list[end_i - 1], left_shift);
            int x_u_plus_value = getUniqueValue(sorted_value_list[end_i], left_shift);
            if (x_u_minus_value < x_u_plus_pow_beta && x_u_plus_value >= x_u_plus_pow_beta){
                gamma_count_list[gamma] = getCount(sorted_value_list[end_i-1],mask);
                x_u_minus_value_list[gamma] = x_u_minus_value;
                x_u_plus_value_list[gamma] = x_u_plus_value;
            } else if (x_u_minus_value < x_u_plus_pow_beta) {
                // both values are below the threshold: leave the rest to the next gamma
                break;
            }
        }
    }
    // A gamma whose count is still 0 inherits the entry of gamma-1.
    // Index gamma_max itself is not filled here.
    for(int gamma = 1; gamma < gamma_max; gamma++) {
        if(gamma_count_list[gamma]==0){
            gamma_count_list[gamma] = gamma_count_list[gamma-1];
            x_u_minus_value_list[gamma] = x_u_minus_value_list[gamma-1];
            x_u_plus_value_list[gamma] = x_u_plus_value_list[gamma-1];
        }
    }

    // Lower split between every pair of adjacent distinct values.
    for (int start_value_i = 0; start_value_i < unique_value_count-1; start_value_i++) {
        long k_start_valueL = sorted_value_list[start_value_i];
        k_start_value = getUniqueValue(k_start_valueL, left_shift) ;
        x_l_plus_value = getUniqueValue(sorted_value_list[start_value_i+1], left_shift) ;


        // running total stored with k_start_value
        cur_k1 = getCount(k_start_valueL,mask);

        int k_end_value;
        int cur_bits;
        int cur_k2;
        k_end_value = max_delta_value + 1;

        // Candidate: lower outliers only.
        cur_bits = 0;
        cur_k2 = 0;
        cur_bits += Math.min((cur_k2 + cur_k1) * getBitWith(block_size-1), block_size + cur_k2 + cur_k1);
        cur_bits += cur_k1 * getBitWith(k_start_value);
        if (cur_k1 + cur_k2 != block_size)
            cur_bits += (block_size - cur_k1) * getBitWith(k_end_value- x_l_plus_value); // cur_k2 = 0

        if (cur_bits < min_bits) {
            min_bits = cur_bits;
            final_k_start_value = k_start_value;
            final_x_l_plus = x_l_plus_value;
            final_k_end_value = k_end_value;
            final_x_u_minus = max_delta_value;
        }

        int beta_max = getBitWith(max_delta_value - x_l_plus_value);
        // not read by any active code in this method
        int end_value_i = start_value_i + 1;

        int lower_outlier_cost = cur_k1 * getBitWith(k_start_value);

        // Candidate: this lower split combined with each precomputed upper split.
        for(int gamma = 0; gamma < beta_max; gamma++){
            x_u_minus_value = x_u_minus_value_list[gamma];
            k_end_value = x_u_plus_value_list[gamma];
            cur_bits = 0;
            cur_k2 = block_size - gamma_count_list[gamma];

            cur_bits += Math.min((cur_k1 + cur_k2) * getBitWith(block_size-1), block_size + cur_k1 + cur_k2);
            cur_bits += lower_outlier_cost;
            if (cur_k1 + cur_k2 != block_size)
                cur_bits += (block_size - cur_k1 - cur_k2) * getBitWith(x_u_minus_value - x_l_plus_value);
            if (cur_k2 != 0)
                cur_bits += cur_k2 * getBitWith(max_delta_value - k_end_value);


            if (cur_bits < min_bits) {
                min_bits = cur_bits;
                final_k_start_value = k_start_value;
                final_x_l_plus = x_l_plus_value;
                final_k_end_value = k_end_value;
                final_x_u_minus = x_u_minus_value;
            }

        }


    }
    // Candidate: for each beta (starting at 1), pair every lower split with the first
    // adjacent pair that straddles x_l_plus + 2^beta. end_value_i only moves forward
    // within one beta, so each beta is a single two-pointer pass over the sorted list.
    for(int beta = 1; beta <= gamma_max; beta++){
        int pow_beta = 1<<beta;
        int start_value_i = 0;
        int end_value_i = start_value_i+1;

        for (; start_value_i < unique_value_count-1; start_value_i++) {
            long x_l_minusL = sorted_value_list[start_value_i];
            int x_l_minus = getUniqueValue(x_l_minusL, left_shift) ;
            int x_l_plus = getUniqueValue(sorted_value_list[start_value_i+1], left_shift) ;
            int x_u_plus_pow_beta = pow_beta+x_l_plus;
            // the list is sorted, so later lower splits only push the threshold higher
            if(x_u_plus_pow_beta > max_delta_value) break;



            cur_k1 = getCount(x_l_minusL,mask);
            int lower_outlier_cost = cur_k1 * getBitWith(x_l_minus);

            while ( end_value_i < unique_value_count) {

                int x_u_minus = getUniqueValue(sorted_value_list[end_value_i-1], left_shift);
                int x_u_plus = getUniqueValue(sorted_value_list[end_value_i], left_shift);
                if(x_u_minus < x_u_plus_pow_beta && x_u_plus >= x_u_plus_pow_beta){
                    int cur_bits = 0;
                    int cur_k2 = block_size - getCount(sorted_value_list[end_value_i-1],mask);

                    cur_bits += Math.min((cur_k1 + cur_k2) * getBitWith(block_size-1), block_size + cur_k1 + cur_k2);
                    cur_bits += lower_outlier_cost;
                    if (cur_k1 + cur_k2 != block_size)
                        cur_bits += (block_size - cur_k1 - cur_k2) * getBitWith(x_u_minus - x_l_plus);
                    if (cur_k2 != 0)
                        cur_bits += cur_k2 * getBitWith(max_delta_value - x_u_plus);


                    if (cur_bits < min_bits) {
                        min_bits = cur_bits;
                        final_k_start_value = x_l_minus;
                        final_x_l_plus = x_l_plus;
                        final_k_end_value = x_u_plus;
                        final_x_u_minus = x_u_minus;
                    }
                    // keep end_value_i on the matching pair for the next lower split
                    break;
                }

                end_value_i++;
            }
        }

    }

    // Serialize the block with the cheapest thresholds found.
    encode_pos = BOSEncodeBits(ts_block_delta,init_block_size, final_k_start_value,final_x_l_plus, final_k_end_value,final_x_u_minus,
            max_delta_value, min_delta,repeat_count, encode_pos , cur_byte);

    return encode_pos;
}
BOSBlockDecoder(byte[] encoded, int decode_pos, int[] value_list,int init_block_size, int block_size, int[] value_pos_arr) { - - int k_byte = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - int k1_byte = (int) (k_byte % pow(2, 16)); - int k1 = k1_byte / 2; - int final_alpha = k1_byte % 2; - - int k2 = (int) (k_byte / pow(2, 16)); - - - int min_delta = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - int count_size = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - ArrayList<Integer> repeat_count = new ArrayList<>(); - if (count_size != 0) { - ArrayList<Integer> repeat_count_result = new ArrayList<>(); - repeat_count = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(init_block_size-1), count_size, repeat_count_result); - decode_pos = repeat_count_result.get(0); - - } - - int cur_block_size = block_size; - for (int i = 1; i < count_size; i += 2) { - cur_block_size -= (repeat_count.get(i) - 1); - } - - int final_k_start_value = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - int final_k_end_value = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - int bit_width_final = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - int left_bit_width = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - int right_bit_width = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_left_outlier = new ArrayList<>(); - ArrayList<Integer> final_right_outlier = new ArrayList<>(); - ArrayList<Integer> final_normal; - ArrayList<Integer> bitmap_outlier = new ArrayList<>(); - - if (final_alpha == 0) { // 0 - int bitmap_bytes = (int) Math.ceil((double) (cur_block_size + k1 + k2) / (double) 8); -// System.out.println("bitmap_bytes:" + bitmap_bytes); - for (int i = 0; i < bitmap_bytes; i++) { - bitmap_outlier.add(bytes2Integer(encoded, 
decode_pos, 1)); - decode_pos += 1; - } - int bitmap_outlier_i = 0; - int remaining_bits = 8; - int tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - int i = 0; - while (i < cur_block_size) { - if (remaining_bits > 1) { - int bit_i = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_i == 1) { - int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_left_right == 1) { - final_left_outlier_index.add(i); - } else { - final_right_outlier_index.add(i); - } - } - if (remaining_bits == 0) { - remaining_bits = 8; - if (bitmap_outlier_i >= bitmap_bytes) break; - tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - } - } else if (remaining_bits == 1) { - int bit_i = tmp & 0x1; - remaining_bits = 8; - if (bitmap_outlier_i >= bitmap_bytes) break; - tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - if (bit_i == 1) { - int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_left_right == 1) { - final_left_outlier_index.add(i); - } else { - final_right_outlier_index.add(i); - } - } - } - i++; - } - } else { - - ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); - final_left_outlier_index = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(cur_block_size-1), k1, decode_pos_result_left); - decode_pos = (decode_pos_result_left.get(0)); - - ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); - final_right_outlier_index = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(cur_block_size-1), k2, decode_pos_result_right); - decode_pos = (decode_pos_result_right.get(0)); - - } - - - ArrayList<Integer> decode_pos_normal = new ArrayList<>(); - final_normal = decodeOutlier2Bytes(encoded, decode_pos, bit_width_final, cur_block_size - k1 - k2, decode_pos_normal); - - decode_pos = decode_pos_normal.get(0); - if (k1 != 0) { - ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); - final_left_outlier = 
decodeOutlier2Bytes(encoded, decode_pos, left_bit_width, k1, decode_pos_result_left); - - decode_pos = decode_pos_result_left.get(0); - } - if (k2 != 0) { - ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); - final_right_outlier = decodeOutlier2Bytes(encoded, decode_pos, right_bit_width, k2, decode_pos_result_right); - decode_pos = decode_pos_result_right.get(0); - } - int left_outlier_i = 0; - int right_outlier_i = 0; - int normal_i = 0; - int pre_v; -// int final_k_end_value = (int) (final_k_start_value + pow(2, bit_width_final)); - - int cur_i = 0; - int repeat_i = 0; - for (int i = 0; i < cur_block_size; i++) { - - int current_delta; - if (left_outlier_i >= k1) { - if (right_outlier_i >= k2) { - current_delta = final_normal.get(normal_i) + final_k_start_value+1; - normal_i++; - } else if (i == final_right_outlier_index.get(right_outlier_i)) { - current_delta = final_right_outlier.get(right_outlier_i) + final_k_end_value; - right_outlier_i++; - } else { - current_delta = final_normal.get(normal_i) + final_k_start_value+1; - normal_i++; - } - } else if (i == final_left_outlier_index.get(left_outlier_i)) { - current_delta = final_left_outlier.get(left_outlier_i); - left_outlier_i++; - } else { - - if (right_outlier_i >= k2) { - current_delta = final_normal.get(normal_i) + final_k_start_value+1; - normal_i++; - } else if (i == final_right_outlier_index.get(right_outlier_i)) { - current_delta = final_right_outlier.get(right_outlier_i) + final_k_end_value; - right_outlier_i++; - } else { - current_delta = final_normal.get(normal_i) + final_k_start_value+1; - normal_i++; - } - } - pre_v = current_delta + min_delta; - if (repeat_i < count_size) { - if (cur_i == repeat_count.get(repeat_i)) { - cur_i += (repeat_count.get(repeat_i+1)); - - for (int j = 0; j < repeat_count.get(repeat_i + 1); j++) { - value_list[value_pos_arr[0]] = pre_v; - value_pos_arr[0]++; - } - repeat_i += 2; - }else { - cur_i++; - value_list[value_pos_arr[0]] = pre_v; - 
value_pos_arr[0]++; - } - } else { - cur_i++; - value_list[value_pos_arr[0]] = pre_v; - value_pos_arr[0]++; - } - } - return decode_pos; - } - - public static void BOSDecoder(byte[] encoded) { - - int decode_pos = 0; - int length_all = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - int block_size = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - - int block_num = length_all / block_size; - int remain_length = length_all - block_num * block_size; - - int[] value_list = new int[length_all+block_size]; - int[] value_pos_arr = new int[1]; - - for (int k = 0; k < block_num; k++) { - decode_pos = BOSBlockDecoder(encoded, decode_pos, value_list,block_size, block_size,value_pos_arr); - } - - if (remain_length <= 3) { - for (int i = 0; i < remain_length; i++) { - int value_end = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - value_list[value_pos_arr[0]] = value_end; - value_pos_arr[0]++; - } - } else { - BOSBlockDecoder(encoded, decode_pos, value_list,block_size, remain_length, value_pos_arr); - } - } - - public static int EncodeBits(int num, - int bit_width, - int encode_pos, - byte[] cur_byte, - int[] bit_index_list){ - // 找到要插入的位的索引 - int bit_index = bit_index_list[0] ;//cur_byte[encode_pos + 1]; - - // 计算数值的起始位位置 - int remaining_bits = bit_width; - - while (remaining_bits > 0) { - // 计算在当前字节中可以使用的位数 - int available_bits = bit_index; - int bits_to_write = Math.min(available_bits, remaining_bits); - - // 更新 bit_index - bit_index = available_bits - bits_to_write; - - // 计算要写入的位的掩码和数值 - int mask = (1 << bits_to_write) - 1; - int bits = (num >> (remaining_bits - bits_to_write)) & mask; - - // 写入到当前位置 - cur_byte[encode_pos] &= (byte) ~(mask << bit_index); // 清除对应位置的位 - cur_byte[encode_pos] |= (byte) (bits << bit_index); - - // 更新位宽和数值 - remaining_bits -= bits_to_write; - if (bit_index == 0) { - bit_index = 8; - encode_pos++; - } - } - bit_index_list[0] = bit_index; -// cur_byte[encode_pos + 1] = (byte) bit_index; - return encode_pos; - 
} - private static int BOSEncodeBitsImprove(int[] ts_block_delta, - int init_block_size, - int final_k_start_value, - int final_x_l_plus, - int final_k_end_value, - int final_x_u_minus, - int max_delta_value, - int[] min_delta, - ArrayList<Integer> repeat_count, - int encode_pos, - byte[] cur_byte) { - int block_size = ts_block_delta.length; - - ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); - - int k1 = 0; - int k2 = 0; - - - - ArrayList<Integer> bitmap_outlier = new ArrayList<>(); - int index_bitmap_outlier = 0; - int cur_index_bitmap_outlier_bits = 0; - for (int i = 0; i < block_size; i++) { - int cur_value = ts_block_delta[i]; - if ( cur_value<= final_k_start_value) { -// encode_pos = EncodeBits(cur_value,left_bit_width,encode_pos,cur_byte); -// final_left_outlier.add(cur_value); - final_left_outlier_index.add(i); - if (cur_index_bitmap_outlier_bits % 8 != 7) { - index_bitmap_outlier <<= 2; - index_bitmap_outlier += 3; - cur_index_bitmap_outlier_bits += 2; - } else { - index_bitmap_outlier <<= 1; - index_bitmap_outlier += 1; - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 1; - cur_index_bitmap_outlier_bits = 1; - } - k1++; - - - } else if (cur_value >= final_k_end_value) { -// encode_pos = EncodeBits(cur_value- final_k_end_value,right_bit_width,encode_pos,cur_byte); -// final_right_outlier.add(cur_value - final_k_end_value); - final_right_outlier_index.add(i); - if (cur_index_bitmap_outlier_bits % 8 != 7) { - index_bitmap_outlier <<= 2; - index_bitmap_outlier += 2; - cur_index_bitmap_outlier_bits += 2; - } else { - index_bitmap_outlier <<= 1; - index_bitmap_outlier += 1; - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 0; - cur_index_bitmap_outlier_bits = 1; - } - k2++; - - } else { -// final_normal.add(cur_value - final_x_l_plus); -// encode_pos = EncodeBits(cur_value- final_x_l_plus,right_bit_width,encode_pos,cur_byte); - 
index_bitmap_outlier <<= 1; - cur_index_bitmap_outlier_bits += 1; - } - if (cur_index_bitmap_outlier_bits % 8 == 0) { - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 0; - } - } - if (cur_index_bitmap_outlier_bits % 8 != 0) { - - index_bitmap_outlier <<= (8 - cur_index_bitmap_outlier_bits % 8); - - index_bitmap_outlier &= 0xFF; - bitmap_outlier.add(index_bitmap_outlier); - } - - int final_alpha = ((k1 + k2) * getBitWith(block_size-1)) <= (block_size + k1 + k2) ? 1 : 0; - - - int k_byte = (k1 << 1); - k_byte += final_alpha; - k_byte += (k2 << 16); - - int2Bytes(k_byte,encode_pos,cur_byte); - encode_pos += 4; - - - int2Bytes(min_delta[0],encode_pos,cur_byte); - encode_pos += 4; - - int size = repeat_count.size(); - intByte2Bytes(size,encode_pos,cur_byte); - encode_pos += 1; - - int[] bit_index_list = new int[1]; - bit_index_list[0] = 8; - if (size != 0){ - int bit_width_init = getBitWith(init_block_size-1); - for(int repeat_count_v:repeat_count){ - encode_pos = EncodeBits(repeat_count_v, bit_width_init, encode_pos, cur_byte, bit_index_list); - } - if(bit_index_list[0] != 8){ - bit_index_list[0] = 8; - encode_pos ++; - } - } -// encode_pos =encodeOutlier2Bytes(repeat_count, getBitWith(init_block_size-1),encode_pos,cur_byte); - -// int2Bytes(min_delta[1],encode_pos,cur_byte); -// encode_pos += 4; - - int bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); - intByte2Bytes(bit_width_final,encode_pos,cur_byte); - encode_pos += 1; - - - if(final_k_start_value<0 && final_k_end_value > max_delta_value){ -// int bit_width_final= getBitWith(final_x_u_minus - final_x_l_plus); -// cur_byte[encode_pos+1] = 8; - bit_index_list[0] = 8; - for (int cur_value : ts_block_delta) { - encode_pos = EncodeBits(cur_value, bit_width_final, encode_pos, cur_byte, bit_index_list); -// final_normal.add(cur_value); - } - if(bit_index_list[0] != 8){ - encode_pos ++; - } -// cur_byte[encode_pos+1] = 0; -// encode_pos = encodeOutlier2Bytes(final_normal, 
bit_width_final,encode_pos,cur_byte); - return encode_pos; - } - - - int left_bit_width = getBitWith(final_k_start_value);//final_left_max - int right_bit_width = getBitWith(max_delta_value - final_k_end_value);//final_right_min - int2Bytes(final_x_l_plus,encode_pos,cur_byte); - encode_pos += 4; - int2Bytes(final_k_end_value,encode_pos,cur_byte); - encode_pos += 4; - -// bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); -// intByte2Bytes(bit_width_final,encode_pos,cur_byte); -// encode_pos += 1; - intByte2Bytes(left_bit_width,encode_pos,cur_byte); - encode_pos += 1; - intByte2Bytes(right_bit_width,encode_pos,cur_byte); - encode_pos += 1; - - if (final_alpha == 0) { // 0 - - for (int i : bitmap_outlier) { - - intByte2Bytes(i,encode_pos,cur_byte); - encode_pos += 1; - } - } else { - encode_pos = encodeOutlier2Bytes(final_left_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); - encode_pos = encodeOutlier2Bytes(final_right_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); - } -// cur_byte[encode_pos+1] = 8; -// bit_index_list[0] = 8; - for (int cur_value : ts_block_delta) { - if (cur_value <= final_k_start_value) { - encode_pos = EncodeBits(cur_value, left_bit_width, encode_pos, cur_byte,bit_index_list); - } else if (cur_value >= final_k_end_value) { - encode_pos = EncodeBits(cur_value - final_k_end_value, right_bit_width, encode_pos, cur_byte,bit_index_list); - } else { - encode_pos = EncodeBits(cur_value - final_x_l_plus, bit_width_final, encode_pos, cur_byte,bit_index_list); - } - } - if(bit_index_list[0] != 8){ - encode_pos ++; - } - -// cur_byte[encode_pos+1] = 0; - -// if(k1==0 && k2==0){ -// intByte2Bytes(bit_width_final,encode_pos,cur_byte); -// encode_pos += 1; -// -// -// } -// else{ -// int2Bytes(final_x_l_plus,encode_pos,cur_byte); -// encode_pos += 4; -// int2Bytes(final_k_end_value,encode_pos,cur_byte); -// encode_pos += 4; -// -// bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); -// 
/**
 * Encodes one block: builds the block's delta array, searches for the pair of
 * lower/upper outlier thresholds with the smallest estimated bit cost, and hands
 * the chosen thresholds to {@code BOSEncodeBitsImprove} for serialization.
 *
 * <p>Candidates are evaluated in this order: no outliers, upper outliers only,
 * lower outliers only, lower outliers combined with a precomputed upper split
 * (threshold {@code max_delta_value - 2^gamma + 1}), and lower outliers combined
 * with an upper split at {@code x_l_plus + 2^beta} (beta starting at 0). A candidate
 * replaces the current best only when its cost is strictly smaller.
 *
 * <p>Every candidate cost has the same shape: an index cost, taken as the smaller of
 * {@code (k1 + k2) * getBitWith(block_size - 1)} and {@code block_size + k1 + k2},
 * plus per-value costs for the normal values and for each outlier side.
 *
 * @param ts_block   source values; forwarded to {@code getAbsDeltaTsBlock}
 * @param block_i    block index; forwarded to {@code getAbsDeltaTsBlock}
 * @param block_size nominal block size; kept as {@code init_block_size}, then the
 *                   local is overwritten with {@code min_delta[1]}
 * @param remaining  forwarded to {@code getAbsDeltaTsBlock}
 * @param encode_pos write offset into {@code cur_byte}
 * @param cur_byte   output buffer
 * @return the write offset returned by {@code BOSEncodeBitsImprove}
 */
private static int BOSBlockEncoderImprove(int[] ts_block, int block_i, int block_size,int remaining, int encode_pos , byte[] cur_byte) {

    ArrayList<Integer> repeat_count = new ArrayList<>();
    int init_block_size = block_size;

    // getAbsDeltaTsBlock fills min_delta and repeat_count (helper not visible here).
    // Below, min_delta[1] is used as the effective block size and min_delta[2] as the
    // largest delta; min_delta[0] is only forwarded to the bit writer.
    int[] min_delta = new int[3];
    int[] ts_block_delta = getAbsDeltaTsBlock(ts_block,block_i, init_block_size,remaining, min_delta, repeat_count);
    block_size = min_delta[1];


    int max_delta_value = min_delta[2];
    // Histogram of the deltas: value_count_list is indexed by the delta itself,
    // value_list collects each distinct delta in first-seen order.
    // NOTE(review): indexing by value assumes every delta lies in [0, max_delta_value] - confirm against getAbsDeltaTsBlock.
    int[] value_list = new int[block_size];
    int unique_value_count = 0;
    int[] value_count_list = new int[max_delta_value+1];
    for(int value:ts_block_delta){
        if(value_count_list[value]==0){
            value_count_list[value] = 1;
            value_list[unique_value_count] = value;
            unique_value_count ++;
        }else{
            value_count_list[value] ++;
        }
    }

    // Pack (value, count) into one long: value in the high bits, count in the low
    // left_shift bits, so that sorting the longs orders the entries by value.
    int left_shift = getBitWith(block_size);
    int mask = (1 << left_shift) - 1;
    long[] sorted_value_list = new long[unique_value_count];
    int count = 0;

    for(int i=0;i<unique_value_count;i++){
        int value = value_list[i];

        sorted_value_list[i] = (((long) value) << left_shift) + value_count_list[value];
    }
    Arrays.sort(sorted_value_list);

    // Replace each per-value count by the running total, so the low bits of entry i
    // hold the number of deltas up to and including that value.
    // presumably getCount/getUniqueValue extract the low/high bit fields packed above - verify.
    for(int i=0;i<unique_value_count;i++){
        count += getCount(sorted_value_list[i], mask);
        sorted_value_list[i] = (((long)getUniqueValue(sorted_value_list[i], left_shift) ) << left_shift) + count;
    }


    // Baseline candidate: no outliers on either side.
    int final_k_start_value = -1; // x_l_minus
    int final_x_l_plus = 0; // x_l_plus
    int final_k_end_value = max_delta_value+1; // x_u_plus
    int final_x_u_minus = max_delta_value; // x_u_minus

    // Baseline cost: every value stored with getBitWith(max_delta_value) bits
    // (the expression below reduces to max_delta_value for the baseline thresholds).
    int min_bits = 0;
    min_bits += (getBitWith(final_k_end_value - final_k_start_value - 2 ) * (block_size));

    int cur_k1 = 0;

    int x_l_plus_value = 0; // x_l_plus
    int x_u_minus_value = max_delta_value; // x_u_minus

    // Upper outliers only (cur_k1 stays 0): try a split between every pair of
    // adjacent distinct values.
    for (int end_value_i = 1; end_value_i < unique_value_count; end_value_i++) {

        x_u_minus_value = getUniqueValue(sorted_value_list[end_value_i-1], left_shift);
        int x_u_plus_value = getUniqueValue(sorted_value_list[end_value_i], left_shift);
        int cur_bits = 0;
        // block size minus the running total stored with x_u_minus_value
        int cur_k2 = block_size - getCount(sorted_value_list[end_value_i-1],mask);
        cur_bits += Math.min((cur_k2 + cur_k1) * getBitWith(block_size-1), block_size + cur_k2 + cur_k1);
        if (cur_k1 + cur_k2 != block_size)
            cur_bits += (block_size - cur_k2) * getBitWith(x_u_minus_value - x_l_plus_value); // cur_k1 = 0
        if (cur_k2 != 0)
            cur_bits += cur_k2 * getBitWith(max_delta_value - x_u_plus_value);


        if (cur_bits < min_bits) {
            min_bits = cur_bits;
            final_x_u_minus = x_u_minus_value;
            final_k_end_value = x_u_plus_value;
        }
    }

    int k_start_value = -1; // x_l_minus

    // Precompute, for each gamma, the adjacent pair of distinct values that straddles
    // the threshold max_delta_value - 2^gamma + 1, together with the running total
    // stored with the lower value of the pair. end_i is shared across gammas, so the
    // sorted list is swept from the top only once.
    int gamma_max = getBitWith(max_delta_value);
    int[] gamma_count_list = new int[gamma_max+1];
    int[] x_u_minus_value_list = new int[gamma_max+1];
    int[] x_u_plus_value_list = new int[gamma_max+1];
    int end_i = unique_value_count - 1;
    for(int gamma = 0; gamma <= gamma_max; gamma++) {
        int x_u_plus_pow_beta = max_delta_value - (1<<gamma) + 1;
        for (; end_i > 0; end_i--) {
            x_u_minus_value = getUniqueValue(sorted_value_list[end_i - 1], left_shift);
            int x_u_plus_value = getUniqueValue(sorted_value_list[end_i], left_shift);
            if (x_u_minus_value < x_u_plus_pow_beta && x_u_plus_value >= x_u_plus_pow_beta){
                gamma_count_list[gamma] = getCount(sorted_value_list[end_i-1],mask);
                x_u_minus_value_list[gamma] = x_u_minus_value;
                x_u_plus_value_list[gamma] = x_u_plus_value;
            } else if (x_u_minus_value < x_u_plus_pow_beta) {
                // both values are below the threshold: leave the rest to the next gamma
                break;
            }
        }
    }
    // A gamma whose count is still 0 inherits the entry of gamma-1.
    // Index gamma_max itself is not filled here.
    for(int gamma = 1; gamma < gamma_max; gamma++) {
        if(gamma_count_list[gamma]==0){
            gamma_count_list[gamma] = gamma_count_list[gamma-1];
            x_u_minus_value_list[gamma] = x_u_minus_value_list[gamma-1];
            x_u_plus_value_list[gamma] = x_u_plus_value_list[gamma-1];
        }
    }

    // Lower split between every pair of adjacent distinct values.
    for (int start_value_i = 0; start_value_i < unique_value_count-1; start_value_i++) {
        long k_start_valueL = sorted_value_list[start_value_i];
        k_start_value = getUniqueValue(k_start_valueL, left_shift) ;
        x_l_plus_value = getUniqueValue(sorted_value_list[start_value_i+1], left_shift) ;


        // running total stored with k_start_value
        cur_k1 = getCount(k_start_valueL,mask);

        int k_end_value;
        int cur_bits;
        int cur_k2;
        k_end_value = max_delta_value + 1;

        // Candidate: lower outliers only.
        cur_bits = 0;
        cur_k2 = 0;
        cur_bits += Math.min((cur_k2 + cur_k1) * getBitWith(block_size-1), block_size + cur_k2 + cur_k1);
        cur_bits += cur_k1 * getBitWith(k_start_value);
        if (cur_k1 + cur_k2 != block_size)
            cur_bits += (block_size - cur_k1) * getBitWith(k_end_value- x_l_plus_value); // cur_k2 = 0

        if (cur_bits < min_bits) {
            min_bits = cur_bits;
            final_k_start_value = k_start_value;
            final_x_l_plus = x_l_plus_value;
            final_k_end_value = k_end_value;
            final_x_u_minus = max_delta_value;
        }

        int beta_max = getBitWith(max_delta_value - x_l_plus_value);
        // not read by any active code in this method
        int end_value_i = start_value_i + 1;

        int lower_outlier_cost = cur_k1 * getBitWith(k_start_value);

        // Candidate: this lower split combined with each precomputed upper split.
        for(int gamma = 0; gamma < beta_max; gamma++){
            x_u_minus_value = x_u_minus_value_list[gamma];
            k_end_value = x_u_plus_value_list[gamma];
            cur_bits = 0;
            cur_k2 = block_size - gamma_count_list[gamma];

            cur_bits += Math.min((cur_k1 + cur_k2) * getBitWith(block_size-1), block_size + cur_k1 + cur_k2);
            cur_bits += lower_outlier_cost;
            if (cur_k1 + cur_k2 != block_size)
                cur_bits += (block_size - cur_k1 - cur_k2) * getBitWith(x_u_minus_value - x_l_plus_value);
            if (cur_k2 != 0)
                cur_bits += cur_k2 * getBitWith(max_delta_value - k_end_value);


            if (cur_bits < min_bits) {
                min_bits = cur_bits;
                final_k_start_value = k_start_value;
                final_x_l_plus = x_l_plus_value;
                final_k_end_value = k_end_value;
                final_x_u_minus = x_u_minus_value;
            }

        }


    }
    // Candidate: for each beta (starting at 0), pair every lower split with the first
    // adjacent pair that straddles x_l_plus + 2^beta. end_value_i only moves forward
    // within one beta, so each beta is a single two-pointer pass over the sorted list.
    for(int beta = 0; beta <= gamma_max; beta++){
        int pow_beta = 1<<beta;
        int start_value_i = 0;
        int end_value_i = start_value_i+1;

        for (; start_value_i < unique_value_count-1; start_value_i++) {
            long x_l_minusL = sorted_value_list[start_value_i];
            int x_l_minus = getUniqueValue(x_l_minusL, left_shift) ;
            int x_l_plus = getUniqueValue(sorted_value_list[start_value_i+1], left_shift) ;
            int x_u_plus_pow_beta = pow_beta+x_l_plus;
            // the list is sorted, so later lower splits only push the threshold higher
            if(x_u_plus_pow_beta > max_delta_value) break;



            cur_k1 = getCount(x_l_minusL,mask);
            int lower_outlier_cost = cur_k1 * getBitWith(x_l_minus);

            while ( end_value_i < unique_value_count) {

                int x_u_minus = getUniqueValue(sorted_value_list[end_value_i-1], left_shift);
                int x_u_plus = getUniqueValue(sorted_value_list[end_value_i], left_shift);
                if(x_u_minus < x_u_plus_pow_beta && x_u_plus >= x_u_plus_pow_beta){
                    int cur_bits = 0;
                    int cur_k2 = block_size - getCount(sorted_value_list[end_value_i-1],mask);

                    cur_bits += Math.min((cur_k1 + cur_k2) * getBitWith(block_size-1), block_size + cur_k1 + cur_k2);
                    cur_bits += lower_outlier_cost;
                    if (cur_k1 + cur_k2 != block_size)
                        cur_bits += (block_size - cur_k1 - cur_k2) * getBitWith(x_u_minus - x_l_plus);
                    if (cur_k2 != 0)
                        cur_bits += cur_k2 * getBitWith(max_delta_value - x_u_plus);


                    if (cur_bits < min_bits) {
                        min_bits = cur_bits;
                        final_k_start_value = x_l_minus;
                        final_x_l_plus = x_l_plus;
                        final_k_end_value = x_u_plus;
                        final_x_u_minus = x_u_minus;
                    }
                    // keep end_value_i on the matching pair for the next lower split
                    break;
                }

                end_value_i++;
            }
        }

    }

    // Serialize the block with the cheapest thresholds found.
    encode_pos = BOSEncodeBitsImprove(ts_block_delta,init_block_size, final_k_start_value,final_x_l_plus, final_k_end_value,final_x_u_minus,
            max_delta_value, min_delta,repeat_count, encode_pos , cur_byte);

    return encode_pos;
}
decode_pos++; - } - } - decode_pos_list[0] = decode_pos; - decode_pos_list[1] = bit_index; - - return num; - } - - public static int BOSBlockDecoderImprove(byte[] encoded, int decode_pos, int[] value_list,int init_block_size, int block_size, int[] value_pos_arr) { - - int k_byte = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - int k1_byte = (int) (k_byte % pow(2, 16)); - int k1 = k1_byte / 2; - int final_alpha = k1_byte % 2; - - int k2 = (int) (k_byte / pow(2, 16)); - -// int value0 = bytes2Integer(encoded, decode_pos, 4); -// decode_pos += 4; -// value_list[value_pos_arr[0]] =value0; -// value_pos_arr[0] ++; - - int min_delta = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - int count_size = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - int[] decode_list = new int[2]; - decode_list[0]= decode_pos; - decode_list[1]= 8; - - int valuePos = value_pos_arr[0]; - -// ArrayList<Integer> repeat_count = new ArrayList<>(); - ArrayList<Integer> repeat_count = new ArrayList<>(); - if (count_size != 0) { - int bit_width_init = getBitWith(init_block_size-1); - for(int i = 0;i<count_size;i++){ - int repeat_count_v = DecodeBits(encoded, bit_width_init, decode_list); - repeat_count.add(repeat_count_v); - } - - if(decode_list[1] != 8){ - decode_list[1] = 8; - decode_list[0] ++; - } -// repeat_count = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(init_block_size-1), count_size, repeat_count_result); - decode_pos = decode_list[0]; -// decode_list[1]= 8; - } - - int cur_block_size = block_size; - for (int i = 1; i < count_size; i += 2) { - cur_block_size -= (repeat_count.get(i) - 1); - } - - int bit_width_final = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - - - int pre_v; - int cur_i = 0; - int repeat_i = 0; - if(k1==0 && k2==0){ -// int pre_v = value0; - decode_list[0] = decode_pos; - decode_list[1]= 8; - for (int i = 0; i < cur_block_size; i++) { - pre_v = min_delta + DecodeBits(encoded, bit_width_final, decode_list); 
-// value_list[value_pos_arr[0]++] = pre_v; - if (repeat_i < count_size && cur_i == repeat_count.get(repeat_i)) { - cur_i += (repeat_count.get(repeat_i+1)); - - for (int j = 0; j < repeat_count.get(repeat_i + 1); j++) { - value_list[value_pos_arr[0]++] = pre_v; - } - repeat_i += 2; - } else { - cur_i++; - value_list[value_pos_arr[0]++] = pre_v; - } - } - if(decode_list[1] != 8){ - decode_list[1] = 8; - decode_list[0] ++; - } - -// value_pos_arr[0] = valuePos; - return decode_list[0]; - } - - ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); -// ArrayList<Integer> final_left_outlier = new ArrayList<>(); -// ArrayList<Integer> final_right_outlier = new ArrayList<>(); -// ArrayList<Integer> final_normal= new ArrayList<>();; - ArrayList<Integer> bitmap_outlier = new ArrayList<>(); - int final_k_start_value = 0; - int final_k_end_value = 0; -// int bit_width_final = 0; - int left_bit_width = 0; - int right_bit_width = 0; - - final_k_start_value = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - final_k_end_value = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - -// bit_width_final = bytes2Integer(encoded, decode_pos, 1); -// decode_pos += 1; - - left_bit_width = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - right_bit_width = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - if (final_alpha == 0) { - int bitmap_bytes = (int) Math.ceil((double) (cur_block_size + k1 + k2) / (double) 8); - for (int i = 0; i < bitmap_bytes; i++) { - bitmap_outlier.add(bytes2Integer(encoded, decode_pos, 1)); - decode_pos += 1; - } - int bitmap_outlier_i = 0; - int remaining_bits = 8; - int tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - int i = 0; - while (i < cur_block_size ) { - if (remaining_bits > 1) { - int bit_i = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_i == 1) { - int bit_left_right = (tmp >> 
(remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_left_right == 1) { - final_left_outlier_index.add(i); - } else { - final_right_outlier_index.add(i); - } - } - if (remaining_bits == 0) { - remaining_bits = 8; - if (bitmap_outlier_i >= bitmap_bytes) break; - tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - } - } else if (remaining_bits == 1) { - int bit_i = tmp & 0x1; - remaining_bits = 8; - if (bitmap_outlier_i >= bitmap_bytes) break; - tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - if (bit_i == 1) { - int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_left_right == 1) { - final_left_outlier_index.add(i); - } else { - final_right_outlier_index.add(i); - } - } - } - i++; - } - } else { - ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); - final_left_outlier_index = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(cur_block_size-1), k1, decode_pos_result_left); - decode_pos = (decode_pos_result_left.get(0)); - ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); - final_right_outlier_index = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(cur_block_size-1), k2, decode_pos_result_right); - decode_pos = (decode_pos_result_right.get(0)); - } - - - - - -// ArrayList<Integer> decode_pos_normal = new ArrayList<>(); -// final_normal = decodeOutlier2Bytes(encoded, decode_pos, bit_width_final, block_size - k1 - k2, decode_pos_normal); -// -// decode_pos = decode_pos_normal.get(0); -// if (k1 != 0) { -// ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); -// final_left_outlier = decodeOutlier2Bytes(encoded, decode_pos, left_bit_width, k1, decode_pos_result_left); -// decode_pos = decode_pos_result_left.get(0); -// } -// if (k2 != 0) { -// ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); -// final_right_outlier = decodeOutlier2Bytes(encoded, decode_pos, right_bit_width, k2, decode_pos_result_right); -// decode_pos = 
decode_pos_result_right.get(0); -// } - int left_outlier_i = 0; - int right_outlier_i = 0; - int normal_i = 0; -// int pre_v = value0; -// int final_k_end_value = (int) (final_k_start_value + pow(2, bit_width_final)); - -// Precompute constants - int normalOffset = min_delta + final_k_start_value; - int rightOutlierOffset = min_delta + final_k_end_value; - -// Initialize indices and pre-fetch next outlier positions - int leftOutlierNextIndex = (left_outlier_i < k1) ? final_left_outlier_index.get(left_outlier_i) : Integer.MAX_VALUE; - int rightOutlierNextIndex = (right_outlier_i < k2) ? final_right_outlier_index.get(right_outlier_i) : Integer.MAX_VALUE; - decode_list[0]= decode_pos; -// decode_list[1]= 8; - // Use a local variable for the position - for (int i = 0; i < cur_block_size; i++) { -// int currentDelta; - if (i == leftOutlierNextIndex) { - // Process left outlier - pre_v = min_delta + DecodeBits(encoded, left_bit_width, decode_list); // final_left_outlier.get(left_outlier_i); - left_outlier_i++; - leftOutlierNextIndex = (left_outlier_i < k1) ? final_left_outlier_index.get(left_outlier_i) : Integer.MAX_VALUE; - } else if (i == rightOutlierNextIndex) { - // Process right outlier - pre_v = rightOutlierOffset + DecodeBits(encoded, right_bit_width, decode_list);// final_right_outlier.get(right_outlier_i); - right_outlier_i++; - rightOutlierNextIndex = (right_outlier_i < k2) ? 
final_right_outlier_index.get(right_outlier_i) : Integer.MAX_VALUE; - } else { - // Process normal value - pre_v = normalOffset + DecodeBits(encoded, bit_width_final, decode_list); - normal_i++; - } - if (repeat_i < count_size && cur_i == repeat_count.get(repeat_i)) { - cur_i += (repeat_count.get(repeat_i+1)); - - for (int j = 0; j < repeat_count.get(repeat_i + 1); j++) { - value_list[value_pos_arr[0]++] = pre_v; - } - repeat_i += 2; - } else { - cur_i++; - value_list[value_pos_arr[0]++] = pre_v; - } - // Update the cumulative value and store it -// pre_v += deZigzag(currentDelta); -// value_list[valuePos++] = pre_v; - } -// value_pos_arr[0] = valuePos; - if(decode_list[1]!=8){ - return decode_list[0]+1; - }else { - return decode_list[0]; - } -// decode_pos = decode_list[0]; -// Update the position in the array - - -// return decode_pos; - } - public static void BOSDecoderImprove(byte[] encoded) { - - int decode_pos = 0; - int length_all = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - int block_size = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - - int block_num = length_all / block_size; - int remain_length = length_all - block_num * block_size; - - int[] value_list = new int[length_all+block_size]; - int[] value_pos_arr = new int[1]; - - for (int k = 0; k < block_num; k++) { -// System.out.println(k); - decode_pos = BOSBlockDecoderImprove(encoded, decode_pos, value_list, block_size, block_size, value_pos_arr); -// System.out.println(decode_pos); - } - if (remain_length <= 3) { - for (int i = 0; i < remain_length; i++) { - int value_end = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - value_list[value_pos_arr[0]] = value_end; - value_pos_arr[0]++; - } - } else { - BOSBlockDecoderImprove(encoded, decode_pos, value_list,block_size, remain_length, value_pos_arr); - } - } - - public static void main(@org.jetbrains.annotations.NotNull String[] args) throws IOException { - String parent_dir = 
"/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/";// your data path -// String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String output_parent_dir = parent_dir + "icde0802/compression_ratio/rle_bos_b"; - - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } - - output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 -// dataset_block_size.add(256); - output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 -// dataset_block_size.add(2048); - 
output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 -// dataset_block_size.add(1024); - - int repeatTime2 = 100; -// for (int file_i = 8; file_i < 9; file_i++) { - - for (int file_i = 0; file_i < input_path_list.size(); file_i++) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { - System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - - - loader.readHeaders(); - while (loader.readRecord()) { - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); - } - - inputStream.close(); - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; - - - int length = 0; - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - length = 
BOSEncoder(data2_arr, dataset_block_size.get(file_i), encoded_result); - } - - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - compressed_size += length; - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoder(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - - String[] record = { - f.toString(), - "RLE+BOS-B", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); - } - writer.close(); - - - } - } - - @Test - public void BOSOUOExpTest() throws IOException { - String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/";// your data path -// String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; -// String output_parent_dir = parent_dir + "icde0802/compression_ratio/exp_b"; - String output_parent_dir = parent_dir + "icde0802/supply_experiment/R2O3_lower_outlier_compare/compression_ratio/bos"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - - dataset_name.add("Synthetic_Exp_100"); - dataset_name.add("Synthetic_Exp_1000"); - dataset_name.add("Synthetic_Exp_10000"); - dataset_name.add("Synthetic_Exp_100000"); - dataset_name.add("Synthetic_Exp_1000000"); - dataset_name.add("Synthetic_Normal_100"); - dataset_name.add("Synthetic_Normal_1000"); - dataset_name.add("Synthetic_Normal_10000"); - dataset_name.add("Synthetic_Normal_100000"); - dataset_name.add("Synthetic_Normal_1000000"); - - for (String value : dataset_name) { - 
input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } - - output_path_list.add(output_parent_dir + "/Exp_100.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Exp_1000.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Exp_10000.csv");// 2 - output_path_list.add(output_parent_dir + "/Exp_100000.csv");// 2 - output_path_list.add(output_parent_dir + "/Exp_1000000.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Normal_100.csv"); // 3 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Normal_1000.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Normal_10000.csv");//5 - output_path_list.add(output_parent_dir + "/Normal_100000.csv");//5 - output_path_list.add(output_parent_dir + "/Normal_1000000.csv");//5 -// dataset_block_size.add(2048); - - int repeatTime2 = 100; -// for (int file_i = 8; file_i < 9; file_i++) { - - for (int file_i = input_path_list.size()-1; file_i >=0 ; file_i--) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { - System.out.println(f); - if(f.toString().contains(".DS")) continue; - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - - - 
loader.readHeaders(); - while (loader.readRecord()) { - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); - } - - inputStream.close(); - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; - - - int length = 0; - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - length = BOSEncoder(data2_arr, dataset_block_size.get(file_i), encoded_result); - } - - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - compressed_size += length; - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoder(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - - String[] record = { - f.toString(), - "RLE+BOS-B", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); - } - writer.close(); - - - } - } - @Test - public void BOSImproveDecodeTest() throws IOException { - String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/"; // your data path -// String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String output_parent_dir = parent_dir + "icde0802/supply_experiment/R1O4_decode_time/compression_ratio/rle_bos_b"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - 
dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } - - output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 -// dataset_block_size.add(1024); - - int repeatTime2 = 100; -// for (int file_i = 4; file_i < 5; file_i++) { -// - for (int file_i = 0; file_i < 
input_path_list.size(); file_i++) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { -// f=tempList[1]; - System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - - loader.readHeaders(); - while (loader.readRecord()) { -// String value = loader.getValues()[index]; - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); -// data.add(Integer.valueOf(value)); - } - inputStream.close(); - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; - - - int length = 0; - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - length = BOSEncoderImprove(data2_arr, dataset_block_size.get(file_i), encoded_result); - } - - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - compressed_size += length; - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoderImprove(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / 
repeatTime2); - - - String[] record = { - f.toString(), - "RLE+BOS-B", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); -// break; - } - writer.close(); - } - } - - @Test - public void BOSImproveEncodeTest() throws IOException { - String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/"; // your data path -// String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String output_parent_dir = parent_dir + "icde0802/compression_ratio/rle_bos_b_improve"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } - - output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 -// dataset_block_size.add(2048); - 
output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 -// dataset_block_size.add(1024); - - int repeatTime2 = 100; -// for (int file_i = 4; file_i < 5; file_i++) { -// - for (int file_i = 0; file_i < input_path_list.size(); file_i++) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { -// f=tempList[1]; - System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - - loader.readHeaders(); - while (loader.readRecord()) { -// String value = loader.getValues()[index]; - 
data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); -// data.add(Integer.valueOf(value)); - } - inputStream.close(); - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; - - - int length = 0; - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - length = BOSEncoderImprove(data2_arr, dataset_block_size.get(file_i), encoded_result); - } - - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - compressed_size += length; - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoderImprove(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - - String[] record = { - f.toString(), - "RLE+BOS-B", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); -// break; - } - writer.close(); - } - } - -}
diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/RLEBOSMTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/RLEBOSMTest.java deleted file mode 100644 index 8367368..0000000 --- a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/RLEBOSMTest.java +++ /dev/null
@@ -1,2417 +0,0 @@ -package org.apache.iotdb.tsfile.encoding; - -import com.csvreader.CsvReader; -import com.csvreader.CsvWriter; -import org.apache.commons.compress.archivers.sevenz.SevenZArchiveEntry; -import org.apache.commons.compress.archivers.sevenz.SevenZOutputFile; -import org.apache.iotdb.tsfile.compress.ICompressor; -import org.apache.iotdb.tsfile.file.metadata.enums.CompressionType; -import org.junit.Test; - -import java.io.*; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.util.ArrayList; - -import static java.lang.Math.*; -import static org.apache.iotdb.tsfile.constant.TestConstant.random; - -public class RLEBOSMTest { - - public static int findMedianImprove(int[] arr) { - if (arr == null || arr.length == 0) { - throw new IllegalArgumentException("数组不能为空"); - } - int n = arr.length; - return quickSelect2(arr, 0, n - 1, n / 2); - } - - private static int quickSelect2(int[] arr, int left, int right, int k){ - int pivotV=arr[left+random.nextInt(right-left+1)],tmpV; - int posEqual=left,posSmaller=left; // a[left,posEqual): = pivotV; a[posEqual,posSmaller): < pivotV - for(int i=left;i<=right;i++){ - if(arr[i]==pivotV){ - tmpV=arr[i]; - arr[i]=arr[posSmaller]; - arr[posSmaller]=arr[posEqual]; - arr[posEqual]=tmpV; - posEqual++; - posSmaller++; - }else - if(arr[i]<pivotV){ - tmpV=arr[posSmaller]; - arr[posSmaller]=arr[i]; - arr[i]=tmpV; - posSmaller++; - } - } - if(k+(posEqual-left)<=posSmaller-1)return quickSelect2(arr,posEqual,posSmaller-1,k+(posEqual-left)); - else if(k<=posSmaller-1)return pivotV; - else return quickSelect2(arr,posSmaller,right,k); - } - public static int findMedian(int[] arr) { - if (arr == null || arr.length == 0) { - throw new IllegalArgumentException("数组不能为空"); - } - int n = arr.length; - return quickSelect(arr, 0, n - 1, n / 2); - } - - private static int quickSelect(int[] arr, int left, int right, int k) { - if (left == right) { - return arr[left]; - } - - int pivotIndex = partition(arr, left, 
right); - if (k == pivotIndex) { - return arr[k]; - } else if (k < pivotIndex) { - return quickSelect(arr, left, pivotIndex - 1, k); - } else { - return quickSelect(arr, pivotIndex + 1, right, k); - } - } - - private static int partition(int[] arr, int left, int right) { - int pivot = arr[right]; - int i = left; - for (int j = left; j < right; j++) { - if (arr[j] <= pivot) { - swap(arr, i, j); - i++; - } - } - swap(arr, i, right); - return i; - } - - private static void swap(int[] arr, int i, int j) { - int temp = arr[i]; - arr[i] = arr[j]; - arr[j] = temp; - } - - public static int getBitWith(int num) { - if (num == 0) return 1; - else return 32 - Integer.numberOfLeadingZeros(num); - } - public static int getCount(long long1, int mask) { - return ((int) (long1 & mask)); - } - public static int getUniqueValue(long long1, int left_shift) { - return ((int) ((long1) >> left_shift)); - } - - - public static void int2Bytes(int integer,int encode_pos , byte[] cur_byte) { - cur_byte[encode_pos] = (byte) (integer >> 24); - cur_byte[encode_pos+1] = (byte) (integer >> 16); - cur_byte[encode_pos+2] = (byte) (integer >> 8); - cur_byte[encode_pos+3] = (byte) (integer); - } - - public static void intByte2Bytes(int integer, int encode_pos , byte[] cur_byte) { - cur_byte[encode_pos] = (byte) (integer); - } - - private static void long2intBytes(long integer, int encode_pos , byte[] cur_byte) { - cur_byte[encode_pos] = (byte) (integer >> 24); - cur_byte[encode_pos+1] = (byte) (integer >> 16); - cur_byte[encode_pos+2] = (byte) (integer >> 8); - cur_byte[encode_pos+3] = (byte) (integer); - } - - - public static int bytes2Integer(byte[] encoded, int start, int num) { - int value = 0; - if (num > 4) { - System.out.println("bytes2Integer error"); - return 0; - } - for (int i = 0; i < num; i++) { - value <<= 8; - int b = encoded[i + start] & 0xFF; - value |= b; - } - return value; - } - - private static long bytesLong2Integer(byte[] encoded, int decode_pos) { - long value = 0; - for (int 
i = 0; i < 4; i++) { - value <<= 8; - int b = encoded[i + decode_pos] & 0xFF; - value |= b; - } - return value; - } - - public static void pack8Values(ArrayList<Integer> values, int offset, int width, int encode_pos, byte[] encoded_result) { - int bufIdx = 0; - int valueIdx = offset; - // remaining bits for the current unfinished Integer - int leftBit = 0; - - while (valueIdx < 8 + offset) { - // buffer is used for saving 32 bits as a part of result - int buffer = 0; - // remaining size of bits in the 'buffer' - int leftSize = 32; - - // encode the left bits of current Integer to 'buffer' - if (leftBit > 0) { - buffer |= (values.get(valueIdx) << (32 - leftBit)); - leftSize -= leftBit; - leftBit = 0; - valueIdx++; - } - - while (leftSize >= width && valueIdx < 8 + offset) { - // encode one Integer to the 'buffer' - buffer |= (values.get(valueIdx)<< (leftSize - width)); - leftSize -= width; - valueIdx++; - } - // If the remaining space of the buffer can not save the bits for one Integer, - if (leftSize > 0 && valueIdx < 8 + offset) { - // put the first 'leftSize' bits of the Integer into remaining space of the - // buffer - buffer |= (values.get(valueIdx) >>> (width - leftSize)); - leftBit = width - leftSize; - } - - // put the buffer into the final result - for (int j = 0; j < 4; j++) { - encoded_result[encode_pos] = (byte) ((buffer >>> ((3 - j) * 8)) & 0xFF); - encode_pos ++; - bufIdx++; - if (bufIdx >= width) { - return ; - } - } - } - - } - - public static void unpack8Values(byte[] encoded, int offset,int width, ArrayList<Integer> result_list) { - int byteIdx = offset; - long buffer = 0; - // total bits which have read from 'buf' to 'buffer'. i.e., - // number of available bits to be decoded. 
- int totalBits = 0; - int valueIdx = 0; - - while (valueIdx < 8) { - // If current available bits are not enough to decode one Integer, - // then add next byte from buf to 'buffer' until totalBits >= width - while (totalBits < width) { - buffer = (buffer << 8) | (encoded[byteIdx] & 0xFF); - byteIdx++; - totalBits += 8; - } - - // If current available bits are enough to decode one Integer, - // then decode one Integer one by one until left bits in 'buffer' is - // not enough to decode one Integer. - while (totalBits >= width && valueIdx < 8) { - result_list.add ((int) (buffer >>> (totalBits - width))); - valueIdx++; - totalBits -= width; - buffer = buffer & ((1L << totalBits) - 1); - } - } - } - - public static int bitPacking(ArrayList<Integer> numbers, int start, int bit_width,int encode_pos, byte[] encoded_result) { - int block_num = (numbers.size()-start) / 8; - for(int i=0;i<block_num;i++){ - pack8Values( numbers, start+i*8, bit_width,encode_pos, encoded_result); - encode_pos +=bit_width; - } - - return encode_pos; - - } - - public static ArrayList<Integer> decodeBitPacking( - byte[] encoded, int decode_pos, int bit_width, int block_size) { - ArrayList<Integer> result_list = new ArrayList<>(); - int block_num = (block_size - 1) / 8; - - for (int i = 0; i < block_num; i++) { // bitpacking - unpack8Values( encoded, decode_pos, bit_width, result_list); - decode_pos += bit_width; - } - return result_list; - } - - - public static int[] getAbsDeltaTsBlock( - int[] ts_block, - int i, - int block_size, - int remaining, - int[] min_delta, - ArrayList<Integer> repeat_count) { - int[] ts_block_delta = new int[remaining]; - - int value_delta_min = Integer.MAX_VALUE; - int value_delta_max = Integer.MIN_VALUE; - int base = i*block_size; - int end = i*block_size+remaining; - for (int j = base; j < end; j++) { - - int integer = ts_block[j]; - if (integer < value_delta_min) value_delta_min = integer; - if (integer > value_delta_max) { - value_delta_max = integer; - } - } - int 
pre_delta = ts_block[i*block_size]-value_delta_min; - int pre_count = 1; - - min_delta[0]=(value_delta_min); - int repeat_i = 0; - int ts_block_delta_i = 0; - for (int j = base+1; j < end; j++) { - int delta = ts_block[j]-value_delta_min; - if(delta == pre_delta){ - pre_count ++; - } else { - if(pre_count>7){ - repeat_count.add(repeat_i); - repeat_count.add(pre_count); - ts_block_delta[ts_block_delta_i]=pre_delta; - ts_block_delta_i ++; - } else{ - for (int k = 0; k < pre_count; k++){ - ts_block_delta[ts_block_delta_i] =pre_delta; - ts_block_delta_i++; - } - } - pre_count =1; - repeat_i = j - i*block_size; - } - pre_delta = delta; - - } - for (int j = 0; j < pre_count; j++){ - ts_block_delta[ts_block_delta_i] =pre_delta; - ts_block_delta_i++; - } - min_delta[1]=(ts_block_delta_i); - min_delta[2]=(value_delta_max-value_delta_min); - int[] new_ts_block_delta = new int[ts_block_delta_i]; - System.arraycopy(ts_block_delta, 0, new_ts_block_delta, 0, ts_block_delta_i); - - return new_ts_block_delta; - } - - - - public static int encodeOutlier2Bytes( - ArrayList<Integer> ts_block_delta, - int bit_width, - int encode_pos, byte[] encoded_result) { - - encode_pos = bitPacking(ts_block_delta, 0, bit_width, encode_pos, encoded_result); - - int n_k = ts_block_delta.size(); - int n_k_b = n_k / 8; - long cur_remaining = 0; // encoded int - int cur_number_bits = 0; // the bit width used of encoded int - for (int i = n_k_b * 8; i < n_k; i++) { - long cur_value = ts_block_delta.get(i); - int cur_bit_width = bit_width; // remaining bit width of current value - - if (cur_number_bits + bit_width >= 32) { - cur_remaining <<= (32 - cur_number_bits); - cur_bit_width = bit_width - 32 + cur_number_bits; - cur_remaining += ((cur_value >> cur_bit_width)); - long2intBytes(cur_remaining,encode_pos,encoded_result); - encode_pos += 4; - - cur_remaining = 0; - cur_number_bits = 0; - } - - cur_remaining <<= cur_bit_width; - cur_number_bits += cur_bit_width; - cur_remaining += (((cur_value << (32 - 
cur_bit_width)) & 0xFFFFFFFFL) >> (32 - cur_bit_width)); - } - cur_remaining <<= (32 - cur_number_bits); - long2intBytes(cur_remaining,encode_pos,encoded_result); - encode_pos += 4; - return encode_pos; - - } - - - public static ArrayList<Integer> decodeOutlier2Bytes( - byte[] encoded, - int decode_pos, - int bit_width, - int length, - ArrayList<Integer> encoded_pos_result - ) { - - int n_k_b = length / 8; - int remaining = length - n_k_b * 8; - ArrayList<Integer> result_list = new ArrayList<>(decodeBitPacking(encoded, decode_pos, bit_width, n_k_b * 8 + 1)); - decode_pos += n_k_b * bit_width; - - ArrayList<Long> int_remaining = new ArrayList<>(); - int int_remaining_size = remaining * bit_width / 32 + 1; - for (int j = 0; j < int_remaining_size; j++) { - - int_remaining.add(bytesLong2Integer(encoded, decode_pos)); - decode_pos += 4; - } - - int cur_remaining_bits = 32; // remaining bit width of current value - long cur_number = int_remaining.get(0); - int cur_number_i = 1; - for (int i = n_k_b * 8; i < length; i++) { - if (bit_width < cur_remaining_bits) { - int tmp = (int) (cur_number >> (32 - bit_width)); - result_list.add(tmp); - cur_number <<= bit_width; - cur_number &= 0xFFFFFFFFL; - cur_remaining_bits -= bit_width; - } else { - int tmp = (int) (cur_number >> (32 - cur_remaining_bits)); - int remain_bits = bit_width - cur_remaining_bits; - tmp <<= remain_bits; - - cur_number = int_remaining.get(cur_number_i); - cur_number_i++; - tmp += (cur_number >> (32 - remain_bits)); - result_list.add(tmp); - cur_number <<= remain_bits; - cur_number &= 0xFFFFFFFFL; - cur_remaining_bits = 32 - remain_bits; - } - } - encoded_pos_result.add(decode_pos); - return result_list; - } - - private static void addToArchiveCompression(SevenZOutputFile out, File file, String dir) { - String name = dir + File.separator + file.getName(); - if(dir.equals(".")) { - name = file.getName(); - } - if (file.isFile()){ - SevenZArchiveEntry entry = null; - FileInputStream in = null; - try { - 
entry = out.createArchiveEntry(file, name); - out.putArchiveEntry(entry); - in = new FileInputStream(file); - byte[] b = new byte[1024]; - int count = 0; - while ((count = in.read(b)) > 0) { - out.write(b, 0, count); - } - } catch (IOException e) { - e.printStackTrace(); - } finally { - try { - out.closeArchiveEntry(); - in.close(); - } catch (IOException e) { - e.printStackTrace(); - } - - } - } else if (file.isDirectory()) { - File[] children = file.listFiles(); - if (children != null){ - for (File child : children){ - addToArchiveCompression(out, child, name); - } - } - } else { - System.out.println(file.getName() + " is not supported"); - } - } - - private static int BOSEncodeBits(int[] ts_block_delta, - int init_block_size, - int final_k_start_value, - int final_x_l_plus, - int final_k_end_value, - int final_x_u_minus, - int max_delta_value, - int[] min_delta, - ArrayList<Integer> repeat_count, - int encode_pos, - byte[] cur_byte) { - int block_size = ts_block_delta.length; - - ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_left_outlier = new ArrayList<>(); - ArrayList<Integer> final_right_outlier = new ArrayList<>(); - ArrayList<Integer> final_normal = new ArrayList<>(); - int k1 = 0; - int k2 = 0; - ArrayList<Integer> bitmap_outlier = new ArrayList<>(); - int index_bitmap_outlier = 0; - int cur_index_bitmap_outlier_bits = 0; - for (int i = 0; i < block_size; i++) { - int cur_value = ts_block_delta[i]; - if (cur_value < final_k_start_value) { - final_left_outlier.add(cur_value); - final_left_outlier_index.add(i); - if (cur_index_bitmap_outlier_bits % 8 != 7) { - index_bitmap_outlier <<= 2; - index_bitmap_outlier += 3; - cur_index_bitmap_outlier_bits += 2; - } else { - index_bitmap_outlier <<= 1; - index_bitmap_outlier += 1; - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 1; - cur_index_bitmap_outlier_bits = 1; - } - - - k1++; - 
- - } else if (cur_value >= final_k_end_value) { - final_right_outlier.add(cur_value - final_k_end_value); - final_right_outlier_index.add(i); - if (cur_index_bitmap_outlier_bits % 8 != 7) { - index_bitmap_outlier <<= 2; - index_bitmap_outlier += 2; - cur_index_bitmap_outlier_bits += 2; - } else { - index_bitmap_outlier <<= 1; - index_bitmap_outlier += 1; - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 0; - cur_index_bitmap_outlier_bits = 1; - } - k2++; - - } else { - final_normal.add(cur_value - final_x_l_plus); - index_bitmap_outlier <<= 1; - cur_index_bitmap_outlier_bits += 1; - } - if (cur_index_bitmap_outlier_bits % 8 == 0) { - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 0; - } - } - if (cur_index_bitmap_outlier_bits % 8 != 0) { - - index_bitmap_outlier <<= (8 - cur_index_bitmap_outlier_bits % 8); - - index_bitmap_outlier &= 0xFF; - bitmap_outlier.add(index_bitmap_outlier); - } - int final_alpha = ((k1 + k2) * getBitWith(block_size-1)) <= (block_size + k1 + k2) ? 
1 : 0; - - int k_byte = (k1 << 1); - k_byte += final_alpha; - k_byte += (k2 << 16); - - int2Bytes(k_byte,encode_pos,cur_byte); - encode_pos += 4; - - int2Bytes(min_delta[0],encode_pos,cur_byte); - encode_pos += 4; - int size = repeat_count.size(); - intByte2Bytes(size,encode_pos,cur_byte); - encode_pos += 1; - - if (size != 0) - encode_pos =encodeOutlier2Bytes(repeat_count, getBitWith(init_block_size-1),encode_pos,cur_byte); - - - int bit_width_final = 0; - int left_bit_width = getBitWith(final_k_start_value);//final_left_max - int right_bit_width = getBitWith(max_delta_value - final_k_end_value);//final_right_min - - if(k1==0 && k2==0){ - bit_width_final = getBitWith(max_delta_value); - intByte2Bytes(bit_width_final,encode_pos,cur_byte); - encode_pos += 1; - -// encode_pos = encodeOutlier2Bytes(final_normal, bit_width_final,encode_pos,cur_byte); -// return encode_pos; - } - else{ - int2Bytes(final_x_l_plus,encode_pos,cur_byte); - encode_pos += 4; - int2Bytes(final_k_end_value,encode_pos,cur_byte); - encode_pos += 4; - - bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); - intByte2Bytes(bit_width_final,encode_pos,cur_byte); - encode_pos += 1; - intByte2Bytes(left_bit_width,encode_pos,cur_byte); - encode_pos += 1; - intByte2Bytes(right_bit_width,encode_pos,cur_byte); - encode_pos += 1; - if (final_alpha == 0) { // 0 - - for (int i : bitmap_outlier) { - - intByte2Bytes(i,encode_pos,cur_byte); - encode_pos += 1; - } - } else { - encode_pos = encodeOutlier2Bytes(final_left_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); - encode_pos = encodeOutlier2Bytes(final_right_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); - } - } - - encode_pos =encodeOutlier2Bytes(final_normal, bit_width_final,encode_pos,cur_byte); - if (k1 != 0) - encode_pos = encodeOutlier2Bytes(final_left_outlier, left_bit_width,encode_pos,cur_byte); - if (k2 != 0) - encode_pos = encodeOutlier2Bytes(final_right_outlier, right_bit_width,encode_pos,cur_byte); - return 
encode_pos; - - } - - - - - private static int BOSBlockEncoder(int[] ts_block, int block_i, int block_size,int remaining, int encode_pos , byte[] cur_byte) { - - ArrayList<Integer> repeat_count = new ArrayList<>(); - int init_block_size = block_size; - - int[] min_delta = new int[3]; - int[] ts_block_delta = getAbsDeltaTsBlock(ts_block,block_i, init_block_size,remaining, min_delta, repeat_count); - block_size = min_delta[1]; - int max_delta_value = min_delta[2]; - - int max_bit_width = getBitWith(max_delta_value) + 1; - - - int[] findMedianArray = new int[block_size]; - System.arraycopy(ts_block_delta, 0, findMedianArray, 0, block_size); - - int median = findMedian(findMedianArray); - - // int xl= median; - // int xu = median; - // xl = 2 median - xu - // xl = xu - 2 ^ beta - int left_number = 0; - int right_number = 0; - - int length_outlier = block_size; -// for(int value:findMedianArray){ -// if(value<=median) left_number++; -// if (value >= median) right_number ++; -// } - - - int final_k_start_value = -1; // x_l_minus - int final_x_l_plus = 0; // x_l_plus - int final_k_end_value = max_delta_value+1; // x_u_plus - int final_x_u_minus = max_delta_value; // x_u_minus - - int min_bits = 0; - min_bits += (getBitWith(final_k_end_value - final_k_start_value - 2) * (block_size)); - - - int[] count_left = new int[max_bit_width]; - int[] count_right = new int[max_bit_width]; - int count_0 = 0; - - - for(int i=0;i<length_outlier;i++){ - int cur_value = ts_block_delta[i]; - if(cur_value > median){ - int beta = getBitWith(cur_value - median) ; - count_right[beta] ++; - } else if (cur_value < median) { - int beta = getBitWith(median - cur_value) ; - count_left[beta] ++; - }else{ - count_0 ++; - } - - - } - - for(int beta = max_bit_width - 1; beta > 0 ; beta --){ - left_number += count_left[beta]; - right_number += count_right[beta]; - int pow_beta = (int) pow(2,beta-1); - int xu = min(max_delta_value+1, median + pow_beta) ; - int xl = max(median - pow_beta,-1); - int 
cur_bits = Math.min((left_number + right_number) * getBitWith(block_size - 1), block_size + left_number + right_number); - cur_bits += left_number * getBitWith(xl); - cur_bits += right_number * getBitWith(max_delta_value - xu); - cur_bits += (block_size - left_number - right_number) * getBitWith(xu - xl - 2); - if (cur_bits < min_bits) { - min_bits = cur_bits; - - final_k_start_value = xl; - final_x_l_plus = xl + 1; - final_k_end_value = xu; - final_x_u_minus = xu -1; - } - - } - - encode_pos = BOSEncodeBits(ts_block_delta,init_block_size, final_k_start_value,final_x_l_plus, final_k_end_value,final_x_u_minus, - max_delta_value, min_delta,repeat_count, encode_pos , cur_byte); - - return encode_pos; - } - - public static int BOSEncoder( - int[] data, int block_size, byte[] encoded_result) { - - int length_all = data.length; - - int encode_pos = 0; - int2Bytes(length_all,encode_pos,encoded_result); - encode_pos += 4; - - int block_num = length_all / block_size; - int2Bytes(block_size,encode_pos,encoded_result); - encode_pos+= 4; - - for (int i = 0; i < block_num; i++) { - - encode_pos = BOSBlockEncoder(data, i, block_size, block_size,encode_pos,encoded_result); -// System.out.println(encode_pos); - } - - int remaining_length = length_all - block_num * block_size; - if (remaining_length <= 3) { - for (int i = remaining_length; i > 0; i--) { - int2Bytes(data[data.length - i], encode_pos, encoded_result); - encode_pos += 4; - } - - } else { - - int start = block_num * block_size; - int remaining = length_all-start; - - - encode_pos = BOSBlockEncoder(data, block_num, block_size,remaining, encode_pos,encoded_result); - -// int[] ts_block = new int[length_all-start]; -// if (length_all - start >= 0) System.arraycopy(data, start, ts_block, 0, length_all - start); -// -// -// encode_pos = BOSBlockEncoder(ts_block, encode_pos,encoded_result); - - } - - - return encode_pos; - } - - public static int EncodeBits(int num, - int bit_width, - int encode_pos, - byte[] cur_byte, - 
int[] bit_index_list){ - // 找到要插入的位的索引 - int bit_index = bit_index_list[0] ;//cur_byte[encode_pos + 1]; - - // 计算数值的起始位位置 - int remaining_bits = bit_width; - - while (remaining_bits > 0) { - // 计算在当前字节中可以使用的位数 - int available_bits = bit_index; - int bits_to_write = Math.min(available_bits, remaining_bits); - - // 更新 bit_index - bit_index = available_bits - bits_to_write; - - // 计算要写入的位的掩码和数值 - int mask = (1 << bits_to_write) - 1; - int bits = (num >> (remaining_bits - bits_to_write)) & mask; - - // 写入到当前位置 - cur_byte[encode_pos] &= (byte) ~(mask << bit_index); // 清除对应位置的位 - cur_byte[encode_pos] |= (byte) (bits << bit_index); - - // 更新位宽和数值 - remaining_bits -= bits_to_write; - if (bit_index == 0) { - bit_index = 8; - encode_pos++; - } - } - bit_index_list[0] = bit_index; -// cur_byte[encode_pos + 1] = (byte) bit_index; - return encode_pos; - } - private static int BOSEncodeBitsImprove(int[] ts_block_delta, - int init_block_size, - int final_k_start_value, - int final_x_l_plus, - int final_k_end_value, - int final_x_u_minus, - int max_delta_value, - int[] min_delta, - ArrayList<Integer> repeat_count, - int encode_pos, - byte[] cur_byte) { - int block_size = ts_block_delta.length; - - ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); - - int k1 = 0; - int k2 = 0; - - - - ArrayList<Integer> bitmap_outlier = new ArrayList<>(); - int index_bitmap_outlier = 0; - int cur_index_bitmap_outlier_bits = 0; - for (int i = 0; i < block_size; i++) { - int cur_value = ts_block_delta[i]; - if ( cur_value<= final_k_start_value) { -// encode_pos = EncodeBits(cur_value,left_bit_width,encode_pos,cur_byte); -// final_left_outlier.add(cur_value); - final_left_outlier_index.add(i); - if (cur_index_bitmap_outlier_bits % 8 != 7) { - index_bitmap_outlier <<= 2; - index_bitmap_outlier += 3; - cur_index_bitmap_outlier_bits += 2; - } else { - index_bitmap_outlier <<= 1; - index_bitmap_outlier += 1; - 
bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 1; - cur_index_bitmap_outlier_bits = 1; - } - k1++; - - - } else if (cur_value >= final_k_end_value) { -// encode_pos = EncodeBits(cur_value- final_k_end_value,right_bit_width,encode_pos,cur_byte); -// final_right_outlier.add(cur_value - final_k_end_value); - final_right_outlier_index.add(i); - if (cur_index_bitmap_outlier_bits % 8 != 7) { - index_bitmap_outlier <<= 2; - index_bitmap_outlier += 2; - cur_index_bitmap_outlier_bits += 2; - } else { - index_bitmap_outlier <<= 1; - index_bitmap_outlier += 1; - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 0; - cur_index_bitmap_outlier_bits = 1; - } - k2++; - - } else { -// final_normal.add(cur_value - final_x_l_plus); -// encode_pos = EncodeBits(cur_value- final_x_l_plus,right_bit_width,encode_pos,cur_byte); - index_bitmap_outlier <<= 1; - cur_index_bitmap_outlier_bits += 1; - } - if (cur_index_bitmap_outlier_bits % 8 == 0) { - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 0; - } - } - if (cur_index_bitmap_outlier_bits % 8 != 0) { - - index_bitmap_outlier <<= (8 - cur_index_bitmap_outlier_bits % 8); - - index_bitmap_outlier &= 0xFF; - bitmap_outlier.add(index_bitmap_outlier); - } - - int final_alpha = ((k1 + k2) * getBitWith(block_size-1)) <= (block_size + k1 + k2) ? 
1 : 0; - - - int k_byte = (k1 << 1); - k_byte += final_alpha; - k_byte += (k2 << 16); - - int2Bytes(k_byte,encode_pos,cur_byte); - encode_pos += 4; - - - int2Bytes(min_delta[0],encode_pos,cur_byte); - encode_pos += 4; - - int size = repeat_count.size(); - intByte2Bytes(size,encode_pos,cur_byte); - encode_pos += 1; - - int[] bit_index_list = new int[1]; - bit_index_list[0] = 8; - if (size != 0){ - int bit_width_init = getBitWith(init_block_size-1); - for(int repeat_count_v:repeat_count){ - encode_pos = EncodeBits(repeat_count_v, bit_width_init, encode_pos, cur_byte, bit_index_list); - } - if(bit_index_list[0] != 8){ - bit_index_list[0] = 8; - encode_pos ++; - } - } -// encode_pos =encodeOutlier2Bytes(repeat_count, getBitWith(init_block_size-1),encode_pos,cur_byte); - -// int2Bytes(min_delta[1],encode_pos,cur_byte); -// encode_pos += 4; - - int bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); - intByte2Bytes(bit_width_final,encode_pos,cur_byte); - encode_pos += 1; - - - if(final_k_start_value<0 && final_k_end_value > max_delta_value){ -// int bit_width_final= getBitWith(final_x_u_minus - final_x_l_plus); -// cur_byte[encode_pos+1] = 8; - bit_index_list[0] = 8; - for (int cur_value : ts_block_delta) { - encode_pos = EncodeBits(cur_value, bit_width_final, encode_pos, cur_byte, bit_index_list); -// final_normal.add(cur_value); - } - if(bit_index_list[0] != 8){ - encode_pos ++; - } -// cur_byte[encode_pos+1] = 0; -// encode_pos = encodeOutlier2Bytes(final_normal, bit_width_final,encode_pos,cur_byte); - return encode_pos; - } - - - int left_bit_width = getBitWith(final_k_start_value);//final_left_max - int right_bit_width = getBitWith(max_delta_value - final_k_end_value);//final_right_min - int2Bytes(final_x_l_plus,encode_pos,cur_byte); - encode_pos += 4; - int2Bytes(final_k_end_value,encode_pos,cur_byte); - encode_pos += 4; - -// bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); -// intByte2Bytes(bit_width_final,encode_pos,cur_byte); -// 
encode_pos += 1; - intByte2Bytes(left_bit_width,encode_pos,cur_byte); - encode_pos += 1; - intByte2Bytes(right_bit_width,encode_pos,cur_byte); - encode_pos += 1; - - if (final_alpha == 0) { // 0 - - for (int i : bitmap_outlier) { - - intByte2Bytes(i,encode_pos,cur_byte); - encode_pos += 1; - } - } else { - encode_pos = encodeOutlier2Bytes(final_left_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); - encode_pos = encodeOutlier2Bytes(final_right_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); - } -// cur_byte[encode_pos+1] = 8; -// bit_index_list[0] = 8; - for (int cur_value : ts_block_delta) { - if (cur_value <= final_k_start_value) { - encode_pos = EncodeBits(cur_value, left_bit_width, encode_pos, cur_byte,bit_index_list); - } else if (cur_value >= final_k_end_value) { - encode_pos = EncodeBits(cur_value - final_k_end_value, right_bit_width, encode_pos, cur_byte,bit_index_list); - } else { - encode_pos = EncodeBits(cur_value - final_x_l_plus, bit_width_final, encode_pos, cur_byte,bit_index_list); - } - } - if(bit_index_list[0] != 8){ - encode_pos ++; - } - -// cur_byte[encode_pos+1] = 0; - -// if(k1==0 && k2==0){ -// intByte2Bytes(bit_width_final,encode_pos,cur_byte); -// encode_pos += 1; -// -// -// } -// else{ -// int2Bytes(final_x_l_plus,encode_pos,cur_byte); -// encode_pos += 4; -// int2Bytes(final_k_end_value,encode_pos,cur_byte); -// encode_pos += 4; -// -// bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); -// intByte2Bytes(bit_width_final,encode_pos,cur_byte); -// encode_pos += 1; -// intByte2Bytes(left_bit_width,encode_pos,cur_byte); -// encode_pos += 1; -// intByte2Bytes(right_bit_width,encode_pos,cur_byte); -// encode_pos += 1; -// if (final_alpha == 0) { // 0 -// -// for (int i : bitmap_outlier) { -// -// intByte2Bytes(i,encode_pos,cur_byte); -// encode_pos += 1; -// } -// } else { -// encode_pos = encodeOutlier2Bytes(final_left_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); -// encode_pos = 
encodeOutlier2Bytes(final_right_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); -// } -// } - - -// if(k1+k2!=block_size) -// encode_pos = encodeOutlier2Bytes(final_normal, bit_width_final,encode_pos,cur_byte); -// if (k1 != 0) -// encode_pos = encodeOutlier2Bytes(final_left_outlier, left_bit_width,encode_pos,cur_byte); -// if (k2 != 0) -// encode_pos = encodeOutlier2Bytes(final_right_outlier, right_bit_width,encode_pos,cur_byte); -// System.out.println(encode_pos); - return encode_pos; - - } - private static int BOSBlockEncoderImprove(int[] ts_block, int block_i, int block_size,int remaining, int encode_pos , byte[] cur_byte) { - - ArrayList<Integer> repeat_count = new ArrayList<>(); - int init_block_size = block_size; - - int[] min_delta = new int[3]; - int[] ts_block_delta = getAbsDeltaTsBlock(ts_block,block_i, init_block_size,remaining, min_delta, repeat_count); - block_size = min_delta[1]; - int max_delta_value = min_delta[2]; - - int max_bit_width = getBitWith(max_delta_value) + 1; - - - int[] findMedianArray = new int[block_size]; - System.arraycopy(ts_block_delta, 0, findMedianArray, 0, block_size); - - int median = findMedianImprove(findMedianArray); - - // int xl= median; - // int xu = median; - // xl = 2 median - xu - // xl = xu - 2 ^ beta - int left_number = 0; - int right_number = 0; - - int length_outlier = block_size; -// for(int value:findMedianArray){ -// if(value<=median) left_number++; -// if (value >= median) right_number ++; -// } - - - int final_k_start_value = -1; // x_l_minus - int final_x_l_plus = 0; // x_l_plus - int final_k_end_value = max_delta_value+1; // x_u_plus - int final_x_u_minus = max_delta_value; // x_u_minus - - int min_bits = 0; - min_bits += (getBitWith(final_k_end_value - final_k_start_value - 2) * (block_size)); - - - int[] count_left = new int[max_bit_width]; - int[] count_right = new int[max_bit_width]; - int count_0 = 0; - - - for(int i=0;i<length_outlier;i++){ - int cur_value = ts_block_delta[i]; - 
if(cur_value > median){ - int beta = getBitWith(cur_value - median) ; - count_right[beta] ++; - } else if (cur_value < median) { - int beta = getBitWith(median - cur_value) ; - count_left[beta] ++; - }else{ - count_0 ++; - } - - - } - - for(int beta = max_bit_width - 1; beta > 0 ; beta --){ - left_number += count_left[beta]; - right_number += count_right[beta]; - int pow_beta = 1 << (beta-1); - int xu = min(max_delta_value+1, median + pow_beta) ; - int xl = max(median - pow_beta,-1); - int cur_bits = Math.min((left_number + right_number) * getBitWith(block_size - 1), block_size + left_number + right_number); - cur_bits += left_number * getBitWith(xl); - cur_bits += right_number * getBitWith(max_delta_value - xu); - cur_bits += (block_size - left_number - right_number) * getBitWith(xu - xl - 2); - if (cur_bits < min_bits) { - min_bits = cur_bits; - - final_k_start_value = xl; - final_x_l_plus = xl + 1; - final_k_end_value = xu; - final_x_u_minus = xu -1; - } - - } - - encode_pos = BOSEncodeBitsImprove(ts_block_delta,init_block_size, final_k_start_value,final_x_l_plus, final_k_end_value,final_x_u_minus, - max_delta_value, min_delta,repeat_count, encode_pos , cur_byte); - - return encode_pos; - } - - public static int BOSEncoderImprove( - int[] data, int block_size, byte[] encoded_result) { - - int length_all = data.length; - - int encode_pos = 0; - int2Bytes(length_all,encode_pos,encoded_result); - encode_pos += 4; - - int block_num = length_all / block_size; - int2Bytes(block_size,encode_pos,encoded_result); - encode_pos+= 4; - - for (int i = 0; i < block_num; i++) { - - - encode_pos = BOSBlockEncoderImprove(data, i, block_size, block_size,encode_pos,encoded_result); -// System.out.println(encode_pos); - } - - int remaining_length = length_all - block_num * block_size; - if (remaining_length <= 3) { - for (int i = remaining_length; i > 0; i--) { - int2Bytes(data[data.length - i], encode_pos, encoded_result); - encode_pos += 4; - } - - } else { - - int start = 
block_num * block_size; - int remaining = length_all-start; - - encode_pos = BOSBlockEncoderImprove(data, block_num, block_size,remaining, encode_pos,encoded_result); - -// int[] ts_block = new int[length_all-start]; -// if (length_all - start >= 0) System.arraycopy(data, start, ts_block, 0, length_all - start); -// -// -// encode_pos = BOSBlockEncoder(ts_block, encode_pos,encoded_result); - - } - - - return encode_pos; - } - - - public static int BOSBlockDecoder(byte[] encoded, int decode_pos, int[] value_list,int init_block_size, int block_size, int[] value_pos_arr) { - - int k_byte = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - int k1_byte = (int) (k_byte % pow(2, 16)); - int k1 = k1_byte / 2; - int final_alpha = k1_byte % 2; - - int k2 = (int) (k_byte / pow(2, 16)); - - - int min_delta = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - int count_size = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - ArrayList<Integer> repeat_count = new ArrayList<>(); - if (count_size != 0) { - ArrayList<Integer> repeat_count_result = new ArrayList<>(); - repeat_count = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(init_block_size-1), count_size, repeat_count_result); - decode_pos = repeat_count_result.get(0); - - } - - int cur_block_size = block_size; - for (int i = 1; i < count_size; i += 2) { - cur_block_size -= (repeat_count.get(i) - 1); - } - - ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_left_outlier = new ArrayList<>(); - ArrayList<Integer> final_right_outlier = new ArrayList<>(); - ArrayList<Integer> final_normal= new ArrayList<>();; - ArrayList<Integer> bitmap_outlier = new ArrayList<>(); - int final_k_start_value = 0; - int final_k_end_value = 0; - int bit_width_final = 0; - int left_bit_width = 0; - int right_bit_width = 0; - - if(k1!=0 || k2 != 0){ - final_k_start_value = bytes2Integer(encoded, decode_pos, 
4); - decode_pos += 4; - - final_k_end_value = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - bit_width_final = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - left_bit_width = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - right_bit_width = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - if (final_alpha == 0) { - int bitmap_bytes = (int) Math.ceil((double) (cur_block_size + k1 + k2) / (double) 8); - for (int i = 0; i < bitmap_bytes; i++) { - bitmap_outlier.add(bytes2Integer(encoded, decode_pos, 1)); - decode_pos += 1; - } - int bitmap_outlier_i = 0; - int remaining_bits = 8; - int tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - int i = 0; - while (i < block_size ) { - if (remaining_bits > 1) { - int bit_i = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_i == 1) { - int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_left_right == 1) { - final_left_outlier_index.add(i); - } else { - final_right_outlier_index.add(i); - } - } - if (remaining_bits == 0) { - remaining_bits = 8; - if (bitmap_outlier_i >= bitmap_bytes) break; - tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - } - } else if (remaining_bits == 1) { - int bit_i = tmp & 0x1; - remaining_bits = 8; - if (bitmap_outlier_i >= bitmap_bytes) break; - tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - if (bit_i == 1) { - int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_left_right == 1) { - final_left_outlier_index.add(i); - } else { - final_right_outlier_index.add(i); - } - } - } - i++; - } - } else { - ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); - final_left_outlier_index = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(block_size-1), k1, decode_pos_result_left); - decode_pos = (decode_pos_result_left.get(0)); - ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); - 
final_right_outlier_index = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(block_size-1), k2, decode_pos_result_right); - decode_pos = (decode_pos_result_right.get(0)); - } - }else { - bit_width_final = bytes2Integer(encoded, decode_pos, 1); - - decode_pos += 1; - } - - -// System.out.println(bit_width_final); -// System.out.println("cur_block_size" + cur_block_size); -// System.out.println(k1); -// System.out.println("k2"+k2); - ArrayList<Integer> decode_pos_normal = new ArrayList<>(); - final_normal = decodeOutlier2Bytes(encoded, decode_pos, bit_width_final, cur_block_size - k1 - k2, decode_pos_normal); - - decode_pos = decode_pos_normal.get(0); - if (k1 != 0) { - ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); - final_left_outlier = decodeOutlier2Bytes(encoded, decode_pos, left_bit_width, k1, decode_pos_result_left); - - decode_pos = decode_pos_result_left.get(0); - } - if (k2 != 0) { - ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); - final_right_outlier = decodeOutlier2Bytes(encoded, decode_pos, right_bit_width, k2, decode_pos_result_right); - decode_pos = decode_pos_result_right.get(0); - } - int left_outlier_i = 0; - int right_outlier_i = 0; - int normal_i = 0; - int pre_v; -// int final_k_end_value = (int) (final_k_start_value + pow(2, bit_width_final)); - - int cur_i = 0; - int repeat_i = 0; - for (int i = 0; i < cur_block_size; i++) { - - int current_delta; - if (left_outlier_i >= k1) { - if (right_outlier_i >= k2) { - current_delta = final_normal.get(normal_i) + final_k_start_value+1; - normal_i++; - } else if (i == final_right_outlier_index.get(right_outlier_i)) { - current_delta = final_right_outlier.get(right_outlier_i) + final_k_end_value; - right_outlier_i++; - } else { - current_delta = final_normal.get(normal_i) + final_k_start_value+1; - normal_i++; - } - } else if (i == final_left_outlier_index.get(left_outlier_i)) { - current_delta = final_left_outlier.get(left_outlier_i); - left_outlier_i++; - } else { - - 
if (right_outlier_i >= k2) { - current_delta = final_normal.get(normal_i) + final_k_start_value+1; - normal_i++; - } else if (i == final_right_outlier_index.get(right_outlier_i)) { - current_delta = final_right_outlier.get(right_outlier_i) + final_k_end_value; - right_outlier_i++; - } else { - current_delta = final_normal.get(normal_i) + final_k_start_value+1; - normal_i++; - } - } - pre_v = current_delta + min_delta; - if (repeat_i < count_size) { - if (cur_i == repeat_count.get(repeat_i)) { - cur_i += (repeat_count.get(repeat_i+1)); - - for (int j = 0; j < repeat_count.get(repeat_i + 1); j++) { - value_list[value_pos_arr[0]] = pre_v; - value_pos_arr[0]++; - } - repeat_i += 2; - }else { - cur_i++; - value_list[value_pos_arr[0]] = pre_v; - value_pos_arr[0]++; - } - } else { - cur_i++; - value_list[value_pos_arr[0]] = pre_v; - value_pos_arr[0]++; - } - } - return decode_pos; - } - - public static void BOSDecoder(byte[] encoded) { - - int decode_pos = 0; - int length_all = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - int block_size = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - - int block_num = length_all / block_size; - int remain_length = length_all - block_num * block_size; - - int[] value_list = new int[length_all+block_size]; - int[] value_pos_arr = new int[1]; - - for (int k = 0; k < block_num; k++) { -// System.out.println(k); - decode_pos = BOSBlockDecoder(encoded, decode_pos, value_list,block_size, block_size,value_pos_arr); - } - - if (remain_length <= 3) { - for (int i = 0; i < remain_length; i++) { - int value_end = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - value_list[value_pos_arr[0]] = value_end; - value_pos_arr[0]++; - } - } else { - BOSBlockDecoder(encoded, decode_pos, value_list,block_size, remain_length, value_pos_arr); - } - } - - public static int DecodeBits(byte[] cur_byte, int bit_width, int[] decode_pos_list) { - int decode_pos = decode_pos_list[0]; - int bit_index = decode_pos_list[1]; 
//cur_byte[decode_pos + 1]; - int remaining_bits = bit_width; - int num = 0; - - while (remaining_bits > 0) { - int available_bits = bit_index; - int bits_to_read = Math.min(available_bits, remaining_bits); - - // 计算要读取的位的掩码 - int mask = (1 << bits_to_read) - 1; - int bits = (cur_byte[decode_pos] >> (available_bits - bits_to_read)) & mask; - - // 将读取的位合并到结果中 - num = (num << bits_to_read) | bits; - - // 更新位宽和 bit_index - remaining_bits -= bits_to_read; - bit_index = available_bits - bits_to_read; - - if (bit_index == 0) { - bit_index = 8; - decode_pos++; - } - } - decode_pos_list[0] = decode_pos; - decode_pos_list[1] = bit_index; - - return num; - } - public static int BOSBlockDecoderImprove(byte[] encoded, int decode_pos, int[] value_list,int init_block_size, int block_size, int[] value_pos_arr) { - - int k_byte = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - int k1_byte = (int) (k_byte % pow(2, 16)); - int k1 = k1_byte / 2; - int final_alpha = k1_byte % 2; - - int k2 = (int) (k_byte / pow(2, 16)); - -// int value0 = bytes2Integer(encoded, decode_pos, 4); -// decode_pos += 4; -// value_list[value_pos_arr[0]] =value0; -// value_pos_arr[0] ++; - - int min_delta = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - int count_size = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - int[] decode_list = new int[2]; - decode_list[0]= decode_pos; - decode_list[1]= 8; - - int valuePos = value_pos_arr[0]; - -// ArrayList<Integer> repeat_count = new ArrayList<>(); - ArrayList<Integer> repeat_count = new ArrayList<>(); - if (count_size != 0) { - int bit_width_init = getBitWith(init_block_size-1); - for(int i = 0;i<count_size;i++){ - int repeat_count_v = DecodeBits(encoded, bit_width_init, decode_list); - repeat_count.add(repeat_count_v); - } - - if(decode_list[1] != 8){ - decode_list[1] = 8; - decode_list[0] ++; - } -// repeat_count = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(init_block_size-1), count_size, repeat_count_result); - 
decode_pos = decode_list[0]; -// decode_list[1]= 8; - } - - int cur_block_size = block_size; - for (int i = 1; i < count_size; i += 2) { - cur_block_size -= (repeat_count.get(i) - 1); - } - - int bit_width_final = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - - - int pre_v; - int cur_i = 0; - int repeat_i = 0; - if(k1==0 && k2==0){ -// int pre_v = value0; - decode_list[0] = decode_pos; - decode_list[1]= 8; - for (int i = 0; i < cur_block_size; i++) { - pre_v = min_delta + DecodeBits(encoded, bit_width_final, decode_list); -// value_list[value_pos_arr[0]++] = pre_v; - if (repeat_i < count_size && cur_i == repeat_count.get(repeat_i)) { - cur_i += (repeat_count.get(repeat_i+1)); - - for (int j = 0; j < repeat_count.get(repeat_i + 1); j++) { - value_list[value_pos_arr[0]++] = pre_v; - } - repeat_i += 2; - } else { - cur_i++; - value_list[value_pos_arr[0]++] = pre_v; - } - } - if(decode_list[1] != 8){ - decode_list[1] = 8; - decode_list[0] ++; - } - -// value_pos_arr[0] = valuePos; - return decode_list[0]; - } - - ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); -// ArrayList<Integer> final_left_outlier = new ArrayList<>(); -// ArrayList<Integer> final_right_outlier = new ArrayList<>(); -// ArrayList<Integer> final_normal= new ArrayList<>();; - ArrayList<Integer> bitmap_outlier = new ArrayList<>(); - int final_k_start_value = 0; - int final_k_end_value = 0; -// int bit_width_final = 0; - int left_bit_width = 0; - int right_bit_width = 0; - - final_k_start_value = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - final_k_end_value = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - -// bit_width_final = bytes2Integer(encoded, decode_pos, 1); -// decode_pos += 1; - - left_bit_width = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - right_bit_width = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - if (final_alpha == 0) { - int 
bitmap_bytes = (int) Math.ceil((double) (cur_block_size + k1 + k2) / (double) 8); - for (int i = 0; i < bitmap_bytes; i++) { - bitmap_outlier.add(bytes2Integer(encoded, decode_pos, 1)); - decode_pos += 1; - } - int bitmap_outlier_i = 0; - int remaining_bits = 8; - int tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - int i = 0; - while (i < cur_block_size ) { - if (remaining_bits > 1) { - int bit_i = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_i == 1) { - int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_left_right == 1) { - final_left_outlier_index.add(i); - } else { - final_right_outlier_index.add(i); - } - } - if (remaining_bits == 0) { - remaining_bits = 8; - if (bitmap_outlier_i >= bitmap_bytes) break; - tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - } - } else if (remaining_bits == 1) { - int bit_i = tmp & 0x1; - remaining_bits = 8; - if (bitmap_outlier_i >= bitmap_bytes) break; - tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - if (bit_i == 1) { - int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_left_right == 1) { - final_left_outlier_index.add(i); - } else { - final_right_outlier_index.add(i); - } - } - } - i++; - } - } else { - ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); - final_left_outlier_index = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(cur_block_size-1), k1, decode_pos_result_left); - decode_pos = (decode_pos_result_left.get(0)); - ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); - final_right_outlier_index = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(cur_block_size-1), k2, decode_pos_result_right); - decode_pos = (decode_pos_result_right.get(0)); - } - - - - - -// ArrayList<Integer> decode_pos_normal = new ArrayList<>(); -// final_normal = decodeOutlier2Bytes(encoded, decode_pos, bit_width_final, block_size - k1 - k2, 
decode_pos_normal); -// -// decode_pos = decode_pos_normal.get(0); -// if (k1 != 0) { -// ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); -// final_left_outlier = decodeOutlier2Bytes(encoded, decode_pos, left_bit_width, k1, decode_pos_result_left); -// decode_pos = decode_pos_result_left.get(0); -// } -// if (k2 != 0) { -// ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); -// final_right_outlier = decodeOutlier2Bytes(encoded, decode_pos, right_bit_width, k2, decode_pos_result_right); -// decode_pos = decode_pos_result_right.get(0); -// } - int left_outlier_i = 0; - int right_outlier_i = 0; - int normal_i = 0; -// int pre_v = value0; -// int final_k_end_value = (int) (final_k_start_value + pow(2, bit_width_final)); - -// Precompute constants - int normalOffset = min_delta + final_k_start_value; - int rightOutlierOffset = min_delta + final_k_end_value; - -// Initialize indices and pre-fetch next outlier positions - int leftOutlierNextIndex = (left_outlier_i < k1) ? final_left_outlier_index.get(left_outlier_i) : Integer.MAX_VALUE; - int rightOutlierNextIndex = (right_outlier_i < k2) ? final_right_outlier_index.get(right_outlier_i) : Integer.MAX_VALUE; - decode_list[0]= decode_pos; -// decode_list[1]= 8; - // Use a local variable for the position - for (int i = 0; i < cur_block_size; i++) { -// int currentDelta; - if (i == leftOutlierNextIndex) { - // Process left outlier - pre_v = min_delta + DecodeBits(encoded, left_bit_width, decode_list); // final_left_outlier.get(left_outlier_i); - left_outlier_i++; - leftOutlierNextIndex = (left_outlier_i < k1) ? final_left_outlier_index.get(left_outlier_i) : Integer.MAX_VALUE; - } else if (i == rightOutlierNextIndex) { - // Process right outlier - pre_v = rightOutlierOffset + DecodeBits(encoded, right_bit_width, decode_list);// final_right_outlier.get(right_outlier_i); - right_outlier_i++; - rightOutlierNextIndex = (right_outlier_i < k2) ? 
final_right_outlier_index.get(right_outlier_i) : Integer.MAX_VALUE; - } else { - // Process normal value - pre_v = normalOffset + DecodeBits(encoded, bit_width_final, decode_list); - normal_i++; - } - if (repeat_i < count_size && cur_i == repeat_count.get(repeat_i)) { - cur_i += (repeat_count.get(repeat_i+1)); - - for (int j = 0; j < repeat_count.get(repeat_i + 1); j++) { - value_list[value_pos_arr[0]++] = pre_v; - } - repeat_i += 2; - } else { - cur_i++; - value_list[value_pos_arr[0]++] = pre_v; - } - // Update the cumulative value and store it -// pre_v += deZigzag(currentDelta); -// value_list[valuePos++] = pre_v; - } -// value_pos_arr[0] = valuePos; - if(decode_list[1]!=8){ - return decode_list[0]+1; - }else { - return decode_list[0]; - } -// decode_pos = decode_list[0]; -// Update the position in the array - - -// return decode_pos; - } - public static void BOSDecoderImprove(byte[] encoded) { - - int decode_pos = 0; - int length_all = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - int block_size = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - - int block_num = length_all / block_size; - int remain_length = length_all - block_num * block_size; - - int[] value_list = new int[length_all+block_size]; - int[] value_pos_arr = new int[1]; - - for (int k = 0; k < block_num; k++) { -// System.out.println(k); - decode_pos = BOSBlockDecoderImprove(encoded, decode_pos, value_list, block_size, block_size, value_pos_arr); -// System.out.println(decode_pos); - } - if (remain_length <= 3) { - for (int i = 0; i < remain_length; i++) { - int value_end = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - value_list[value_pos_arr[0]] = value_end; - value_pos_arr[0]++; - } - } else { - BOSBlockDecoderImprove(encoded, decode_pos, value_list,block_size, remain_length, value_pos_arr); - } - } - - - public static void main(@org.jetbrains.annotations.NotNull String[] args) throws IOException { - String parent_dir = 
"/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/";// your data path -// String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String output_parent_dir = parent_dir + "icde0802/compression_ratio/rle_bos_m"; - - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } - - output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 -// dataset_block_size.add(256); - output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 -// dataset_block_size.add(2048); - 
output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 -// dataset_block_size.add(1024); - - int repeatTime2 = 100; -// for (int file_i = 0; file_i < 1; file_i++) { -// - for (int file_i = 0; file_i < input_path_list.size(); file_i++) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { - System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - loader.readHeaders(); - while (loader.readRecord()) { - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); - } - - inputStream.close(); - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; - - - int length = 0; - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - length = 
BOSEncoder(data2_arr, dataset_block_size.get(file_i), encoded_result); - } - - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - compressed_size += length; - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoder(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - - String[] record = { - f.toString(), - "RLE+BOS-M", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); - } - writer.close(); - } - } - - @Test - public void ExpTest() throws IOException { - String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/";// your data path -// String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; -// String output_parent_dir = parent_dir + "icde0802/compression_ratio/exp_m"; - String output_parent_dir = parent_dir + "icde0802/supply_experiment/R2O3_lower_outlier_compare/compression_ratio/bos"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - dataset_name.add("Synthetic_Exp_100"); - dataset_name.add("Synthetic_Exp_1000"); - dataset_name.add("Synthetic_Exp_10000"); - dataset_name.add("Synthetic_Exp_100000"); - dataset_name.add("Synthetic_Exp_1000000"); - dataset_name.add("Synthetic_Normal_100"); - dataset_name.add("Synthetic_Normal_1000"); - dataset_name.add("Synthetic_Normal_10000"); - dataset_name.add("Synthetic_Normal_100000"); - dataset_name.add("Synthetic_Normal_1000000"); - - for (String value : dataset_name) { - 
input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } - - output_path_list.add(output_parent_dir + "/Exp_100.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Exp_1000.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Exp_10000.csv");// 2 - output_path_list.add(output_parent_dir + "/Exp_100000.csv");// 2 - output_path_list.add(output_parent_dir + "/Exp_1000000.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Normal_100.csv"); // 3 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Normal_1000.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Normal_10000.csv");//5 - output_path_list.add(output_parent_dir + "/Normal_100000.csv");//5 - output_path_list.add(output_parent_dir + "/Normal_1000000.csv");//5 -// dataset_block_size.add(2048); - - int repeatTime2 = 100; -// for (int file_i = 8; file_i < 9; file_i++) { - - for (int file_i = input_path_list.size()-1; file_i >=0 ; file_i--) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { - System.out.println(f); - if(f.toString().contains(".DS")) continue; - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - - - 
loader.readHeaders(); - while (loader.readRecord()) { - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); - } - - inputStream.close(); - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; - - - int length = 0; - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - length = BOSEncoder(data2_arr, dataset_block_size.get(file_i), encoded_result); - } - - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - compressed_size += length; - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoder(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - - String[] record = { - f.toString(), - "RLE+BOS-M", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); - } - writer.close(); - } - } - - @Test - public void compressTest() throws IOException { -// String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/";// your data path - String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String output_parent_dir = parent_dir + "icde0802/supply_experiment/R3O2_compare_compression/compression_ratio/rle_m_c"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - 
dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } - - output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 -// dataset_block_size.add(256); - output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 -// dataset_block_size.add(1024); - - int repeatTime2 = 1; -// for (int file_i = 8; file_i < 9; file_i++) { - CompressionType[] compressList = { - CompressionType.LZ4, - 
CompressionType.LZMA2, - }; - - for (int file_i = input_path_list.size()-1; file_i >=0 ; file_i--) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", - "Compress Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { - System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - loader.readHeaders(); - while (loader.readRecord()) { - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); - } - inputStream.close(); - - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - long encodeTime = 0; - long decodeTime = 0; - int length = 0; - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - length = BOSEncoder(data2_arr, dataset_block_size.get(file_i), encoded_result); - } - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - - for (CompressionType comp : compressList) { - double ratio = 0; - double compressed_size = 0; - ICompressor compressor = ICompressor.getCompressor(comp); - byte[] compressed = compressor.compress(encoded_result); - - // test compression ratio and compressed size - compressed_size += compressed.length; - double ratioTmp = compressed_size / (double) (data1.size() * 
Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoder(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - - String[] record = { - f.toString(), - "RLE+BOS-M", - comp.toString(), - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); - } - double ratio = 0; - double compressed_size = 0; - File outfile = new File(parent_dir + "icde0802/example.bin"); - - // 使用FileOutputStream将byte数组写入文件 - try (FileOutputStream fos = new FileOutputStream(outfile)) { - fos.write(encoded_result); - } catch (IOException e2) { - // 处理可能的I/O异常 - e2.printStackTrace(); - } - - File input = new File(parent_dir + "icde0802/example.bin"); - File output = new File(parent_dir + "icde0802/example.7z"); - SevenZOutputFile out = new SevenZOutputFile(output); - - addToArchiveCompression(out, input, "."); - out.closeArchiveEntry(); - - long compressed = output.length(); - - - // test compression ratio and compressed size - compressed_size += compressed; - double ratioTmp = - (double) compressed / (double) (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - - - String[] record = { - f.toString(), - "RLE+BOS-M", - "7ZIP", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); - } - writer.close(); - } - } - - @Test - public void BOSImproveDecodeTest() throws IOException { - String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/"; // your data path -// String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String output_parent_dir = parent_dir + 
"icde0802/supply_experiment/R1O4_decode_time/compression_ratio/rle_bos_m"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } - - output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 -// 
dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 -// dataset_block_size.add(1024); - - int repeatTime2 = 100; -// for (int file_i = 4; file_i < 5; file_i++) { -// - for (int file_i = 0; file_i < input_path_list.size(); file_i++) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { -// f=tempList[1]; - System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - - loader.readHeaders(); - while (loader.readRecord()) { -// String value = loader.getValues()[index]; - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); -// data.add(Integer.valueOf(value)); - } - inputStream.close(); - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; - - - int length = 0; - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - length = BOSEncoderImprove(data2_arr, dataset_block_size.get(file_i), encoded_result); - } - - long e 
= System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - compressed_size += length; - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoderImprove(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - - String[] record = { - f.toString(), - "RLE+BOS-M", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); -// break; - } - writer.close(); - } - } - - @Test - public void BOSImproveEncodeTest() throws IOException { - String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/"; // your data path -// String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String output_parent_dir = parent_dir + "icde0802/compression_ratio/rle_bos_m_improve"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } - - output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 -// dataset_block_size.add(1024); - 
output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 -// dataset_block_size.add(1024); - - int repeatTime2 = 100; -// for (int file_i = 4; file_i < 5; file_i++) { -// - for (int file_i = 0; file_i < input_path_list.size(); file_i++) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { -// f=tempList[1]; - System.out.println(f); - InputStream inputStream = 
Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - - loader.readHeaders(); - while (loader.readRecord()) { -// String value = loader.getValues()[index]; - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); -// data.add(Integer.valueOf(value)); - } - inputStream.close(); - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; - - - int length = 0; - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - length = BOSEncoderImprove(data2_arr, dataset_block_size.get(file_i), encoded_result); - } - - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - compressed_size += length; - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoderImprove(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - - String[] record = { - f.toString(), - "RLE+BOS-M", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); -// break; - } - writer.close(); - } - } - -}
diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/RLEBOSVTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/RLEBOSVTest.java deleted file mode 100644 index 223701c..0000000 --- a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/RLEBOSVTest.java +++ /dev/null
@@ -1,2045 +0,0 @@ -package org.apache.iotdb.tsfile.encoding; - -import com.csvreader.CsvReader; -import com.csvreader.CsvWriter; -import org.junit.Test; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.util.ArrayList; -import java.util.Arrays; - -import static java.lang.Math.*; - -public class RLEBOSVTest { - - public static int getBitWith(int num) { - if (num == 0) return 1; - else return 32 - Integer.numberOfLeadingZeros(num); - } - public static int getCount(long long1, int mask) { - return ((int) (long1 & mask)); - } - public static int getUniqueValue(long long1, int left_shift) { - return ((int) ((long1) >> left_shift)); - } - - - public static void int2Bytes(int integer,int encode_pos , byte[] cur_byte) { - cur_byte[encode_pos] = (byte) (integer >> 24); - cur_byte[encode_pos+1] = (byte) (integer >> 16); - cur_byte[encode_pos+2] = (byte) (integer >> 8); - cur_byte[encode_pos+3] = (byte) (integer); - } - - public static void intByte2Bytes(int integer, int encode_pos , byte[] cur_byte) { - cur_byte[encode_pos] = (byte) (integer); - } - - private static void long2intBytes(long integer, int encode_pos , byte[] cur_byte) { - cur_byte[encode_pos] = (byte) (integer >> 24); - cur_byte[encode_pos+1] = (byte) (integer >> 16); - cur_byte[encode_pos+2] = (byte) (integer >> 8); - cur_byte[encode_pos+3] = (byte) (integer); - } - - - public static int bytes2Integer(byte[] encoded, int start, int num) { - int value = 0; - if (num > 4) { - System.out.println("bytes2Integer error"); - return 0; - } - for (int i = 0; i < num; i++) { - value <<= 8; - int b = encoded[i + start] & 0xFF; - value |= b; - } - return value; - } - - private static long bytesLong2Integer(byte[] encoded, int decode_pos) { - long value = 0; - for (int i = 0; i < 4; i++) { - value <<= 8; - int b = encoded[i + decode_pos] & 0xFF; - value |= b; - } - return value; - } - - public static void 
pack8Values(ArrayList<Integer> values, int offset, int width, int encode_pos, byte[] encoded_result) { - int bufIdx = 0; - int valueIdx = offset; - // remaining bits for the current unfinished Integer - int leftBit = 0; - - while (valueIdx < 8 + offset) { - // buffer is used for saving 32 bits as a part of result - int buffer = 0; - // remaining size of bits in the 'buffer' - int leftSize = 32; - - // encode the left bits of current Integer to 'buffer' - if (leftBit > 0) { - buffer |= (values.get(valueIdx) << (32 - leftBit)); - leftSize -= leftBit; - leftBit = 0; - valueIdx++; - } - - while (leftSize >= width && valueIdx < 8 + offset) { - // encode one Integer to the 'buffer' - buffer |= (values.get(valueIdx)<< (leftSize - width)); - leftSize -= width; - valueIdx++; - } - // If the remaining space of the buffer can not save the bits for one Integer, - if (leftSize > 0 && valueIdx < 8 + offset) { - // put the first 'leftSize' bits of the Integer into remaining space of the - // buffer - buffer |= (values.get(valueIdx) >>> (width - leftSize)); - leftBit = width - leftSize; - } - - // put the buffer into the final result - for (int j = 0; j < 4; j++) { - encoded_result[encode_pos] = (byte) ((buffer >>> ((3 - j) * 8)) & 0xFF); - encode_pos ++; - bufIdx++; - if (bufIdx >= width) { - return ; - } - } - } - - } - - public static void unpack8Values(byte[] encoded, int offset,int width, ArrayList<Integer> result_list) { - int byteIdx = offset; - long buffer = 0; - // total bits which have read from 'buf' to 'buffer'. i.e., - // number of available bits to be decoded. 
- int totalBits = 0; - int valueIdx = 0; - - while (valueIdx < 8) { - // If current available bits are not enough to decode one Integer, - // then add next byte from buf to 'buffer' until totalBits >= width - while (totalBits < width) { - buffer = (buffer << 8) | (encoded[byteIdx] & 0xFF); - byteIdx++; - totalBits += 8; - } - - // If current available bits are enough to decode one Integer, - // then decode one Integer one by one until left bits in 'buffer' is - // not enough to decode one Integer. - while (totalBits >= width && valueIdx < 8) { - result_list.add ((int) (buffer >>> (totalBits - width))); - valueIdx++; - totalBits -= width; - buffer = buffer & ((1L << totalBits) - 1); - } - } - } - - public static int bitPacking(ArrayList<Integer> numbers, int start, int bit_width,int encode_pos, byte[] encoded_result) { - int block_num = (numbers.size()-start) / 8; - for(int i=0;i<block_num;i++){ - pack8Values( numbers, start+i*8, bit_width,encode_pos, encoded_result); - encode_pos +=bit_width; - } - - return encode_pos; - - } - - public static ArrayList<Integer> decodeBitPacking( - byte[] encoded, int decode_pos, int bit_width, int block_size) { - ArrayList<Integer> result_list = new ArrayList<>(); - int block_num = (block_size - 1) / 8; - - for (int i = 0; i < block_num; i++) { // bitpacking - unpack8Values( encoded, decode_pos, bit_width, result_list); - decode_pos += bit_width; - } - return result_list; - } - - - public static int[] getAbsDeltaTsBlock( - int[] ts_block, - int i, - int block_size, - int remaining, - int[] min_delta, - ArrayList<Integer> repeat_count) { - int[] ts_block_delta = new int[remaining]; - - int value_delta_min = Integer.MAX_VALUE; - int value_delta_max = Integer.MIN_VALUE; - int base = i*block_size; - int end = i*block_size+remaining; - for (int j = base; j < end; j++) { - - int integer = ts_block[j]; - if (integer < value_delta_min) value_delta_min = integer; - if (integer > value_delta_max) { - value_delta_max = integer; - } - } - int 
pre_delta = ts_block[i*block_size]-value_delta_min; - int pre_count = 1; - - min_delta[0]=(value_delta_min); - int repeat_i = 0; - int ts_block_delta_i = 0; - for (int j = base+1; j < end; j++) { - int delta = ts_block[j]-value_delta_min; - if(delta == pre_delta){ - pre_count ++; - } else { - if(pre_count>7){ - repeat_count.add(repeat_i); - repeat_count.add(pre_count); - ts_block_delta[ts_block_delta_i]=pre_delta; - ts_block_delta_i ++; - } else{ - for (int k = 0; k < pre_count; k++){ - ts_block_delta[ts_block_delta_i] =pre_delta; - ts_block_delta_i++; - } - } - pre_count =1; - repeat_i = j - i*block_size; - } - pre_delta = delta; - - } - for (int j = 0; j < pre_count; j++){ - ts_block_delta[ts_block_delta_i] =pre_delta; - ts_block_delta_i++; - } - min_delta[1]=(ts_block_delta_i); - min_delta[2]=(value_delta_max-value_delta_min); - int[] new_ts_block_delta = new int[ts_block_delta_i]; - System.arraycopy(ts_block_delta, 0, new_ts_block_delta, 0, ts_block_delta_i); - - return new_ts_block_delta; - } - - - - public static int encodeOutlier2Bytes( - ArrayList<Integer> ts_block_delta, - int bit_width, - int encode_pos, byte[] encoded_result) { - - encode_pos = bitPacking(ts_block_delta, 0, bit_width, encode_pos, encoded_result); - - int n_k = ts_block_delta.size(); - int n_k_b = n_k / 8; - long cur_remaining = 0; // encoded int - int cur_number_bits = 0; // the bit width used of encoded int - for (int i = n_k_b * 8; i < n_k; i++) { - long cur_value = ts_block_delta.get(i); - int cur_bit_width = bit_width; // remaining bit width of current value - - if (cur_number_bits + bit_width >= 32) { - cur_remaining <<= (32 - cur_number_bits); - cur_bit_width = bit_width - 32 + cur_number_bits; - cur_remaining += ((cur_value >> cur_bit_width)); - long2intBytes(cur_remaining,encode_pos,encoded_result); - encode_pos += 4; - - cur_remaining = 0; - cur_number_bits = 0; - } - - cur_remaining <<= cur_bit_width; - cur_number_bits += cur_bit_width; - cur_remaining += (((cur_value << (32 - 
cur_bit_width)) & 0xFFFFFFFFL) >> (32 - cur_bit_width)); - } - cur_remaining <<= (32 - cur_number_bits); - long2intBytes(cur_remaining,encode_pos,encoded_result); - encode_pos += 4; - return encode_pos; - - } - - - public static ArrayList<Integer> decodeOutlier2Bytes( - byte[] encoded, - int decode_pos, - int bit_width, - int length, - ArrayList<Integer> encoded_pos_result - ) { - - int n_k_b = length / 8; - int remaining = length - n_k_b * 8; - ArrayList<Integer> result_list = new ArrayList<>(decodeBitPacking(encoded, decode_pos, bit_width, n_k_b * 8 + 1)); - decode_pos += n_k_b * bit_width; - - ArrayList<Long> int_remaining = new ArrayList<>(); - int int_remaining_size = remaining * bit_width / 32 + 1; - for (int j = 0; j < int_remaining_size; j++) { - - int_remaining.add(bytesLong2Integer(encoded, decode_pos)); - decode_pos += 4; - } - - int cur_remaining_bits = 32; // remaining bit width of current value - long cur_number = int_remaining.get(0); - int cur_number_i = 1; - for (int i = n_k_b * 8; i < length; i++) { - if (bit_width < cur_remaining_bits) { - int tmp = (int) (cur_number >> (32 - bit_width)); - result_list.add(tmp); - cur_number <<= bit_width; - cur_number &= 0xFFFFFFFFL; - cur_remaining_bits -= bit_width; - } else { - int tmp = (int) (cur_number >> (32 - cur_remaining_bits)); - int remain_bits = bit_width - cur_remaining_bits; - tmp <<= remain_bits; - - cur_number = int_remaining.get(cur_number_i); - cur_number_i++; - tmp += (cur_number >> (32 - remain_bits)); - result_list.add(tmp); - cur_number <<= remain_bits; - cur_number &= 0xFFFFFFFFL; - cur_remaining_bits = 32 - remain_bits; - } - } - encoded_pos_result.add(decode_pos); - return result_list; - } - - private static int BOSEncodeBits(int[] ts_block_delta, - int init_block_size, - int final_k_start_value, - int final_x_l_plus, - int final_k_end_value, - int final_x_u_minus, - int max_delta_value, - int[] min_delta, - ArrayList<Integer> repeat_count, - int encode_pos, - byte[] cur_byte) { - int 
block_size = ts_block_delta.length; - - ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_left_outlier = new ArrayList<>(); - ArrayList<Integer> final_right_outlier = new ArrayList<>(); - ArrayList<Integer> final_normal = new ArrayList<>(); - int k1 = 0; - int k2 = 0; - ArrayList<Integer> bitmap_outlier = new ArrayList<>(); - int index_bitmap_outlier = 0; - int cur_index_bitmap_outlier_bits = 0; - for (int i = 0; i < block_size; i++) { - int cur_value = ts_block_delta[i]; - if (cur_value < final_k_start_value) { - final_left_outlier.add(cur_value); - final_left_outlier_index.add(i); - if (cur_index_bitmap_outlier_bits % 8 != 7) { - index_bitmap_outlier <<= 2; - index_bitmap_outlier += 3; - cur_index_bitmap_outlier_bits += 2; - } else { - index_bitmap_outlier <<= 1; - index_bitmap_outlier += 1; - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 1; - cur_index_bitmap_outlier_bits = 1; - } - - - k1++; - - - } else if (cur_value >= final_k_end_value) { - final_right_outlier.add(cur_value - final_k_end_value); - final_right_outlier_index.add(i); - if (cur_index_bitmap_outlier_bits % 8 != 7) { - index_bitmap_outlier <<= 2; - index_bitmap_outlier += 2; - cur_index_bitmap_outlier_bits += 2; - } else { - index_bitmap_outlier <<= 1; - index_bitmap_outlier += 1; - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 0; - cur_index_bitmap_outlier_bits = 1; - } - k2++; - - } else { - final_normal.add(cur_value - final_x_l_plus); - index_bitmap_outlier <<= 1; - cur_index_bitmap_outlier_bits += 1; - } - if (cur_index_bitmap_outlier_bits % 8 == 0) { - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 0; - } - } - if (cur_index_bitmap_outlier_bits % 8 != 0) { - - index_bitmap_outlier <<= (8 - cur_index_bitmap_outlier_bits % 8); - - index_bitmap_outlier &= 0xFF; - bitmap_outlier.add(index_bitmap_outlier); - } - int final_alpha 
= ((k1 + k2) * getBitWith(block_size-1)) <= (block_size + k1 + k2) ? 1 : 0; - - int k_byte = (k1 << 1); - k_byte += final_alpha; - k_byte += (k2 << 16); - - int2Bytes(k_byte,encode_pos,cur_byte); - encode_pos += 4; - - int2Bytes(min_delta[0],encode_pos,cur_byte); - encode_pos += 4; - int size = repeat_count.size(); - intByte2Bytes(size,encode_pos,cur_byte); - encode_pos += 1; - - if (size != 0) - encode_pos =encodeOutlier2Bytes(repeat_count, getBitWith(init_block_size-1),encode_pos,cur_byte); - - - - int2Bytes(final_x_l_plus,encode_pos,cur_byte); - encode_pos += 4; - int2Bytes(final_k_end_value,encode_pos,cur_byte); - encode_pos += 4; - - int bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); - intByte2Bytes(bit_width_final,encode_pos,cur_byte); - encode_pos += 1; - int left_bit_width = getBitWith(final_k_start_value); - int right_bit_width = getBitWith(max_delta_value -final_k_end_value ); - intByte2Bytes(left_bit_width,encode_pos,cur_byte); - encode_pos += 1; - intByte2Bytes(right_bit_width,encode_pos,cur_byte); - encode_pos += 1; - if (final_alpha == 0) { - - for (int i : bitmap_outlier) { - - intByte2Bytes(i,encode_pos,cur_byte); - encode_pos += 1; - } - } else { - encode_pos = encodeOutlier2Bytes(final_left_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); - encode_pos = encodeOutlier2Bytes(final_right_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); - } - encode_pos =encodeOutlier2Bytes(final_normal, bit_width_final,encode_pos,cur_byte); - if (k1 != 0) - encode_pos = encodeOutlier2Bytes(final_left_outlier, left_bit_width,encode_pos,cur_byte); - if (k2 != 0) - encode_pos = encodeOutlier2Bytes(final_right_outlier, right_bit_width,encode_pos,cur_byte); - return encode_pos; - - } - - - - - private static int BOSBlockEncoder(int[] ts_block, int block_i, int block_size,int remaining, int encode_pos , byte[] cur_byte) { - - ArrayList<Integer> repeat_count = new ArrayList<>(); - int init_block_size = block_size; - - int[] 
min_delta = new int[3]; - int[] ts_block_delta = getAbsDeltaTsBlock(ts_block,block_i, init_block_size,remaining, min_delta, repeat_count); - block_size = min_delta[1]; - - - int max_delta_value = min_delta[2]; - int[] value_list = new int[block_size]; - int unique_value_count = 0; - int[] value_count_list = new int[max_delta_value+1]; - for(int value:ts_block_delta){ - if(value_count_list[value]==0){ - value_count_list[value] = 1; - value_list[unique_value_count] = value; - unique_value_count ++; - }else{ - value_count_list[value] ++; - } - } - - int left_shift = getBitWith(block_size); - int mask = (1 << left_shift) - 1; - long[] sorted_value_list = new long[unique_value_count]; - int count = 0; - - for(int i=0;i<unique_value_count;i++){ - int value = value_list[i]; - - sorted_value_list[i] = (((long) value) << left_shift) + value_count_list[value]; - } - Arrays.sort(sorted_value_list); - - for(int i=0;i<unique_value_count;i++){ - count += getCount(sorted_value_list[i], mask); - sorted_value_list[i] = (((long)getUniqueValue(sorted_value_list[i], left_shift) ) << left_shift) + count;//new_value_list[i] - } - - - int final_k_start_value = -1; // x_l_minus - int final_x_l_plus = 0; // x_l_plus - int final_k_end_value = max_delta_value+1; // x_u_plus - int final_x_u_minus = max_delta_value; // x_u_minus - - int min_bits = 0; - min_bits += (getBitWith(final_k_end_value - final_k_start_value - 2 ) * (block_size)); - - int cur_k1 = 0; - - int x_l_plus_value = 0; // x_l_plus - int x_u_minus_value = max_delta_value; // x_u_plus - - for (int end_value_i = 1; end_value_i < unique_value_count; end_value_i++) { - - x_u_minus_value = getUniqueValue(sorted_value_list[end_value_i-1], left_shift); - int x_u_plus_value = getUniqueValue(sorted_value_list[end_value_i], left_shift); - int cur_bits = 0; - int cur_k2 = block_size - getCount(sorted_value_list[end_value_i-1],mask); - cur_bits += Math.min((cur_k2 + cur_k1) * getBitWith(block_size-1), block_size + cur_k2 + cur_k1); - if 
(cur_k1 + cur_k2 != block_size) - cur_bits += (block_size - cur_k2) * getBitWith(x_u_minus_value - x_l_plus_value); // cur_k1 = 0 - if (cur_k2 != 0) - cur_bits += cur_k2 * getBitWith(max_delta_value - x_u_plus_value); - - - if (cur_bits < min_bits) { - min_bits = cur_bits; - final_x_u_minus = x_u_minus_value; - final_k_end_value = x_u_plus_value; - } - } - - int k_start_value = -1; // x_l_minus - - for (int start_value_i = 0; start_value_i < unique_value_count-1; start_value_i++) { - long k_start_valueL = sorted_value_list[start_value_i]; - k_start_value = getUniqueValue(k_start_valueL, left_shift) ; - x_l_plus_value = getUniqueValue(sorted_value_list[start_value_i+1], left_shift) ; - - - cur_k1 = getCount(k_start_valueL,mask); - - int k_end_value; - int cur_bits; - int cur_k2; - k_end_value = max_delta_value + 1; - - cur_bits = 0; - cur_k2 = 0; - cur_bits += Math.min((cur_k2 + cur_k1) * getBitWith(block_size-1), block_size + cur_k2 + cur_k1); - cur_bits += cur_k1 * getBitWith(k_start_value); - if (cur_k1 + cur_k2 != block_size) - cur_bits += (block_size - cur_k1) * getBitWith(k_end_value- x_l_plus_value); //cur_k2 =0 - - if (cur_bits < min_bits) { - min_bits = cur_bits; - final_k_start_value = k_start_value; - final_x_l_plus = x_l_plus_value; - final_k_end_value = k_end_value; - final_x_u_minus = max_delta_value; - } - - for (int end_value_i = start_value_i + 1; end_value_i < unique_value_count; end_value_i++) { - - x_u_minus_value = getUniqueValue(sorted_value_list[end_value_i-1], left_shift); - k_end_value = getUniqueValue(sorted_value_list[end_value_i], left_shift); - - cur_bits = 0; - cur_k2 = block_size - getCount(sorted_value_list[end_value_i-1],mask); - - cur_bits += Math.min((cur_k1 + cur_k2) * getBitWith(block_size-1), block_size + cur_k1 + cur_k2); - cur_bits += cur_k1 * getBitWith(k_start_value); - if (cur_k1 + cur_k2 != block_size) - cur_bits += (block_size - cur_k1 - cur_k2) * getBitWith(x_u_minus_value - x_l_plus_value); - if (cur_k2 != 0) - cur_bits 
+= cur_k2 * getBitWith(max_delta_value - k_end_value); - - - if (cur_bits < min_bits) { - min_bits = cur_bits; - final_k_start_value = k_start_value; - final_x_l_plus = x_l_plus_value; - final_k_end_value = k_end_value; - final_x_u_minus = x_u_minus_value; - } - - } - } - - - encode_pos = BOSEncodeBits(ts_block_delta,init_block_size, final_k_start_value,final_x_l_plus, final_k_end_value,final_x_u_minus, - max_delta_value, min_delta,repeat_count, encode_pos , cur_byte); - - return encode_pos; - } - - public static int BOSEncoder( - int[] data, int block_size, byte[] encoded_result) { - - int length_all = data.length; - - int encode_pos = 0; - int2Bytes(length_all,encode_pos,encoded_result); - encode_pos += 4; - - int block_num = length_all / block_size; - int2Bytes(block_size,encode_pos,encoded_result); - encode_pos+= 4; - - for (int i = 0; i < block_num; i++) { - encode_pos = BOSBlockEncoder(data, i, block_size, block_size,encode_pos,encoded_result); -// System.out.println(encode_pos); - } - - int remaining_length = length_all - block_num * block_size; - if (remaining_length <= 3) { - for (int i = remaining_length; i > 0; i--) { - int2Bytes(data[data.length - i], encode_pos, encoded_result); - encode_pos += 4; - } - - } else { - - int start = block_num * block_size; - int remaining = length_all-start; - - - encode_pos = BOSBlockEncoder(data, block_num, block_size,remaining, encode_pos,encoded_result); - -// int[] ts_block = new int[length_all-start]; -// if (length_all - start >= 0) System.arraycopy(data, start, ts_block, 0, length_all - start); -// -// -// encode_pos = BOSBlockEncoder(ts_block, encode_pos,encoded_result); - - } - - - return encode_pos; - } - - public static int EncodeBits(int num, - int bit_width, - int encode_pos, - byte[] cur_byte, - int[] bit_index_list){ - // 找到要插入的位的索引 - int bit_index = bit_index_list[0] ;//cur_byte[encode_pos + 1]; - - // 计算数值的起始位位置 - int remaining_bits = bit_width; - - while (remaining_bits > 0) { - // 计算在当前字节中可以使用的位数 - int 
available_bits = bit_index; - int bits_to_write = Math.min(available_bits, remaining_bits); - - // 更新 bit_index - bit_index = available_bits - bits_to_write; - - // 计算要写入的位的掩码和数值 - int mask = (1 << bits_to_write) - 1; - int bits = (num >> (remaining_bits - bits_to_write)) & mask; - - // 写入到当前位置 - cur_byte[encode_pos] &= (byte) ~(mask << bit_index); // 清除对应位置的位 - cur_byte[encode_pos] |= (byte) (bits << bit_index); - - // 更新位宽和数值 - remaining_bits -= bits_to_write; - if (bit_index == 0) { - bit_index = 8; - encode_pos++; - } - } - bit_index_list[0] = bit_index; -// cur_byte[encode_pos + 1] = (byte) bit_index; - return encode_pos; - } - private static int BOSEncodeBitsImprove(int[] ts_block_delta, - int init_block_size, - int final_k_start_value, - int final_x_l_plus, - int final_k_end_value, - int final_x_u_minus, - int max_delta_value, - int[] min_delta, - ArrayList<Integer> repeat_count, - int encode_pos, - byte[] cur_byte) { - int block_size = ts_block_delta.length; - - ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); - - int k1 = 0; - int k2 = 0; - - - - ArrayList<Integer> bitmap_outlier = new ArrayList<>(); - int index_bitmap_outlier = 0; - int cur_index_bitmap_outlier_bits = 0; - for (int i = 0; i < block_size; i++) { - int cur_value = ts_block_delta[i]; - if ( cur_value<= final_k_start_value) { -// encode_pos = EncodeBits(cur_value,left_bit_width,encode_pos,cur_byte); -// final_left_outlier.add(cur_value); - final_left_outlier_index.add(i); - if (cur_index_bitmap_outlier_bits % 8 != 7) { - index_bitmap_outlier <<= 2; - index_bitmap_outlier += 3; - cur_index_bitmap_outlier_bits += 2; - } else { - index_bitmap_outlier <<= 1; - index_bitmap_outlier += 1; - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 1; - cur_index_bitmap_outlier_bits = 1; - } - k1++; - - - } else if (cur_value >= final_k_end_value) { -// encode_pos = EncodeBits(cur_value- 
final_k_end_value,right_bit_width,encode_pos,cur_byte); -// final_right_outlier.add(cur_value - final_k_end_value); - final_right_outlier_index.add(i); - if (cur_index_bitmap_outlier_bits % 8 != 7) { - index_bitmap_outlier <<= 2; - index_bitmap_outlier += 2; - cur_index_bitmap_outlier_bits += 2; - } else { - index_bitmap_outlier <<= 1; - index_bitmap_outlier += 1; - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 0; - cur_index_bitmap_outlier_bits = 1; - } - k2++; - - } else { -// final_normal.add(cur_value - final_x_l_plus); -// encode_pos = EncodeBits(cur_value- final_x_l_plus,right_bit_width,encode_pos,cur_byte); - index_bitmap_outlier <<= 1; - cur_index_bitmap_outlier_bits += 1; - } - if (cur_index_bitmap_outlier_bits % 8 == 0) { - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 0; - } - } - if (cur_index_bitmap_outlier_bits % 8 != 0) { - - index_bitmap_outlier <<= (8 - cur_index_bitmap_outlier_bits % 8); - - index_bitmap_outlier &= 0xFF; - bitmap_outlier.add(index_bitmap_outlier); - } - - int final_alpha = ((k1 + k2) * getBitWith(block_size-1)) <= (block_size + k1 + k2) ? 
1 : 0; - - - int k_byte = (k1 << 1); - k_byte += final_alpha; - k_byte += (k2 << 16); - - int2Bytes(k_byte,encode_pos,cur_byte); - encode_pos += 4; - - - int2Bytes(min_delta[0],encode_pos,cur_byte); - encode_pos += 4; - - int size = repeat_count.size(); - intByte2Bytes(size,encode_pos,cur_byte); - encode_pos += 1; - - int[] bit_index_list = new int[1]; - bit_index_list[0] = 8; - if (size != 0){ - int bit_width_init = getBitWith(init_block_size-1); - for(int repeat_count_v:repeat_count){ - encode_pos = EncodeBits(repeat_count_v, bit_width_init, encode_pos, cur_byte, bit_index_list); - } - if(bit_index_list[0] != 8){ - bit_index_list[0] = 8; - encode_pos ++; - } - } -// encode_pos =encodeOutlier2Bytes(repeat_count, getBitWith(init_block_size-1),encode_pos,cur_byte); - -// int2Bytes(min_delta[1],encode_pos,cur_byte); -// encode_pos += 4; - - int bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); - intByte2Bytes(bit_width_final,encode_pos,cur_byte); - encode_pos += 1; - - - if(final_k_start_value<0 && final_k_end_value > max_delta_value){ -// int bit_width_final= getBitWith(final_x_u_minus - final_x_l_plus); -// cur_byte[encode_pos+1] = 8; - bit_index_list[0] = 8; - for (int cur_value : ts_block_delta) { - encode_pos = EncodeBits(cur_value, bit_width_final, encode_pos, cur_byte, bit_index_list); -// final_normal.add(cur_value); - } - if(bit_index_list[0] != 8){ - encode_pos ++; - } -// cur_byte[encode_pos+1] = 0; -// encode_pos = encodeOutlier2Bytes(final_normal, bit_width_final,encode_pos,cur_byte); - return encode_pos; - } - - - int left_bit_width = getBitWith(final_k_start_value);//final_left_max - int right_bit_width = getBitWith(max_delta_value - final_k_end_value);//final_right_min - int2Bytes(final_x_l_plus,encode_pos,cur_byte); - encode_pos += 4; - int2Bytes(final_k_end_value,encode_pos,cur_byte); - encode_pos += 4; - -// bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); -// intByte2Bytes(bit_width_final,encode_pos,cur_byte); -// 
encode_pos += 1; - intByte2Bytes(left_bit_width,encode_pos,cur_byte); - encode_pos += 1; - intByte2Bytes(right_bit_width,encode_pos,cur_byte); - encode_pos += 1; - - if (final_alpha == 0) { // 0 - - for (int i : bitmap_outlier) { - - intByte2Bytes(i,encode_pos,cur_byte); - encode_pos += 1; - } - } else { - encode_pos = encodeOutlier2Bytes(final_left_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); - encode_pos = encodeOutlier2Bytes(final_right_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); - } -// cur_byte[encode_pos+1] = 8; -// bit_index_list[0] = 8; - for (int cur_value : ts_block_delta) { - if (cur_value <= final_k_start_value) { - encode_pos = EncodeBits(cur_value, left_bit_width, encode_pos, cur_byte,bit_index_list); - } else if (cur_value >= final_k_end_value) { - encode_pos = EncodeBits(cur_value - final_k_end_value, right_bit_width, encode_pos, cur_byte,bit_index_list); - } else { - encode_pos = EncodeBits(cur_value - final_x_l_plus, bit_width_final, encode_pos, cur_byte,bit_index_list); - } - } - if(bit_index_list[0] != 8){ - encode_pos ++; - } - -// cur_byte[encode_pos+1] = 0; - -// if(k1==0 && k2==0){ -// intByte2Bytes(bit_width_final,encode_pos,cur_byte); -// encode_pos += 1; -// -// -// } -// else{ -// int2Bytes(final_x_l_plus,encode_pos,cur_byte); -// encode_pos += 4; -// int2Bytes(final_k_end_value,encode_pos,cur_byte); -// encode_pos += 4; -// -// bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); -// intByte2Bytes(bit_width_final,encode_pos,cur_byte); -// encode_pos += 1; -// intByte2Bytes(left_bit_width,encode_pos,cur_byte); -// encode_pos += 1; -// intByte2Bytes(right_bit_width,encode_pos,cur_byte); -// encode_pos += 1; -// if (final_alpha == 0) { // 0 -// -// for (int i : bitmap_outlier) { -// -// intByte2Bytes(i,encode_pos,cur_byte); -// encode_pos += 1; -// } -// } else { -// encode_pos = encodeOutlier2Bytes(final_left_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); -// encode_pos = 
encodeOutlier2Bytes(final_right_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); -// } -// } - - -// if(k1+k2!=block_size) -// encode_pos = encodeOutlier2Bytes(final_normal, bit_width_final,encode_pos,cur_byte); -// if (k1 != 0) -// encode_pos = encodeOutlier2Bytes(final_left_outlier, left_bit_width,encode_pos,cur_byte); -// if (k2 != 0) -// encode_pos = encodeOutlier2Bytes(final_right_outlier, right_bit_width,encode_pos,cur_byte); -// System.out.println(encode_pos); - return encode_pos; - - } - private static int BOSBlockEncoderImprove(int[] ts_block, int block_i, int block_size,int remaining, int encode_pos , byte[] cur_byte) { - - ArrayList<Integer> repeat_count = new ArrayList<>(); - int init_block_size = block_size; - - int[] min_delta = new int[3]; - int[] ts_block_delta = getAbsDeltaTsBlock(ts_block,block_i, init_block_size,remaining, min_delta, repeat_count); - block_size = min_delta[1]; - - - int max_delta_value = min_delta[2]; - int[] value_list = new int[block_size]; - int unique_value_count = 0; - int[] value_count_list = new int[max_delta_value+1]; - for(int value:ts_block_delta){ - if(value_count_list[value]==0){ - value_count_list[value] = 1; - value_list[unique_value_count] = value; - unique_value_count ++; - }else{ - value_count_list[value] ++; - } - } - - int left_shift = getBitWith(block_size); - int mask = (1 << left_shift) - 1; - long[] sorted_value_list = new long[unique_value_count]; - int count = 0; - - for(int i=0;i<unique_value_count;i++){ - int value = value_list[i]; - - sorted_value_list[i] = (((long) value) << left_shift) + value_count_list[value]; - } - Arrays.sort(sorted_value_list); - - for(int i=0;i<unique_value_count;i++){ - count += getCount(sorted_value_list[i], mask); - sorted_value_list[i] = (((long)getUniqueValue(sorted_value_list[i], left_shift) ) << left_shift) + count;//new_value_list[i] - } - - - int final_k_start_value = -1; // x_l_minus - int final_x_l_plus = 0; // x_l_plus - int final_k_end_value = 
max_delta_value+1; // x_u_plus - int final_x_u_minus = max_delta_value; // x_u_minus - - int min_bits = 0; - min_bits += (getBitWith(final_k_end_value - final_k_start_value - 2 ) * (block_size)); - - int cur_k1 = 0; - - int x_l_plus_value = 0; // x_l_plus - int x_u_minus_value = max_delta_value; // x_u_plus - - for (int end_value_i = 1; end_value_i < unique_value_count; end_value_i++) { - - x_u_minus_value = getUniqueValue(sorted_value_list[end_value_i-1], left_shift); - int x_u_plus_value = getUniqueValue(sorted_value_list[end_value_i], left_shift); - int cur_bits = 0; - int cur_k2 = block_size - getCount(sorted_value_list[end_value_i-1],mask); - cur_bits += Math.min((cur_k2 + cur_k1) * getBitWith(block_size-1), block_size + cur_k2 + cur_k1); - if (cur_k1 + cur_k2 != block_size) - cur_bits += (block_size - cur_k2) * getBitWith(x_u_minus_value - x_l_plus_value); // cur_k1 = 0 - if (cur_k2 != 0) - cur_bits += cur_k2 * getBitWith(max_delta_value - x_u_plus_value); - - - if (cur_bits < min_bits) { - min_bits = cur_bits; - final_x_u_minus = x_u_minus_value; - final_k_end_value = x_u_plus_value; - } - } - - int k_start_value = -1; // x_l_minus - - for (int start_value_i = 0; start_value_i < unique_value_count-1; start_value_i++) { - long k_start_valueL = sorted_value_list[start_value_i]; - k_start_value = getUniqueValue(k_start_valueL, left_shift) ; - x_l_plus_value = getUniqueValue(sorted_value_list[start_value_i+1], left_shift) ; - - - cur_k1 = getCount(k_start_valueL,mask); - - int k_end_value; - int cur_bits; - int cur_k2; - k_end_value = max_delta_value + 1; - - cur_bits = 0; - cur_k2 = 0; - cur_bits += Math.min((cur_k2 + cur_k1) * getBitWith(block_size-1), block_size + cur_k2 + cur_k1); - cur_bits += cur_k1 * getBitWith(k_start_value); - if (cur_k1 + cur_k2 != block_size) - cur_bits += (block_size - cur_k1) * getBitWith(k_end_value- x_l_plus_value); //cur_k2 =0 - - if (cur_bits < min_bits) { - min_bits = cur_bits; - final_k_start_value = k_start_value; - 
final_x_l_plus = x_l_plus_value; - final_k_end_value = k_end_value; - final_x_u_minus = max_delta_value; - } - - for (int end_value_i = start_value_i + 1; end_value_i < unique_value_count; end_value_i++) { - - x_u_minus_value = getUniqueValue(sorted_value_list[end_value_i-1], left_shift); - k_end_value = getUniqueValue(sorted_value_list[end_value_i], left_shift); - - cur_bits = 0; - cur_k2 = block_size - getCount(sorted_value_list[end_value_i-1],mask); - - cur_bits += Math.min((cur_k1 + cur_k2) * getBitWith(block_size-1), block_size + cur_k1 + cur_k2); - cur_bits += cur_k1 * getBitWith(k_start_value); - if (cur_k1 + cur_k2 != block_size) - cur_bits += (block_size - cur_k1 - cur_k2) * getBitWith(x_u_minus_value - x_l_plus_value); - if (cur_k2 != 0) - cur_bits += cur_k2 * getBitWith(max_delta_value - k_end_value); - - - if (cur_bits < min_bits) { - min_bits = cur_bits; - final_k_start_value = k_start_value; - final_x_l_plus = x_l_plus_value; - final_k_end_value = k_end_value; - final_x_u_minus = x_u_minus_value; - } - - } - } - - - encode_pos = BOSEncodeBitsImprove(ts_block_delta,init_block_size, final_k_start_value,final_x_l_plus, final_k_end_value,final_x_u_minus, - max_delta_value, min_delta,repeat_count, encode_pos , cur_byte); - - return encode_pos; - } - - public static int BOSEncoderImprove( - int[] data, int block_size, byte[] encoded_result) { - - int length_all = data.length; - - int encode_pos = 0; - int2Bytes(length_all,encode_pos,encoded_result); - encode_pos += 4; - - int block_num = length_all / block_size; - int2Bytes(block_size,encode_pos,encoded_result); - encode_pos+= 4; - - for (int i = 0; i < block_num; i++) { - - - encode_pos = BOSBlockEncoderImprove(data, i, block_size, block_size,encode_pos,encoded_result); -// System.out.println(encode_pos); - } - - int remaining_length = length_all - block_num * block_size; - if (remaining_length <= 3) { - for (int i = remaining_length; i > 0; i--) { - int2Bytes(data[data.length - i], encode_pos, 
encoded_result); - encode_pos += 4; - } - - } else { - - int start = block_num * block_size; - int remaining = length_all-start; - - encode_pos = BOSBlockEncoderImprove(data, block_num, block_size,remaining, encode_pos,encoded_result); - -// int[] ts_block = new int[length_all-start]; -// if (length_all - start >= 0) System.arraycopy(data, start, ts_block, 0, length_all - start); -// -// -// encode_pos = BOSBlockEncoder(ts_block, encode_pos,encoded_result); - - } - - - return encode_pos; - } - - public static int BOSBlockDecoder(byte[] encoded, int decode_pos, int[] value_list,int init_block_size, int block_size, int[] value_pos_arr) { - - int k_byte = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - int k1_byte = (int) (k_byte % pow(2, 16)); - int k1 = k1_byte / 2; - int final_alpha = k1_byte % 2; - - int k2 = (int) (k_byte / pow(2, 16)); - - - int min_delta = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - int count_size = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - ArrayList<Integer> repeat_count = new ArrayList<>(); - if (count_size != 0) { - ArrayList<Integer> repeat_count_result = new ArrayList<>(); - repeat_count = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(init_block_size-1), count_size, repeat_count_result); - decode_pos = repeat_count_result.get(0); - - } - - int cur_block_size = block_size; - for (int i = 1; i < count_size; i += 2) { - cur_block_size -= (repeat_count.get(i) - 1); - } - - int final_k_start_value = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - int final_k_end_value = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - int bit_width_final = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - int left_bit_width = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - int right_bit_width = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); - ArrayList<Integer> 
final_right_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_left_outlier = new ArrayList<>(); - ArrayList<Integer> final_right_outlier = new ArrayList<>(); - ArrayList<Integer> final_normal; - ArrayList<Integer> bitmap_outlier = new ArrayList<>(); - - if (final_alpha == 0) { // 0 - int bitmap_bytes = (int) Math.ceil((double) (cur_block_size + k1 + k2) / (double) 8); -// System.out.println("bitmap_bytes:" + bitmap_bytes); - for (int i = 0; i < bitmap_bytes; i++) { - bitmap_outlier.add(bytes2Integer(encoded, decode_pos, 1)); - decode_pos += 1; - } - int bitmap_outlier_i = 0; - int remaining_bits = 8; - int tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - int i = 0; - while (i < cur_block_size) { - if (remaining_bits > 1) { - int bit_i = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_i == 1) { - int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_left_right == 1) { - final_left_outlier_index.add(i); - } else { - final_right_outlier_index.add(i); - } - } - if (remaining_bits == 0) { - remaining_bits = 8; - if (bitmap_outlier_i >= bitmap_bytes) break; - tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - } - } else if (remaining_bits == 1) { - int bit_i = tmp & 0x1; - remaining_bits = 8; - if (bitmap_outlier_i >= bitmap_bytes) break; - tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - if (bit_i == 1) { - int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_left_right == 1) { - final_left_outlier_index.add(i); - } else { - final_right_outlier_index.add(i); - } - } - } - i++; - } - } else { - - ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); - final_left_outlier_index = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(cur_block_size-1), k1, decode_pos_result_left); - decode_pos = (decode_pos_result_left.get(0)); - - ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); - 
final_right_outlier_index = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(cur_block_size-1), k2, decode_pos_result_right); - decode_pos = (decode_pos_result_right.get(0)); - - } - - - ArrayList<Integer> decode_pos_normal = new ArrayList<>(); - final_normal = decodeOutlier2Bytes(encoded, decode_pos, bit_width_final, cur_block_size - k1 - k2, decode_pos_normal); - - decode_pos = decode_pos_normal.get(0); - if (k1 != 0) { - ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); - final_left_outlier = decodeOutlier2Bytes(encoded, decode_pos, left_bit_width, k1, decode_pos_result_left); - - decode_pos = decode_pos_result_left.get(0); - } - if (k2 != 0) { - ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); - final_right_outlier = decodeOutlier2Bytes(encoded, decode_pos, right_bit_width, k2, decode_pos_result_right); - decode_pos = decode_pos_result_right.get(0); - } - int left_outlier_i = 0; - int right_outlier_i = 0; - int normal_i = 0; - int pre_v; -// int final_k_end_value = (int) (final_k_start_value + pow(2, bit_width_final)); - - int cur_i = 0; - int repeat_i = 0; - for (int i = 0; i < cur_block_size; i++) { - - int current_delta; - if (left_outlier_i >= k1) { - if (right_outlier_i >= k2) { - current_delta = final_normal.get(normal_i) + final_k_start_value+1; - normal_i++; - } else if (i == final_right_outlier_index.get(right_outlier_i)) { - current_delta = final_right_outlier.get(right_outlier_i) + final_k_end_value; - right_outlier_i++; - } else { - current_delta = final_normal.get(normal_i) + final_k_start_value+1; - normal_i++; - } - } else if (i == final_left_outlier_index.get(left_outlier_i)) { - current_delta = final_left_outlier.get(left_outlier_i); - left_outlier_i++; - } else { - - if (right_outlier_i >= k2) { - current_delta = final_normal.get(normal_i) + final_k_start_value+1; - normal_i++; - } else if (i == final_right_outlier_index.get(right_outlier_i)) { - current_delta = final_right_outlier.get(right_outlier_i) + 
final_k_end_value; - right_outlier_i++; - } else { - current_delta = final_normal.get(normal_i) + final_k_start_value+1; - normal_i++; - } - } - pre_v = current_delta + min_delta; - if (repeat_i < count_size) { - if (cur_i == repeat_count.get(repeat_i)) { - cur_i += (repeat_count.get(repeat_i+1)); - - for (int j = 0; j < repeat_count.get(repeat_i + 1); j++) { - value_list[value_pos_arr[0]] = pre_v; - value_pos_arr[0]++; - } - repeat_i += 2; - }else { - cur_i++; - value_list[value_pos_arr[0]] = pre_v; - value_pos_arr[0]++; - } - } else { - cur_i++; - value_list[value_pos_arr[0]] = pre_v; - value_pos_arr[0]++; - } - } - return decode_pos; - } - - public static void BOSDecoder(byte[] encoded) { - - int decode_pos = 0; - int length_all = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - int block_size = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - - int block_num = length_all / block_size; - int remain_length = length_all - block_num * block_size; - - int[] value_list = new int[length_all+block_size]; - int[] value_pos_arr = new int[1]; - - for (int k = 0; k < block_num; k++) { - decode_pos = BOSBlockDecoder(encoded, decode_pos, value_list,block_size, block_size,value_pos_arr); - } - - if (remain_length <= 3) { - for (int i = 0; i < remain_length; i++) { - int value_end = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - value_list[value_pos_arr[0]] = value_end; - value_pos_arr[0]++; - } - } else { - BOSBlockDecoder(encoded, decode_pos, value_list,block_size, remain_length, value_pos_arr); - } - } - - public static int DecodeBits(byte[] cur_byte, int bit_width, int[] decode_pos_list) { - int decode_pos = decode_pos_list[0]; - int bit_index = decode_pos_list[1]; //cur_byte[decode_pos + 1]; - int remaining_bits = bit_width; - int num = 0; - - while (remaining_bits > 0) { - int available_bits = bit_index; - int bits_to_read = Math.min(available_bits, remaining_bits); - - // 计算要读取的位的掩码 - int mask = (1 << bits_to_read) - 1; - int bits = 
(cur_byte[decode_pos] >> (available_bits - bits_to_read)) & mask; - - // 将读取的位合并到结果中 - num = (num << bits_to_read) | bits; - - // 更新位宽和 bit_index - remaining_bits -= bits_to_read; - bit_index = available_bits - bits_to_read; - - if (bit_index == 0) { - bit_index = 8; - decode_pos++; - } - } - decode_pos_list[0] = decode_pos; - decode_pos_list[1] = bit_index; - - return num; - } - - public static int BOSBlockDecoderImprove(byte[] encoded, int decode_pos, int[] value_list,int init_block_size, int block_size, int[] value_pos_arr) { - - int k_byte = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - int k1_byte = (int) (k_byte % pow(2, 16)); - int k1 = k1_byte / 2; - int final_alpha = k1_byte % 2; - - int k2 = (int) (k_byte / pow(2, 16)); - -// int value0 = bytes2Integer(encoded, decode_pos, 4); -// decode_pos += 4; -// value_list[value_pos_arr[0]] =value0; -// value_pos_arr[0] ++; - - int min_delta = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - int count_size = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - int[] decode_list = new int[2]; - decode_list[0]= decode_pos; - decode_list[1]= 8; - - int valuePos = value_pos_arr[0]; - -// ArrayList<Integer> repeat_count = new ArrayList<>(); - ArrayList<Integer> repeat_count = new ArrayList<>(); - if (count_size != 0) { - int bit_width_init = getBitWith(init_block_size-1); - for(int i = 0;i<count_size;i++){ - int repeat_count_v = DecodeBits(encoded, bit_width_init, decode_list); - repeat_count.add(repeat_count_v); - } - - if(decode_list[1] != 8){ - decode_list[1] = 8; - decode_list[0] ++; - } -// repeat_count = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(init_block_size-1), count_size, repeat_count_result); - decode_pos = decode_list[0]; -// decode_list[1]= 8; - } - - int cur_block_size = block_size; - for (int i = 1; i < count_size; i += 2) { - cur_block_size -= (repeat_count.get(i) - 1); - } - - int bit_width_final = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - 
- - - int pre_v; - int cur_i = 0; - int repeat_i = 0; - if(k1==0 && k2==0){ -// int pre_v = value0; - decode_list[0] = decode_pos; - decode_list[1]= 8; - for (int i = 0; i < cur_block_size; i++) { - pre_v = min_delta + DecodeBits(encoded, bit_width_final, decode_list); -// value_list[value_pos_arr[0]++] = pre_v; - if (repeat_i < count_size && cur_i == repeat_count.get(repeat_i)) { - cur_i += (repeat_count.get(repeat_i+1)); - - for (int j = 0; j < repeat_count.get(repeat_i + 1); j++) { - value_list[value_pos_arr[0]++] = pre_v; - } - repeat_i += 2; - } else { - cur_i++; - value_list[value_pos_arr[0]++] = pre_v; - } - } - if(decode_list[1] != 8){ - decode_list[1] = 8; - decode_list[0] ++; - } - -// value_pos_arr[0] = valuePos; - return decode_list[0]; - } - - ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); -// ArrayList<Integer> final_left_outlier = new ArrayList<>(); -// ArrayList<Integer> final_right_outlier = new ArrayList<>(); -// ArrayList<Integer> final_normal= new ArrayList<>();; - ArrayList<Integer> bitmap_outlier = new ArrayList<>(); - int final_k_start_value = 0; - int final_k_end_value = 0; -// int bit_width_final = 0; - int left_bit_width = 0; - int right_bit_width = 0; - - final_k_start_value = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - final_k_end_value = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - -// bit_width_final = bytes2Integer(encoded, decode_pos, 1); -// decode_pos += 1; - - left_bit_width = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - right_bit_width = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - if (final_alpha == 0) { - int bitmap_bytes = (int) Math.ceil((double) (cur_block_size + k1 + k2) / (double) 8); - for (int i = 0; i < bitmap_bytes; i++) { - bitmap_outlier.add(bytes2Integer(encoded, decode_pos, 1)); - decode_pos += 1; - } - int bitmap_outlier_i = 0; - int remaining_bits = 8; - int tmp = 
bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - int i = 0; - while (i < cur_block_size ) { - if (remaining_bits > 1) { - int bit_i = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_i == 1) { - int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_left_right == 1) { - final_left_outlier_index.add(i); - } else { - final_right_outlier_index.add(i); - } - } - if (remaining_bits == 0) { - remaining_bits = 8; - if (bitmap_outlier_i >= bitmap_bytes) break; - tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - } - } else if (remaining_bits == 1) { - int bit_i = tmp & 0x1; - remaining_bits = 8; - if (bitmap_outlier_i >= bitmap_bytes) break; - tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - if (bit_i == 1) { - int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_left_right == 1) { - final_left_outlier_index.add(i); - } else { - final_right_outlier_index.add(i); - } - } - } - i++; - } - } else { - ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); - final_left_outlier_index = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(cur_block_size-1), k1, decode_pos_result_left); - decode_pos = (decode_pos_result_left.get(0)); - ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); - final_right_outlier_index = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(cur_block_size-1), k2, decode_pos_result_right); - decode_pos = (decode_pos_result_right.get(0)); - } - - - - - -// ArrayList<Integer> decode_pos_normal = new ArrayList<>(); -// final_normal = decodeOutlier2Bytes(encoded, decode_pos, bit_width_final, block_size - k1 - k2, decode_pos_normal); -// -// decode_pos = decode_pos_normal.get(0); -// if (k1 != 0) { -// ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); -// final_left_outlier = decodeOutlier2Bytes(encoded, decode_pos, left_bit_width, k1, decode_pos_result_left); -// decode_pos = 
decode_pos_result_left.get(0); -// } -// if (k2 != 0) { -// ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); -// final_right_outlier = decodeOutlier2Bytes(encoded, decode_pos, right_bit_width, k2, decode_pos_result_right); -// decode_pos = decode_pos_result_right.get(0); -// } - int left_outlier_i = 0; - int right_outlier_i = 0; - int normal_i = 0; -// int pre_v = value0; -// int final_k_end_value = (int) (final_k_start_value + pow(2, bit_width_final)); - -// Precompute constants - int normalOffset = min_delta + final_k_start_value; - int rightOutlierOffset = min_delta + final_k_end_value; - -// Initialize indices and pre-fetch next outlier positions - int leftOutlierNextIndex = (left_outlier_i < k1) ? final_left_outlier_index.get(left_outlier_i) : Integer.MAX_VALUE; - int rightOutlierNextIndex = (right_outlier_i < k2) ? final_right_outlier_index.get(right_outlier_i) : Integer.MAX_VALUE; - decode_list[0]= decode_pos; -// decode_list[1]= 8; - // Use a local variable for the position - for (int i = 0; i < cur_block_size; i++) { -// int currentDelta; - if (i == leftOutlierNextIndex) { - // Process left outlier - pre_v = min_delta + DecodeBits(encoded, left_bit_width, decode_list); // final_left_outlier.get(left_outlier_i); - left_outlier_i++; - leftOutlierNextIndex = (left_outlier_i < k1) ? final_left_outlier_index.get(left_outlier_i) : Integer.MAX_VALUE; - } else if (i == rightOutlierNextIndex) { - // Process right outlier - pre_v = rightOutlierOffset + DecodeBits(encoded, right_bit_width, decode_list);// final_right_outlier.get(right_outlier_i); - right_outlier_i++; - rightOutlierNextIndex = (right_outlier_i < k2) ? 
final_right_outlier_index.get(right_outlier_i) : Integer.MAX_VALUE; - } else { - // Process normal value - pre_v = normalOffset + DecodeBits(encoded, bit_width_final, decode_list); - normal_i++; - } - if (repeat_i < count_size && cur_i == repeat_count.get(repeat_i)) { - cur_i += (repeat_count.get(repeat_i+1)); - - for (int j = 0; j < repeat_count.get(repeat_i + 1); j++) { - value_list[value_pos_arr[0]++] = pre_v; - } - repeat_i += 2; - } else { - cur_i++; - value_list[value_pos_arr[0]++] = pre_v; - } - // Update the cumulative value and store it -// pre_v += deZigzag(currentDelta); -// value_list[valuePos++] = pre_v; - } -// value_pos_arr[0] = valuePos; - if(decode_list[1]!=8){ - return decode_list[0]+1; - }else { - return decode_list[0]; - } -// decode_pos = decode_list[0]; -// Update the position in the array - - -// return decode_pos; - } - public static void BOSDecoderImprove(byte[] encoded) { - - int decode_pos = 0; - int length_all = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - int block_size = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - - int block_num = length_all / block_size; - int remain_length = length_all - block_num * block_size; - - int[] value_list = new int[length_all+block_size]; - int[] value_pos_arr = new int[1]; - - for (int k = 0; k < block_num; k++) { -// System.out.println(k); - decode_pos = BOSBlockDecoderImprove(encoded, decode_pos, value_list, block_size, block_size, value_pos_arr); -// System.out.println(decode_pos); - } - if (remain_length <= 3) { - for (int i = 0; i < remain_length; i++) { - int value_end = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - value_list[value_pos_arr[0]] = value_end; - value_pos_arr[0]++; - } - } else { - BOSBlockDecoderImprove(encoded, decode_pos, value_list,block_size, remain_length, value_pos_arr); - } - } - - public static void main(@org.jetbrains.annotations.NotNull String[] args) throws IOException { - String parent_dir = 
"/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/";// your data path -// String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String output_parent_dir = parent_dir + "icde0802/compression_ratio/rle_bos"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } - - output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 -// dataset_block_size.add(256); - output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 -// dataset_block_size.add(2048); - 
output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 -// dataset_block_size.add(1024); - - int repeatTime2 = 100; -// for (int file_i = 8; file_i < 9; file_i++) { - - for (int file_i = 0; file_i < input_path_list.size(); file_i++) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { - System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - - - loader.readHeaders(); - while (loader.readRecord()) { - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); - } - - inputStream.close(); - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; - - - int length = 0; - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - length = 
BOSEncoder(data2_arr, dataset_block_size.get(file_i), encoded_result); - } - - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - compressed_size += length; - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoder(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - - String[] record = { - f.toString(), - "RLE+BOS-V", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); - } - writer.close(); - - - } - } - - @Test - public void ExpTest() throws IOException { - String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/";// your data path -// String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String output_parent_dir = parent_dir + "icde0802/compression_ratio/exp_v"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - - dataset_name.add("Synthetic_Exp_100"); - dataset_name.add("Synthetic_Exp_1000"); - dataset_name.add("Synthetic_Exp_10000"); - dataset_name.add("Synthetic_Exp_100000"); - dataset_name.add("Synthetic_Exp_1000000"); - dataset_name.add("Synthetic_Normal_100"); - dataset_name.add("Synthetic_Normal_1000"); - dataset_name.add("Synthetic_Normal_10000"); - dataset_name.add("Synthetic_Normal_100000"); - dataset_name.add("Synthetic_Normal_1000000"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } - - output_path_list.add(output_parent_dir + 
"/Exp_100.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Exp_1000.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Exp_10000.csv");// 2 - output_path_list.add(output_parent_dir + "/Exp_100000.csv");// 2 - output_path_list.add(output_parent_dir + "/Exp_1000000.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Normal_100.csv"); // 3 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Normal_1000.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Normal_10000.csv");//5 - output_path_list.add(output_parent_dir + "/Normal_100000.csv");//5 - output_path_list.add(output_parent_dir + "/Normal_1000000.csv");//5 -// dataset_block_size.add(2048); - - int repeatTime2 = 1000; -// for (int file_i = 8; file_i < 9; file_i++) { - - for (int file_i = input_path_list.size()-1; file_i >=0 ; file_i--) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { - System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - - - loader.readHeaders(); - while (loader.readRecord()) { - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); - } - - 
inputStream.close(); - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; - - - int length = 0; - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - length = BOSEncoder(data2_arr, dataset_block_size.get(file_i), encoded_result); - } - - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - compressed_size += length; - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoder(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - - String[] record = { - f.toString(), - "RLE+BOS-V", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); - } - writer.close(); - - - } - } - - @Test - public void BOSImproveTimeTest() throws IOException { - String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/"; // your data path -// String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String output_parent_dir = parent_dir + "icde0802/supply_experiment/R1O4_decode_time/compression_ratio/rle_bos_v"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - 
dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } - - output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 -// dataset_block_size.add(1024); - - int repeatTime2 = 100; -// for (int file_i = 4; file_i < 5; file_i++) { -// - for (int file_i = 0; file_i < input_path_list.size(); file_i++) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - 
- File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { -// f=tempList[1]; - System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - - loader.readHeaders(); - while (loader.readRecord()) { -// String value = loader.getValues()[index]; - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); -// data.add(Integer.valueOf(value)); - } - inputStream.close(); - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; - - - int length = 0; - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - length = BOSEncoderImprove(data2_arr, dataset_block_size.get(file_i), encoded_result); - } - - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - compressed_size += length; - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoderImprove(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - - String[] record = { - f.toString(), - "RLE+BOS-V", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - 
String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); -// break; - } - writer.close(); - } - } - -}
diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/RLEBPTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/RLEBPTest.java new file mode 100644 index 0000000..7fdf262 --- /dev/null +++ b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/RLEBPTest.java
@@ -0,0 +1,1290 @@ +package org.apache.iotdb.tsfile.encoding; + +import com.csvreader.CsvReader; +import com.csvreader.CsvWriter; +import org.junit.Test; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Stream; + +import static java.lang.Math.pow; + +public class RLEBPTest { + + public static int getBitWith(int num) { + if (num == 0) + return 1; + else + return 32 - Integer.numberOfLeadingZeros(num); + } + + public static int getCount(long long1, int mask) { + return ((int) (long1 & mask)); + } + + public static int getUniqueValue(long long1, int left_shift) { + return ((int) ((long1) >> left_shift)); + } + + public static void int2Bytes(int integer, int encode_pos, byte[] cur_byte) { + cur_byte[encode_pos] = (byte) (integer >> 24); + cur_byte[encode_pos + 1] = (byte) (integer >> 16); + cur_byte[encode_pos + 2] = (byte) (integer >> 8); + cur_byte[encode_pos + 3] = (byte) (integer); + } + + public static void intByte2Bytes(int integer, int encode_pos, byte[] cur_byte) { + cur_byte[encode_pos] = (byte) (integer); + } + + private static void long2intBytes(long integer, int encode_pos, byte[] cur_byte) { + cur_byte[encode_pos] = (byte) (integer >> 24); + cur_byte[encode_pos + 1] = (byte) (integer >> 16); + cur_byte[encode_pos + 2] = (byte) (integer >> 8); + cur_byte[encode_pos + 3] = (byte) (integer); + } + + public static int bytes2Integer(byte[] encoded, int start, int num) { + int value = 0; + if (num > 4) { + System.out.println("bytes2Integer error"); + return 0; + } + for (int i = 0; i < num; i++) { + value <<= 8; + int b = encoded[i + start] & 0xFF; + value |= b; + } + return value; + } + + private static long bytesLong2Integer(byte[] encoded, int decode_pos) { + long value = 0; + for (int i = 0; i < 
4; i++) { + value <<= 8; + int b = encoded[i + decode_pos] & 0xFF; + value |= b; + } + return value; + } + + public static void pack8Values(ArrayList<Integer> values, int offset, int width, int encode_pos, + byte[] encoded_result) { + int bufIdx = 0; + int valueIdx = offset; + // remaining bits for the current unfinished Integer + int leftBit = 0; + + while (valueIdx < 8 + offset) { + // buffer is used for saving 32 bits as a part of result + int buffer = 0; + // remaining size of bits in the 'buffer' + int leftSize = 32; + + // encode the left bits of current Integer to 'buffer' + if (leftBit > 0) { + buffer |= (values.get(valueIdx) << (32 - leftBit)); + leftSize -= leftBit; + leftBit = 0; + valueIdx++; + } + + while (leftSize >= width && valueIdx < 8 + offset) { + // encode one Integer to the 'buffer' + buffer |= (values.get(valueIdx) << (leftSize - width)); + leftSize -= width; + valueIdx++; + } + // If the remaining space of the buffer can not save the bits for one Integer, + if (leftSize > 0 && valueIdx < 8 + offset) { + // put the first 'leftSize' bits of the Integer into remaining space of the + // buffer + buffer |= (values.get(valueIdx) >>> (width - leftSize)); + leftBit = width - leftSize; + } + + // put the buffer into the final result + for (int j = 0; j < 4; j++) { + encoded_result[encode_pos] = (byte) ((buffer >>> ((3 - j) * 8)) & 0xFF); + encode_pos++; + bufIdx++; + if (bufIdx >= width) { + return; + } + } + } + + } + + public static void unpack8Values(byte[] encoded, int offset, int width, ArrayList<Integer> result_list) { + int byteIdx = offset; + long buffer = 0; + // total bits which have read from 'buf' to 'buffer'. i.e., + // number of available bits to be decoded. 
+ int totalBits = 0; + int valueIdx = 0; + + while (valueIdx < 8) { + // If current available bits are not enough to decode one Integer, + // then add next byte from buf to 'buffer' until totalBits >= width + while (totalBits < width) { + buffer = (buffer << 8) | (encoded[byteIdx] & 0xFF); + byteIdx++; + totalBits += 8; + } + + // If current available bits are enough to decode one Integer, + // then decode one Integer one by one until left bits in 'buffer' is + // not enough to decode one Integer. + while (totalBits >= width && valueIdx < 8) { + result_list.add((int) (buffer >>> (totalBits - width))); + valueIdx++; + totalBits -= width; + buffer = buffer & ((1L << totalBits) - 1); + } + } + } + + public static int bitPacking(ArrayList<Integer> numbers, int start, int bit_width, int encode_pos, + byte[] encoded_result) { + int block_num = (numbers.size() - start) / 8; + for (int i = 0; i < block_num; i++) { + pack8Values(numbers, start + i * 8, bit_width, encode_pos, encoded_result); + encode_pos += bit_width; + } + + return encode_pos; + + } + + public static ArrayList<Integer> decodeBitPacking( + byte[] encoded, int decode_pos, int bit_width, int block_size) { + ArrayList<Integer> result_list = new ArrayList<>(); + int block_num = (block_size - 1) / 8; + + for (int i = 0; i < block_num; i++) { // bitpacking + unpack8Values(encoded, decode_pos, bit_width, result_list); + decode_pos += bit_width; + } + return result_list; + } + + public static int[] getAbsDeltaTsBlock( + int[] ts_block, + int i, + int block_size, + int remaining, + int[] min_delta, + ArrayList<Integer> repeat_count) { + int[] ts_block_delta = new int[remaining]; + + int value_delta_min = Integer.MAX_VALUE; + int value_delta_max = Integer.MIN_VALUE; + int base = i * block_size; + int end = i * block_size + remaining; + for (int j = base; j < end; j++) { + + int integer = ts_block[j]; + if (integer < value_delta_min) + value_delta_min = integer; + if (integer > value_delta_max) { + value_delta_max = 
integer; + } + } + int pre_delta = ts_block[i * block_size] - value_delta_min; + int pre_count = 1; + + min_delta[0] = (value_delta_min); + int repeat_i = 0; + int ts_block_delta_i = 0; + for (int j = base + 1; j < end; j++) { + int delta = ts_block[j] - value_delta_min; + if (delta == pre_delta) { + pre_count++; + } else { + if (pre_count > 7) { + repeat_count.add(repeat_i); + repeat_count.add(pre_count); + ts_block_delta[ts_block_delta_i] = pre_delta; + ts_block_delta_i++; + } else { + for (int k = 0; k < pre_count; k++) { + ts_block_delta[ts_block_delta_i] = pre_delta; + ts_block_delta_i++; + } + } + pre_count = 1; + repeat_i = j - i * block_size; + } + pre_delta = delta; + + } + for (int j = 0; j < pre_count; j++) { + ts_block_delta[ts_block_delta_i] = pre_delta; + ts_block_delta_i++; + } + min_delta[1] = (ts_block_delta_i); + min_delta[2] = (value_delta_max - value_delta_min); + int[] new_ts_block_delta = new int[ts_block_delta_i]; + System.arraycopy(ts_block_delta, 0, new_ts_block_delta, 0, ts_block_delta_i); + + return new_ts_block_delta; + } + + public static int encodeOutlier2Bytes( + ArrayList<Integer> ts_block_delta, + int bit_width, + int encode_pos, byte[] encoded_result) { + + encode_pos = bitPacking(ts_block_delta, 0, bit_width, encode_pos, encoded_result); + + int n_k = ts_block_delta.size(); + int n_k_b = n_k / 8; + long cur_remaining = 0; // encoded int + int cur_number_bits = 0; // the bit width used of encoded int + for (int i = n_k_b * 8; i < n_k; i++) { + long cur_value = ts_block_delta.get(i); + int cur_bit_width = bit_width; // remaining bit width of current value + + if (cur_number_bits + bit_width >= 32) { + cur_remaining <<= (32 - cur_number_bits); + cur_bit_width = bit_width - 32 + cur_number_bits; + cur_remaining += ((cur_value >> cur_bit_width)); + long2intBytes(cur_remaining, encode_pos, encoded_result); + encode_pos += 4; + + cur_remaining = 0; + cur_number_bits = 0; + } + + cur_remaining <<= cur_bit_width; + cur_number_bits += 
cur_bit_width; + cur_remaining += (((cur_value << (32 - cur_bit_width)) & 0xFFFFFFFFL) >> (32 - cur_bit_width)); + } + cur_remaining <<= (32 - cur_number_bits); + long2intBytes(cur_remaining, encode_pos, encoded_result); + encode_pos += 4; + return encode_pos; + + } + + public static ArrayList<Integer> decodeOutlier2Bytes( + byte[] encoded, + int decode_pos, + int bit_width, + int length, + ArrayList<Integer> encoded_pos_result) { + + int n_k_b = length / 8; + int remaining = length - n_k_b * 8; + ArrayList<Integer> result_list = new ArrayList<>( + decodeBitPacking(encoded, decode_pos, bit_width, n_k_b * 8 + 1)); + decode_pos += n_k_b * bit_width; + + ArrayList<Long> int_remaining = new ArrayList<>(); + int int_remaining_size = remaining * bit_width / 32 + 1; + for (int j = 0; j < int_remaining_size; j++) { + + int_remaining.add(bytesLong2Integer(encoded, decode_pos)); + decode_pos += 4; + } + + int cur_remaining_bits = 32; // remaining bit width of current value + long cur_number = int_remaining.get(0); + int cur_number_i = 1; + for (int i = n_k_b * 8; i < length; i++) { + if (bit_width < cur_remaining_bits) { + int tmp = (int) (cur_number >> (32 - bit_width)); + result_list.add(tmp); + cur_number <<= bit_width; + cur_number &= 0xFFFFFFFFL; + cur_remaining_bits -= bit_width; + } else { + int tmp = (int) (cur_number >> (32 - cur_remaining_bits)); + int remain_bits = bit_width - cur_remaining_bits; + tmp <<= remain_bits; + + cur_number = int_remaining.get(cur_number_i); + cur_number_i++; + tmp += (cur_number >> (32 - remain_bits)); + result_list.add(tmp); + cur_number <<= remain_bits; + cur_number &= 0xFFFFFFFFL; + cur_remaining_bits = 32 - remain_bits; + } + } + encoded_pos_result.add(decode_pos); + return result_list; + } + + private static int BOSEncodeBits(int[] ts_block_delta, + int init_block_size, + int final_k_start_value, + int final_x_l_plus, + int final_k_end_value, + int final_x_u_minus, + int max_delta_value, + int[] min_delta, + ArrayList<Integer> 
repeat_count, + int encode_pos, + byte[] cur_byte) { + int block_size = ts_block_delta.length; + + ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); + ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); + ArrayList<Integer> final_left_outlier = new ArrayList<>(); + ArrayList<Integer> final_right_outlier = new ArrayList<>(); + ArrayList<Integer> final_normal = new ArrayList<>(); + int k1 = 0; + int k2 = 0; + ArrayList<Integer> bitmap_outlier = new ArrayList<>(); + int index_bitmap_outlier = 0; + int cur_index_bitmap_outlier_bits = 0; + for (int i = 0; i < block_size; i++) { + int cur_value = ts_block_delta[i]; + if (cur_value < final_k_start_value) { + final_left_outlier.add(cur_value); + final_left_outlier_index.add(i); + if (cur_index_bitmap_outlier_bits % 8 != 7) { + index_bitmap_outlier <<= 2; + index_bitmap_outlier += 3; + cur_index_bitmap_outlier_bits += 2; + } else { + index_bitmap_outlier <<= 1; + index_bitmap_outlier += 1; + bitmap_outlier.add(index_bitmap_outlier); + index_bitmap_outlier = 1; + cur_index_bitmap_outlier_bits = 1; + } + + k1++; + + } else if (cur_value >= final_k_end_value) { + final_right_outlier.add(cur_value - final_k_end_value); + final_right_outlier_index.add(i); + if (cur_index_bitmap_outlier_bits % 8 != 7) { + index_bitmap_outlier <<= 2; + index_bitmap_outlier += 2; + cur_index_bitmap_outlier_bits += 2; + } else { + index_bitmap_outlier <<= 1; + index_bitmap_outlier += 1; + bitmap_outlier.add(index_bitmap_outlier); + index_bitmap_outlier = 0; + cur_index_bitmap_outlier_bits = 1; + } + k2++; + + } else { + final_normal.add(cur_value - final_x_l_plus); + index_bitmap_outlier <<= 1; + cur_index_bitmap_outlier_bits += 1; + } + if (cur_index_bitmap_outlier_bits % 8 == 0) { + bitmap_outlier.add(index_bitmap_outlier); + index_bitmap_outlier = 0; + } + } + if (cur_index_bitmap_outlier_bits % 8 != 0) { + + index_bitmap_outlier <<= (8 - cur_index_bitmap_outlier_bits % 8); + + index_bitmap_outlier &= 0xFF; + 
bitmap_outlier.add(index_bitmap_outlier); + } + int final_alpha = ((k1 + k2) * getBitWith(block_size - 1)) <= (block_size + k1 + k2) ? 1 : 0; + + int k_byte = (k1 << 1); + k_byte += final_alpha; + k_byte += (k2 << 16); + + int2Bytes(k_byte, encode_pos, cur_byte); + encode_pos += 4; + + int2Bytes(min_delta[0], encode_pos, cur_byte); + encode_pos += 4; + int size = repeat_count.size(); + intByte2Bytes(size, encode_pos, cur_byte); + encode_pos += 1; + + if (size != 0) + encode_pos = encodeOutlier2Bytes(repeat_count, getBitWith(init_block_size - 1), encode_pos, cur_byte); + + int2Bytes(final_x_l_plus, encode_pos, cur_byte); + encode_pos += 4; + int2Bytes(final_k_end_value, encode_pos, cur_byte); + encode_pos += 4; + + int bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); + intByte2Bytes(bit_width_final, encode_pos, cur_byte); + encode_pos += 1; + int left_bit_width = getBitWith(final_k_start_value); + int right_bit_width = getBitWith(max_delta_value - final_k_end_value); + intByte2Bytes(left_bit_width, encode_pos, cur_byte); + encode_pos += 1; + intByte2Bytes(right_bit_width, encode_pos, cur_byte); + encode_pos += 1; + if (final_alpha == 0) { + + for (int i : bitmap_outlier) { + + intByte2Bytes(i, encode_pos, cur_byte); + encode_pos += 1; + } + } else { + encode_pos = encodeOutlier2Bytes(final_left_outlier_index, getBitWith(block_size - 1), encode_pos, + cur_byte); + encode_pos = encodeOutlier2Bytes(final_right_outlier_index, getBitWith(block_size - 1), encode_pos, + cur_byte); + } + encode_pos = encodeOutlier2Bytes(final_normal, bit_width_final, encode_pos, cur_byte); + if (k1 != 0) + encode_pos = encodeOutlier2Bytes(final_left_outlier, left_bit_width, encode_pos, cur_byte); + if (k2 != 0) + encode_pos = encodeOutlier2Bytes(final_right_outlier, right_bit_width, encode_pos, cur_byte); + return encode_pos; + + } + + private static int BOSBlockEncoder(int[] ts_block, int block_i, int block_size, int remaining, int encode_pos, + byte[] cur_byte) { + + 
ArrayList<Integer> repeat_count = new ArrayList<>(); + int init_block_size = block_size; + + int[] min_delta = new int[3]; + int[] ts_block_delta = getAbsDeltaTsBlock(ts_block, block_i, init_block_size, remaining, min_delta, + repeat_count); + block_size = min_delta[1]; + + int max_delta_value = min_delta[2]; + + return encode_pos; + } + + public static int BOSEncoder( + int[] data, int block_size, byte[] encoded_result) { + + int length_all = data.length; + + int encode_pos = 0; + int2Bytes(length_all, encode_pos, encoded_result); + encode_pos += 4; + + int block_num = length_all / block_size; + int2Bytes(block_size, encode_pos, encoded_result); + encode_pos += 4; + + for (int i = 0; i < block_num; i++) { + encode_pos = BOSBlockEncoder(data, i, block_size, block_size, encode_pos, encoded_result); + // System.out.println(encode_pos); + } + + int remaining_length = length_all - block_num * block_size; + if (remaining_length <= 3) { + for (int i = remaining_length; i > 0; i--) { + int2Bytes(data[data.length - i], encode_pos, encoded_result); + encode_pos += 4; + } + + } else { + + int start = block_num * block_size; + int remaining = length_all - start; + + encode_pos = BOSBlockEncoder(data, block_num, block_size, remaining, encode_pos, encoded_result); + + // int[] ts_block = new int[length_all-start]; + // if (length_all - start >= 0) System.arraycopy(data, start, ts_block, 0, + // length_all - start); + // + // + // encode_pos = BOSBlockEncoder(ts_block, encode_pos,encoded_result); + + } + + return encode_pos; + } + + public static int EncodeBits(int num, + int bit_width, + int encode_pos, + byte[] cur_byte, + int[] bit_index_list) { + // 找到要插入的位的索引 + int bit_index = bit_index_list[0];// cur_byte[encode_pos + 1]; + + // 计算数值的起始位位置 + int remaining_bits = bit_width; + + while (remaining_bits > 0) { + // 计算在当前字节中可以使用的位数 + int available_bits = bit_index; + int bits_to_write = Math.min(available_bits, remaining_bits); + + // 更新 bit_index + bit_index = available_bits - 
bits_to_write; + + // 计算要写入的位的掩码和数值 + int mask = (1 << bits_to_write) - 1; + int bits = (num >> (remaining_bits - bits_to_write)) & mask; + + // 写入到当前位置 + cur_byte[encode_pos] &= (byte) ~(mask << bit_index); // 清除对应位置的位 + cur_byte[encode_pos] |= (byte) (bits << bit_index); + + // 更新位宽和数值 + remaining_bits -= bits_to_write; + if (bit_index == 0) { + bit_index = 8; + encode_pos++; + } + } + bit_index_list[0] = bit_index; + // cur_byte[encode_pos + 1] = (byte) bit_index; + return encode_pos; + } + + private static int BOSBlockEncoderImprove(int[] ts_block, int block_i, int block_size, int remaining, + int encode_pos, byte[] cur_byte) { + + ArrayList<Integer> repeat_count = new ArrayList<>(); + int init_block_size = block_size; + + int[] min_delta = new int[3]; + int[] ts_block_delta = getAbsDeltaTsBlock(ts_block, block_i, init_block_size, remaining, min_delta, + repeat_count); + + int max_delta_value = min_delta[2]; + + int2Bytes(min_delta[0], encode_pos, cur_byte); + encode_pos += 4; + + int size = repeat_count.size(); + intByte2Bytes(size, encode_pos, cur_byte); + encode_pos += 1; + + int[] bit_index_list = new int[1]; + bit_index_list[0] = 8; + if (size != 0) { + int bit_width_init = getBitWith(init_block_size - 1); + for (int repeat_count_v : repeat_count) { + encode_pos = EncodeBits(repeat_count_v, bit_width_init, encode_pos, cur_byte, bit_index_list); + } + if (bit_index_list[0] != 8) { + bit_index_list[0] = 8; + encode_pos++; + } + } + + int bit_width_final = getBitWith(max_delta_value); + intByte2Bytes(bit_width_final, encode_pos, cur_byte); + encode_pos += 1; + + bit_index_list[0] = 8; + for (int cur_value : ts_block_delta) { + encode_pos = EncodeBits(cur_value, bit_width_final, encode_pos, cur_byte, bit_index_list); + // final_normal.add(cur_value); + } + if (bit_index_list[0] != 8) { + encode_pos++; + } + + return encode_pos; + } + + public static int BOSEncoderImprove( + int[] data, int block_size, byte[] encoded_result) { + + int length_all = data.length; 
+ + int encode_pos = 0; + int2Bytes(length_all, encode_pos, encoded_result); + encode_pos += 4; + + int block_num = length_all / block_size; + int2Bytes(block_size, encode_pos, encoded_result); + encode_pos += 4; + + for (int i = 0; i < block_num; i++) { + + encode_pos = BOSBlockEncoderImprove(data, i, block_size, block_size, encode_pos, encoded_result); + // System.out.println(encode_pos); + } + + int remaining_length = length_all - block_num * block_size; + if (remaining_length <= 3) { + for (int i = remaining_length; i > 0; i--) { + int2Bytes(data[data.length - i], encode_pos, encoded_result); + encode_pos += 4; + } + + } else { + + int start = block_num * block_size; + int remaining = length_all - start; + + encode_pos = BOSBlockEncoderImprove(data, block_num, block_size, remaining, encode_pos, encoded_result); + + // int[] ts_block = new int[length_all-start]; + // if (length_all - start >= 0) System.arraycopy(data, start, ts_block, 0, + // length_all - start); + // + // + // encode_pos = BOSBlockEncoder(ts_block, encode_pos,encoded_result); + + } + + return encode_pos; + } + + public static int BOSBlockDecoder(byte[] encoded, int decode_pos, int[] value_list, int init_block_size, + int block_size, int[] value_pos_arr) { + + int k_byte = bytes2Integer(encoded, decode_pos, 4); + decode_pos += 4; + int k1_byte = (int) (k_byte % pow(2, 16)); + int k1 = k1_byte / 2; + int final_alpha = k1_byte % 2; + + int k2 = (int) (k_byte / pow(2, 16)); + + int min_delta = bytes2Integer(encoded, decode_pos, 4); + decode_pos += 4; + + int count_size = bytes2Integer(encoded, decode_pos, 1); + decode_pos += 1; + + ArrayList<Integer> repeat_count = new ArrayList<>(); + if (count_size != 0) { + ArrayList<Integer> repeat_count_result = new ArrayList<>(); + repeat_count = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(init_block_size - 1), count_size, + repeat_count_result); + decode_pos = repeat_count_result.get(0); + + } + + int cur_block_size = block_size; + for (int i = 1; i 
< count_size; i += 2) { + cur_block_size -= (repeat_count.get(i) - 1); + } + + int final_k_start_value = bytes2Integer(encoded, decode_pos, 4); + decode_pos += 4; + + int final_k_end_value = bytes2Integer(encoded, decode_pos, 4); + decode_pos += 4; + + int bit_width_final = bytes2Integer(encoded, decode_pos, 1); + decode_pos += 1; + + int left_bit_width = bytes2Integer(encoded, decode_pos, 1); + decode_pos += 1; + int right_bit_width = bytes2Integer(encoded, decode_pos, 1); + decode_pos += 1; + + ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); + ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); + ArrayList<Integer> final_left_outlier = new ArrayList<>(); + ArrayList<Integer> final_right_outlier = new ArrayList<>(); + ArrayList<Integer> final_normal; + ArrayList<Integer> bitmap_outlier = new ArrayList<>(); + + if (final_alpha == 0) { // 0 + int bitmap_bytes = (int) Math.ceil((double) (cur_block_size + k1 + k2) / (double) 8); + // System.out.println("bitmap_bytes:" + bitmap_bytes); + for (int i = 0; i < bitmap_bytes; i++) { + bitmap_outlier.add(bytes2Integer(encoded, decode_pos, 1)); + decode_pos += 1; + } + int bitmap_outlier_i = 0; + int remaining_bits = 8; + int tmp = bitmap_outlier.get(bitmap_outlier_i); + bitmap_outlier_i++; + int i = 0; + while (i < cur_block_size) { + if (remaining_bits > 1) { + int bit_i = (tmp >> (remaining_bits - 1)) & 0x1; + remaining_bits -= 1; + if (bit_i == 1) { + int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; + remaining_bits -= 1; + if (bit_left_right == 1) { + final_left_outlier_index.add(i); + } else { + final_right_outlier_index.add(i); + } + } + if (remaining_bits == 0) { + remaining_bits = 8; + if (bitmap_outlier_i >= bitmap_bytes) + break; + tmp = bitmap_outlier.get(bitmap_outlier_i); + bitmap_outlier_i++; + } + } else if (remaining_bits == 1) { + int bit_i = tmp & 0x1; + remaining_bits = 8; + if (bitmap_outlier_i >= bitmap_bytes) + break; + tmp = bitmap_outlier.get(bitmap_outlier_i); 
+ bitmap_outlier_i++; + if (bit_i == 1) { + int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; + remaining_bits -= 1; + if (bit_left_right == 1) { + final_left_outlier_index.add(i); + } else { + final_right_outlier_index.add(i); + } + } + } + i++; + } + } else { + + ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); + final_left_outlier_index = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(cur_block_size - 1), k1, + decode_pos_result_left); + decode_pos = (decode_pos_result_left.get(0)); + + ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); + final_right_outlier_index = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(cur_block_size - 1), k2, + decode_pos_result_right); + decode_pos = (decode_pos_result_right.get(0)); + + } + + ArrayList<Integer> decode_pos_normal = new ArrayList<>(); + final_normal = decodeOutlier2Bytes(encoded, decode_pos, bit_width_final, cur_block_size - k1 - k2, + decode_pos_normal); + + decode_pos = decode_pos_normal.get(0); + if (k1 != 0) { + ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); + final_left_outlier = decodeOutlier2Bytes(encoded, decode_pos, left_bit_width, k1, decode_pos_result_left); + + decode_pos = decode_pos_result_left.get(0); + } + if (k2 != 0) { + ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); + final_right_outlier = decodeOutlier2Bytes(encoded, decode_pos, right_bit_width, k2, + decode_pos_result_right); + decode_pos = decode_pos_result_right.get(0); + } + int left_outlier_i = 0; + int right_outlier_i = 0; + int normal_i = 0; + int pre_v; + // int final_k_end_value = (int) (final_k_start_value + pow(2, + // bit_width_final)); + + int cur_i = 0; + int repeat_i = 0; + for (int i = 0; i < cur_block_size; i++) { + + int current_delta; + if (left_outlier_i >= k1) { + if (right_outlier_i >= k2) { + current_delta = final_normal.get(normal_i) + final_k_start_value + 1; + normal_i++; + } else if (i == final_right_outlier_index.get(right_outlier_i)) { + 
current_delta = final_right_outlier.get(right_outlier_i) + final_k_end_value; + right_outlier_i++; + } else { + current_delta = final_normal.get(normal_i) + final_k_start_value + 1; + normal_i++; + } + } else if (i == final_left_outlier_index.get(left_outlier_i)) { + current_delta = final_left_outlier.get(left_outlier_i); + left_outlier_i++; + } else { + + if (right_outlier_i >= k2) { + current_delta = final_normal.get(normal_i) + final_k_start_value + 1; + normal_i++; + } else if (i == final_right_outlier_index.get(right_outlier_i)) { + current_delta = final_right_outlier.get(right_outlier_i) + final_k_end_value; + right_outlier_i++; + } else { + current_delta = final_normal.get(normal_i) + final_k_start_value + 1; + normal_i++; + } + } + pre_v = current_delta + min_delta; + if (repeat_i < count_size) { + if (cur_i == repeat_count.get(repeat_i)) { + cur_i += (repeat_count.get(repeat_i + 1)); + + for (int j = 0; j < repeat_count.get(repeat_i + 1); j++) { + value_list[value_pos_arr[0]] = pre_v; + value_pos_arr[0]++; + } + repeat_i += 2; + } else { + cur_i++; + value_list[value_pos_arr[0]] = pre_v; + value_pos_arr[0]++; + } + } else { + cur_i++; + value_list[value_pos_arr[0]] = pre_v; + value_pos_arr[0]++; + } + } + return decode_pos; + } + + public static void BOSDecoder(byte[] encoded) { + + int decode_pos = 0; + int length_all = bytes2Integer(encoded, decode_pos, 4); + decode_pos += 4; + int block_size = bytes2Integer(encoded, decode_pos, 4); + decode_pos += 4; + + int block_num = length_all / block_size; + int remain_length = length_all - block_num * block_size; + + int[] value_list = new int[length_all + block_size]; + int[] value_pos_arr = new int[1]; + + for (int k = 0; k < block_num; k++) { + decode_pos = BOSBlockDecoder(encoded, decode_pos, value_list, block_size, block_size, value_pos_arr); + } + + if (remain_length <= 3) { + for (int i = 0; i < remain_length; i++) { + int value_end = bytes2Integer(encoded, decode_pos, 4); + decode_pos += 4; + 
value_list[value_pos_arr[0]] = value_end; + value_pos_arr[0]++; + } + } else { + BOSBlockDecoder(encoded, decode_pos, value_list, block_size, remain_length, value_pos_arr); + } + } + + public static int DecodeBits(byte[] cur_byte, int bit_width, int[] decode_pos_list) { + int decode_pos = decode_pos_list[0]; + int bit_index = decode_pos_list[1]; // cur_byte[decode_pos + 1]; + int remaining_bits = bit_width; + int num = 0; + + while (remaining_bits > 0) { + int available_bits = bit_index; + int bits_to_read = Math.min(available_bits, remaining_bits); + + // 计算要读取的位的掩码 + int mask = (1 << bits_to_read) - 1; + int bits = (cur_byte[decode_pos] >> (available_bits - bits_to_read)) & mask; + + // 将读取的位合并到结果中 + num = (num << bits_to_read) | bits; + + // 更新位宽和 bit_index + remaining_bits -= bits_to_read; + bit_index = available_bits - bits_to_read; + + if (bit_index == 0) { + bit_index = 8; + decode_pos++; + } + } + decode_pos_list[0] = decode_pos; + decode_pos_list[1] = bit_index; + + return num; + } + + public static int BOSBlockDecoderImprove(byte[] encoded, int decode_pos, int[] value_list, int init_block_size, + int block_size, int[] value_pos_arr) { + + int min_delta = bytes2Integer(encoded, decode_pos, 4); + decode_pos += 4; + + int count_size = bytes2Integer(encoded, decode_pos, 1); + decode_pos += 1; + + int[] decode_list = new int[2]; + decode_list[0] = decode_pos; + decode_list[1] = 8; + + ArrayList<Integer> repeat_count = new ArrayList<>(); + if (count_size != 0) { + int bit_width_init = getBitWith(init_block_size - 1); + for (int i = 0; i < count_size; i++) { + int repeat_count_v = DecodeBits(encoded, bit_width_init, decode_list); + repeat_count.add(repeat_count_v); + } + + if (decode_list[1] != 8) { + decode_list[1] = 8; + decode_list[0]++; + } + // repeat_count = decodeOutlier2Bytes(encoded, decode_pos, + // getBitWith(init_block_size-1), count_size, repeat_count_result); + decode_pos = decode_list[0]; + // decode_list[1]= 8; + } + + int cur_block_size = 
block_size; + for (int i = 1; i < count_size; i += 2) { + cur_block_size -= (repeat_count.get(i) - 1); + } + + int bit_width_final = bytes2Integer(encoded, decode_pos, 1); + decode_pos += 1; + + int pre_v; + int cur_i = 0; + int repeat_i = 0; + + decode_list[0] = decode_pos; + decode_list[1] = 8; + for (int i = 0; i < cur_block_size; i++) { + pre_v = min_delta + DecodeBits(encoded, bit_width_final, decode_list); + // value_list[value_pos_arr[0]++] = pre_v; + if (repeat_i < count_size && cur_i == repeat_count.get(repeat_i)) { + cur_i += (repeat_count.get(repeat_i + 1)); + + for (int j = 0; j < repeat_count.get(repeat_i + 1); j++) { + value_list[value_pos_arr[0]++] = pre_v; + } + repeat_i += 2; + } else { + cur_i++; + value_list[value_pos_arr[0]++] = pre_v; + } + } + if (decode_list[1] != 8) { + decode_list[1] = 8; + decode_list[0]++; + } + + return decode_list[0]; + + } + + public static void BOSDecoderImprove(byte[] encoded) { + + int decode_pos = 0; + int length_all = bytes2Integer(encoded, decode_pos, 4); + decode_pos += 4; + int block_size = bytes2Integer(encoded, decode_pos, 4); + decode_pos += 4; + + int block_num = length_all / block_size; + int remain_length = length_all - block_num * block_size; + + int[] value_list = new int[length_all + block_size]; + int[] value_pos_arr = new int[1]; + + for (int k = 0; k < block_num; k++) { + // System.out.println(k); + decode_pos = BOSBlockDecoderImprove(encoded, decode_pos, value_list, block_size, block_size, value_pos_arr); + // System.out.println(decode_pos); + } + if (remain_length <= 3) { + for (int i = 0; i < remain_length; i++) { + int value_end = bytes2Integer(encoded, decode_pos, 4); + decode_pos += 4; + value_list[value_pos_arr[0]] = value_end; + value_pos_arr[0]++; + } + } else { + BOSBlockDecoderImprove(encoded, decode_pos, value_list, block_size, remain_length, value_pos_arr); + } + } + + public static int getDecimalPrecision(String str) { + // 查找小数点的位置 + int decimalIndex = str.indexOf("."); + + // 
如果没有小数点,精度为0 + if (decimalIndex == -1) { + return 0; + } + + // 获取小数点后的部分并返回其长度 + return str.substring(decimalIndex + 1).length(); + } + + public static String extractFileName(String path) { + if (path == null || path.isEmpty()) { + return ""; + } + + File file = new File(path); + String fileName = file.getName(); + + int dotIndex = fileName.lastIndexOf('.'); + + if (dotIndex == -1 || dotIndex == 0) { + return fileName; + } + + return fileName.substring(0, dotIndex); + } + + @Test + public void testSubcolumn() throws IOException { + String parent_dir = "D:/github/xjz17/subcolumn/"; + + String input_parent_dir = parent_dir + "dataset/"; + + String output_parent_dir = "D:/encoding-subcolumn/result/"; + // String output_parent_dir = parent_dir + "result/"; + + String outputPath = output_parent_dir + "rle.csv"; + + int block_size = 1024; + + int repeatTime = 100; + + // repeatTime = 1; + + List<String> integerDatasets = new ArrayList<>(); + integerDatasets.add("Wine-Tasting"); + + CsvWriter writer = new CsvWriter(outputPath, ',', StandardCharsets.UTF_8); + writer.setRecordDelimiter('\n'); + + String[] head = { + "Dataset", + "Encoding Algorithm", + "Encoding Time", + "Decoding Time", + "Points", + "Compressed Size", + "Compression Ratio" + }; + writer.writeRecord(head); // write header to output file + File directory = new File(input_parent_dir); + // File[] csvFiles = directory.listFiles(); + File[] csvFiles = directory.listFiles((dir, name) -> name.endsWith(".csv")); + + for (File file : csvFiles) { + // System.out.println(f); + String datasetName = extractFileName(file.toString()); + System.out.println(datasetName); + InputStream inputStream = Files.newInputStream(file.toPath()); + + CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); + ArrayList<Float> data1 = new ArrayList<>(); + // ArrayList<Integer> data2 = new ArrayList<>(); + + // loader.readHeaders(); + int max_decimal = 0; + while (loader.readRecord()) { + String f_str = 
loader.getValues()[0]; + if (f_str.isEmpty()) { + continue; + } + int cur_decimal = getDecimalPrecision(f_str); + if (cur_decimal > max_decimal) { + max_decimal = cur_decimal; + } + // String value = loader.getValues()[index]; + data1.add(Float.valueOf(f_str)); + // data2.add(Integer.valueOf(loader.getValues()[1])); + // data.add(Integer.valueOf(value)); + } + + inputStream.close(); + int[] data2_arr = new int[data1.size()]; + int max_mul = (int) Math.pow(10, max_decimal); + for (int i = 0; i < data1.size(); i++) { + data2_arr[i] = (int) (data1.get(i) * max_mul); + } + + System.out.println(max_decimal); + byte[] encoded_result = new byte[data2_arr.length * 4]; + long encodeTime = 0; + long decodeTime = 0; + double ratio = 0; + double compressed_size = 0; + + int length = 0; + + long s = System.nanoTime(); + for (int repeat = 0; repeat < repeatTime; repeat++) { + length = BOSEncoderImprove(data2_arr, block_size, encoded_result); + } + + long e = System.nanoTime(); + encodeTime += ((e - s) / repeatTime); + compressed_size += length; + + double ratioTmp; + + if (integerDatasets.contains(datasetName)) { + ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); + } else { + ratioTmp = compressed_size / (double) (data1.size() * Long.BYTES); + } + + ratio += ratioTmp; + + s = System.nanoTime(); + + for (int repeat = 0; repeat < repeatTime; repeat++) { + BOSDecoderImprove(encoded_result); + } + + e = System.nanoTime(); + decodeTime += ((e - s) / repeatTime); + + String[] record = { + datasetName, + "RLE", + String.valueOf(encodeTime), + String.valueOf(decodeTime), + String.valueOf(data1.size()), + String.valueOf(compressed_size), + String.valueOf(ratio) + }; + writer.writeRecord(record); + System.out.println(ratio); + } + writer.close(); + + } + + @Test + public void testTransData() throws IOException { + String parent_dir = "D:/github/xjz17/subcolumn/"; + + String output_parent_dir = "D:/encoding-subcolumn/trans_data_result/"; + // String output_parent_dir = 
parent_dir + "trans_data_result/"; + + String input_parent_dir = parent_dir + "trans_data/"; + + ArrayList<String> input_path_list = new ArrayList<>(); + ArrayList<String> output_path_list = new ArrayList<>(); + ArrayList<String> dataset_name = new ArrayList<>(); + ArrayList<Integer> dataset_block_size = new ArrayList<>(); + + try (Stream<Path> paths = Files.walk(Paths.get(input_parent_dir))) { + paths.filter(Files::isDirectory) + .filter(path -> !path.equals(Paths.get(input_parent_dir))) + .forEach(dir -> { + String name = dir.getFileName().toString(); + dataset_name.add(name); + input_path_list.add(dir.toString()); + dataset_block_size.add(1024); + }); + } + + String outputPath = output_parent_dir + "rle.csv"; + CsvWriter writer = new CsvWriter(outputPath, ',', StandardCharsets.UTF_8); + writer.setRecordDelimiter('\n'); + + String[] head = { + "Dataset", + "Encoding Algorithm", + "Encoding Time", + "Decoding Time", + "Points", + "Compressed Size", + "Compression Ratio" + }; + writer.writeRecord(head); + + int repeatTime = 100; + + for (int file_i = 0; file_i < input_path_list.size(); file_i++) { + + String inputPath = input_path_list.get(file_i); + System.out.println(inputPath); + + File file = new File(inputPath); + File[] tempList = file.listFiles(); + + long totalEncodeTime = 0; + long totalDecodeTime = 0; + double totalCompressedSize = 0; + int totalPoints = 0; + + for (File f : tempList) { + String datasetName = extractFileName(f.toString()); + InputStream inputStream = Files.newInputStream(f.toPath()); + + CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); + ArrayList<Integer> data1 = new ArrayList<>(); + ArrayList<Integer> data2 = new ArrayList<>(); + + loader.readHeaders(); + while (loader.readRecord()) { + // String value = loader.getValues()[index]; + data1.add(Integer.valueOf(loader.getValues()[0])); + data2.add(Integer.valueOf(loader.getValues()[1])); + // data.add(Integer.valueOf(value)); + } + inputStream.close(); + int[] 
data2_arr = new int[data1.size()]; + for (int i = 0; i < data2.size(); i++) { + data2_arr[i] = data2.get(i); + } + byte[] encoded_result = new byte[data2_arr.length * 4]; + long encodeTime = 0; + long decodeTime = 0; + double ratio = 0; + double compressed_size = 0; + + int length = 0; + + long s = System.nanoTime(); + for (int repeat = 0; repeat < repeatTime; repeat++) { + length = BOSEncoderImprove(data2_arr, dataset_block_size.get(file_i), encoded_result); + } + + long e = System.nanoTime(); + encodeTime += ((e - s) / repeatTime); + compressed_size += length; + double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); + ratio += ratioTmp; + s = System.nanoTime(); + + for (int repeat = 0; repeat < repeatTime; repeat++) { + BOSDecoderImprove(encoded_result); + } + + e = System.nanoTime(); + decodeTime += ((e - s) / repeatTime); + + totalEncodeTime += encodeTime; + totalDecodeTime += decodeTime; + totalCompressedSize += compressed_size; + totalPoints += data1.size(); + + } + + double compressionRatio = totalCompressedSize / (totalPoints * Integer.BYTES); + + String[] record = { + dataset_name.get(file_i), + "RLE", + String.valueOf(totalEncodeTime), + String.valueOf(totalDecodeTime), + String.valueOf(totalPoints), + String.valueOf(totalCompressedSize), + String.valueOf(compressionRatio) + }; + + writer.writeRecord(record); + System.out.println(compressionRatio); + } + writer.close(); + } + +}
diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SPRINTZBOSBTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SPRINTZBOSBTest.java deleted file mode 100644 index 2090235..0000000 --- a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SPRINTZBOSBTest.java +++ /dev/null
@@ -1,2150 +0,0 @@ -package org.apache.iotdb.tsfile.encoding; - -import com.csvreader.CsvReader; -import com.csvreader.CsvWriter; -import org.junit.Test; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.util.ArrayList; -import java.util.Arrays; - -import static java.lang.Math.pow; - -public class SPRINTZBOSBTest { - - public static int getBitWith(int num) { - if (num == 0) return 1; - else return 32 - Integer.numberOfLeadingZeros(num); - } - public static int getCount(long long1, int mask) { - return ((int) (long1 & mask)); - } - public static int getUniqueValue(long long1, int left_shift) { - return ((int) ((long1) >> left_shift)); - } - - public static int zigzag(int num) { - if (num < 0) return ((-num)<<1)-1; - else return num<<1; - } - - public static int deZigzag(int num) { - if (num % 2 == 0) return num>>1; - else return -((num+1)>>1); - } - - public static void int2Bytes(int integer,int encode_pos , byte[] cur_byte) { - cur_byte[encode_pos] = (byte) (integer >> 24); - cur_byte[encode_pos+1] = (byte) (integer >> 16); - cur_byte[encode_pos+2] = (byte) (integer >> 8); - cur_byte[encode_pos+3] = (byte) (integer); - } - - - public static void intByte2Bytes(int integer, int encode_pos , byte[] cur_byte) { - cur_byte[encode_pos] = (byte) (integer); - } - - private static void long2intBytes(long integer, int encode_pos , byte[] cur_byte) { - cur_byte[encode_pos] = (byte) (integer >> 24); - cur_byte[encode_pos+1] = (byte) (integer >> 16); - cur_byte[encode_pos+2] = (byte) (integer >> 8); - cur_byte[encode_pos+3] = (byte) (integer); - } - - public static int bytes2Integer(byte[] encoded, int start, int num) { - int value = 0; - if (num > 4) { - System.out.println("bytes2Integer error"); - return 0; - } - for (int i = 0; i < num; i++) { - value <<= 8; - int b = encoded[i + start] & 0xFF; - value |= b; - } - return value; - } - - private static long 
bytesLong2Integer(byte[] encoded, int decode_pos) { - long value = 0; - for (int i = 0; i < 4; i++) { - value <<= 8; - int b = encoded[i + decode_pos] & 0xFF; - value |= b; - } - return value; - } - - public static void pack8Values(ArrayList<Integer> values, int offset, int width, int encode_pos, byte[] encoded_result) { - int bufIdx = 0; - int valueIdx = offset; - // remaining bits for the current unfinished Integer - int leftBit = 0; - - while (valueIdx < 8 + offset) { - // buffer is used for saving 32 bits as a part of result - int buffer = 0; - // remaining size of bits in the 'buffer' - int leftSize = 32; - - // encode the left bits of current Integer to 'buffer' - if (leftBit > 0) { - buffer |= (values.get(valueIdx) << (32 - leftBit)); - leftSize -= leftBit; - leftBit = 0; - valueIdx++; - } - - while (leftSize >= width && valueIdx < 8 + offset) { - // encode one Integer to the 'buffer' - buffer |= (values.get(valueIdx)<< (leftSize - width)); - leftSize -= width; - valueIdx++; - } - // If the remaining space of the buffer can not save the bits for one Integer, - if (leftSize > 0 && valueIdx < 8 + offset) { - // put the first 'leftSize' bits of the Integer into remaining space of the - // buffer - buffer |= (values.get(valueIdx) >>> (width - leftSize)); - leftBit = width - leftSize; - } - - // put the buffer into the final result - for (int j = 0; j < 4; j++) { - encoded_result[encode_pos] = (byte) ((buffer >>> ((3 - j) * 8)) & 0xFF); - encode_pos ++; - bufIdx++; - if (bufIdx >= width) { - return ; - } - } - } - - } - - public static void unpack8Values(byte[] encoded, int offset,int width, ArrayList<Integer> result_list) { - int byteIdx = offset; - long buffer = 0; - // total bits which have read from 'buf' to 'buffer'. i.e., - // number of available bits to be decoded. 
- int totalBits = 0; - int valueIdx = 0; - - while (valueIdx < 8) { - // If current available bits are not enough to decode one Integer, - // then add next byte from buf to 'buffer' until totalBits >= width - while (totalBits < width) { - buffer = (buffer << 8) | (encoded[byteIdx] & 0xFF); - byteIdx++; - totalBits += 8; - } - - // If current available bits are enough to decode one Integer, - // then decode one Integer one by one until left bits in 'buffer' is - // not enough to decode one Integer. - while (totalBits >= width && valueIdx < 8) { - result_list.add ((int) (buffer >>> (totalBits - width))); - valueIdx++; - totalBits -= width; - buffer = buffer & ((1L << totalBits) - 1); - } - } - } - - public static int bitPacking(ArrayList<Integer> numbers, int start, int bit_width,int encode_pos, byte[] encoded_result) { - int block_num = (numbers.size()-start) / 8; - for(int i=0;i<block_num;i++){ - pack8Values( numbers, start+i*8, bit_width,encode_pos, encoded_result); - encode_pos +=bit_width; - } - - return encode_pos; - - } - - public static ArrayList<Integer> decodeBitPacking( - byte[] encoded, int decode_pos, int bit_width, int block_size) { - ArrayList<Integer> result_list = new ArrayList<>(); - int block_num = (block_size - 1) / 8; - - for (int i = 0; i < block_num; i++) { // bitpacking - unpack8Values( encoded, decode_pos, bit_width, result_list); - decode_pos += bit_width; - } - return result_list; - } - - - // ----------------------------------------------------------------- - - - public static int[] getAbsDeltaTsBlock( - int[] ts_block, - int i, - int block_size, - int remaining, - int[] min_delta) { - int[] ts_block_delta = new int[remaining-1]; - - int base = i*block_size+1; - int end = i*block_size+remaining; - min_delta[0]=ts_block[base-1]; - int value_delta_min = Integer.MAX_VALUE; - int value_delta_max = Integer.MIN_VALUE; - for (int j = base; j < end; j++) { - int epsilon_v = ts_block[j] - ts_block[j - 1]; - epsilon_v = zigzag(epsilon_v); - if 
(epsilon_v < value_delta_min) { - value_delta_min = epsilon_v; - } - if (epsilon_v > value_delta_max) { - value_delta_max = epsilon_v; - } - ts_block_delta[j-base] =epsilon_v; - - } - for (int j = 0; j < remaining-1; j++) { - ts_block_delta[j] =ts_block_delta[j]-value_delta_min; - - } - min_delta[1] = (value_delta_min); - min_delta[2] = (value_delta_max-value_delta_min); - - - return ts_block_delta; - } - - public static int encodeOutlier2Bytes( - ArrayList<Integer> ts_block_delta, - int bit_width, - int encode_pos, byte[] encoded_result) { - - encode_pos = bitPacking(ts_block_delta, 0, bit_width, encode_pos, encoded_result); - - int n_k = ts_block_delta.size(); - int n_k_b = n_k / 8; - long cur_remaining = 0; // encoded int - int cur_number_bits = 0; // the bit width used of encoded int - for (int i = n_k_b * 8; i < n_k; i++) { - long cur_value = ts_block_delta.get(i); - int cur_bit_width = bit_width; // remaining bit width of current value - - if (cur_number_bits + bit_width >= 32) { - cur_remaining <<= (32 - cur_number_bits); - cur_bit_width = bit_width - 32 + cur_number_bits; - cur_remaining += ((cur_value >> cur_bit_width)); - long2intBytes(cur_remaining,encode_pos,encoded_result); - encode_pos += 4; - cur_remaining = 0; - cur_number_bits = 0; - } - - cur_remaining <<= cur_bit_width; - cur_number_bits += cur_bit_width; - cur_remaining += (((cur_value << (32 - cur_bit_width)) & 0xFFFFFFFFL) >> (32 - cur_bit_width)); - } - cur_remaining <<= (32 - cur_number_bits); - long2intBytes(cur_remaining,encode_pos,encoded_result); - encode_pos += 4; - return encode_pos; - - - } - - - public static ArrayList<Integer> decodeOutlier2Bytes( - byte[] encoded, - int decode_pos, - int bit_width, - int length, - ArrayList<Integer> encoded_pos_result - ) { - - int n_k_b = length / 8; - int remaining = length - n_k_b * 8; - ArrayList<Integer> result_list = new ArrayList<>(decodeBitPacking(encoded, decode_pos, bit_width, n_k_b * 8 + 1)); - decode_pos += n_k_b * bit_width; - - 
ArrayList<Long> int_remaining = new ArrayList<>(); - int int_remaining_size = remaining * bit_width / 32 + 1; - for (int j = 0; j < int_remaining_size; j++) { - int_remaining.add(bytesLong2Integer(encoded, decode_pos)); - decode_pos += 4; - } - - int cur_remaining_bits = 32; // remaining bit width of current value - long cur_number = int_remaining.get(0); - int cur_number_i = 1; - for (int i = n_k_b * 8; i < length; i++) { - if (bit_width < cur_remaining_bits) { - int tmp = (int) (cur_number >> (32 - bit_width)); - result_list.add(tmp); - cur_number <<= bit_width; - cur_number &= 0xFFFFFFFFL; - cur_remaining_bits -= bit_width; - } else { - int tmp = (int) (cur_number >> (32 - cur_remaining_bits)); - int remain_bits = bit_width - cur_remaining_bits; - tmp <<= remain_bits; - - cur_number = int_remaining.get(cur_number_i); - cur_number_i++; - tmp += (cur_number >> (32 - remain_bits)); - result_list.add(tmp); - cur_number <<= remain_bits; - cur_number &= 0xFFFFFFFFL; - cur_remaining_bits = 32 - remain_bits; - } - } - encoded_pos_result.add(decode_pos); - return result_list; - } - - private static int BOSEncodeBits(int[] ts_block_delta, - int final_k_start_value, - int final_x_l_plus, - int final_k_end_value, - int final_x_u_minus, - int max_delta_value, - int[] min_delta, - int encode_pos, - byte[] cur_byte) { - int block_size = ts_block_delta.length; - - ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_left_outlier = new ArrayList<>(); - ArrayList<Integer> final_right_outlier = new ArrayList<>(); - ArrayList<Integer> final_normal = new ArrayList<>(); - int k1 = 0; - int k2 = 0; - - ArrayList<Integer> bitmap_outlier = new ArrayList<>(); - int index_bitmap_outlier = 0; - int cur_index_bitmap_outlier_bits = 0; - for (int i = 0; i < block_size; i++) { - int cur_value = ts_block_delta[i]; - if ( cur_value<= final_k_start_value) { - 
final_left_outlier.add(cur_value); - final_left_outlier_index.add(i); - if (cur_index_bitmap_outlier_bits % 8 != 7) { - index_bitmap_outlier <<= 2; - index_bitmap_outlier += 3; - cur_index_bitmap_outlier_bits += 2; - } else { - index_bitmap_outlier <<= 1; - index_bitmap_outlier += 1; - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 1; - cur_index_bitmap_outlier_bits = 1; - } - k1++; - - - } else if (cur_value >= final_k_end_value) { - final_right_outlier.add(cur_value - final_k_end_value); - final_right_outlier_index.add(i); - if (cur_index_bitmap_outlier_bits % 8 != 7) { - index_bitmap_outlier <<= 2; - index_bitmap_outlier += 2; - cur_index_bitmap_outlier_bits += 2; - } else { - index_bitmap_outlier <<= 1; - index_bitmap_outlier += 1; - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 0; - cur_index_bitmap_outlier_bits = 1; - } - k2++; - - } else { - final_normal.add(cur_value - final_k_start_value-1); - index_bitmap_outlier <<= 1; - cur_index_bitmap_outlier_bits += 1; - } - if (cur_index_bitmap_outlier_bits % 8 == 0) { - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 0; - } - } - if (cur_index_bitmap_outlier_bits % 8 != 0) { - - index_bitmap_outlier <<= (8 - cur_index_bitmap_outlier_bits % 8); - index_bitmap_outlier &= 0xFF; - bitmap_outlier.add(index_bitmap_outlier); - } - - int final_alpha = ((k1 + k2) * getBitWith(block_size-1)) <= (block_size + k1 + k2) ? 
1 : 0; - - - int k_byte = (k1 << 1); - k_byte += final_alpha; - k_byte += (k2 << 16); - - - int2Bytes(k_byte,encode_pos,cur_byte); - encode_pos += 4; - - int2Bytes(min_delta[0],encode_pos,cur_byte); // x0 - encode_pos += 4; - int2Bytes(min_delta[1],encode_pos,cur_byte); // x_min - encode_pos += 4; - int2Bytes(final_x_l_plus,encode_pos,cur_byte); - encode_pos += 4; - int2Bytes(final_k_end_value,encode_pos,cur_byte); - encode_pos += 4; -// int2Bytes(final_k_start_value,encode_pos,cur_byte); -// encode_pos += 4; - int bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); - intByte2Bytes(bit_width_final,encode_pos,cur_byte); - encode_pos += 1; - int left_bit_width = getBitWith(final_k_start_value);//final_left_max - int right_bit_width = getBitWith(max_delta_value - final_k_end_value);//final_right_min - intByte2Bytes(left_bit_width,encode_pos,cur_byte); - encode_pos += 1; - intByte2Bytes(right_bit_width,encode_pos,cur_byte); - encode_pos += 1; - if (final_alpha == 0) { // 0 - - for (int i : bitmap_outlier) { - - intByte2Bytes(i,encode_pos,cur_byte); - encode_pos += 1; - } - } else { - encode_pos = encodeOutlier2Bytes(final_left_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); - encode_pos = encodeOutlier2Bytes(final_right_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); - } - encode_pos = encodeOutlier2Bytes(final_normal, bit_width_final,encode_pos,cur_byte); - if (k1 != 0) - encode_pos = encodeOutlier2Bytes(final_left_outlier, left_bit_width,encode_pos,cur_byte); - if (k2 != 0) - encode_pos = encodeOutlier2Bytes(final_right_outlier, right_bit_width,encode_pos,cur_byte); - return encode_pos; - - } - - - private static int BOSBlockEncoder(int[] ts_block, int block_i, int block_size, int remaining ,int encode_pos , byte[] cur_byte) { - - int[] min_delta = new int[3]; - int[] ts_block_delta = getAbsDeltaTsBlock(ts_block, block_i, block_size, remaining, min_delta); - - - block_size = remaining-1; - int max_delta_value = min_delta[2]; - 
- int[] value_list = new int[block_size]; - int unique_value_count = 0; - int[] value_count_list = new int[max_delta_value+1]; - for(int value:ts_block_delta){ - if(value_count_list[value]==0){ - value_count_list[value] = 1; - value_list[unique_value_count] = value; - unique_value_count ++; - }else{ - value_count_list[value] ++; - } - } - - int left_shift = getBitWith(block_size); - int mask = (1 << left_shift) - 1; - long[] sorted_value_list = new long[unique_value_count]; - int count = 0; - - for(int i=0;i<unique_value_count;i++){ - int value = value_list[i]; - sorted_value_list[i] = (((long) value) << left_shift) + value_count_list[value]; - } - Arrays.sort(sorted_value_list); - - for(int i=0;i<unique_value_count;i++){ - count += getCount(sorted_value_list[i], mask); - sorted_value_list[i] = (((long)getUniqueValue(sorted_value_list[i], left_shift) ) << left_shift) + count;//new_value_list[i] - } - - - int final_k_start_value = -1; // x_l_minus - int final_x_l_plus = 0; // x_l_plus - int final_k_end_value = max_delta_value+1; // x_u_plus - int final_x_u_minus = max_delta_value; // x_u_minus - - int min_bits = 0; - min_bits += (getBitWith(final_k_end_value - final_k_start_value - 2 ) * (block_size)); - - int cur_k1 = 0; - - int x_l_plus_value = 0; // x_l_plus - int x_u_minus_value = max_delta_value; // x_u_plus - - for (int end_value_i = 1; end_value_i < unique_value_count; end_value_i++) { - - x_u_minus_value = getUniqueValue(sorted_value_list[end_value_i-1], left_shift); - int x_u_plus_value = getUniqueValue(sorted_value_list[end_value_i], left_shift); - int cur_bits = 0; - int cur_k2 = block_size - getCount(sorted_value_list[end_value_i-1],mask); - cur_bits += Math.min((cur_k2 + cur_k1) * getBitWith(block_size-1), block_size + cur_k2 + cur_k1); - if (cur_k1 + cur_k2 != block_size) - cur_bits += (block_size - cur_k2) * getBitWith(x_u_minus_value - x_l_plus_value); // cur_k1 = 0 - if (cur_k2 != 0) - cur_bits += cur_k2 * getBitWith(max_delta_value - 
x_u_plus_value); - - - if (cur_bits < min_bits) { - min_bits = cur_bits; - final_x_u_minus = x_u_minus_value; - final_k_end_value = x_u_plus_value; - } - } - - int k_start_value = -1; // x_l_minus - - int gamma_max = getBitWith(max_delta_value); - int[] gamma_count_list = new int[gamma_max+1]; - int[] x_u_minus_value_list = new int[gamma_max+1]; - int[] x_u_plus_value_list = new int[gamma_max+1]; - int end_i = unique_value_count - 1; - for(int gamma = 0; gamma <= gamma_max; gamma++) { - int x_u_plus_pow_beta = (int) (max_delta_value - pow(2, gamma) + 1); - for (; end_i > 0; end_i--) { - x_u_minus_value = getUniqueValue(sorted_value_list[end_i - 1], left_shift); - int x_u_plus_value = getUniqueValue(sorted_value_list[end_i], left_shift); - if (x_u_minus_value < x_u_plus_pow_beta && x_u_plus_value >= x_u_plus_pow_beta){ - gamma_count_list[gamma] = getCount(sorted_value_list[end_i-1],mask); - x_u_minus_value_list[gamma] = x_u_minus_value; - x_u_plus_value_list[gamma] = x_u_plus_value; - } else if (x_u_minus_value < x_u_plus_pow_beta) { - break; - } - } - } - for(int gamma = 1; gamma < gamma_max; gamma++) { - if(gamma_count_list[gamma]==0){ - gamma_count_list[gamma] = gamma_count_list[gamma-1]; - x_u_minus_value_list[gamma] = x_u_minus_value_list[gamma-1]; - x_u_plus_value_list[gamma] = x_u_plus_value_list[gamma-1]; - } - } - - for (int start_value_i = 0; start_value_i < unique_value_count-1; start_value_i++) { - long k_start_valueL = sorted_value_list[start_value_i]; - k_start_value = getUniqueValue(k_start_valueL, left_shift) ; - x_l_plus_value = getUniqueValue(sorted_value_list[start_value_i+1], left_shift) ; - - - cur_k1 = getCount(k_start_valueL,mask); - - int k_end_value; - int cur_bits; - int cur_k2; - k_end_value = max_delta_value + 1; - - cur_bits = 0; - cur_k2 = 0; - cur_bits += Math.min((cur_k2 + cur_k1) * getBitWith(block_size-1), block_size + cur_k2 + cur_k1); - cur_bits += cur_k1 * getBitWith(k_start_value); - if (cur_k1 + cur_k2 != block_size) - cur_bits 
+= (block_size - cur_k1) * getBitWith(k_end_value- x_l_plus_value); //cur_k2 =0 - - if (cur_bits < min_bits) { - min_bits = cur_bits; - final_k_start_value = k_start_value; - final_x_l_plus = x_l_plus_value; - final_k_end_value = k_end_value; - final_x_u_minus = max_delta_value; - } - - int beta_max = getBitWith(max_delta_value - x_l_plus_value); - int end_value_i = start_value_i + 1; - int lower_outlier_cost = cur_k1 * getBitWith(k_start_value); - for(int beta = 1; beta < beta_max; beta++){ - int x_u_plus_pow_beta = (int) (x_l_plus_value + pow(2,beta)); - - for (; end_value_i < unique_value_count; end_value_i++) { - long k_end_valueL = sorted_value_list[end_value_i-1]; - - x_u_minus_value = getUniqueValue(k_end_valueL, left_shift); - k_end_value = getUniqueValue(sorted_value_list[end_value_i], left_shift); - if(x_u_minus_value < x_u_plus_pow_beta && k_end_value >= x_u_plus_pow_beta){ - cur_bits = 0; - cur_k2 = block_size - getCount(k_end_valueL,mask); - - cur_bits += Math.min((cur_k1 + cur_k2) * getBitWith(block_size-1), block_size + cur_k1 + cur_k2); - cur_bits += lower_outlier_cost; - if (cur_k1 + cur_k2 != block_size) - cur_bits += (block_size - cur_k1 - cur_k2) * getBitWith(x_u_minus_value - x_l_plus_value); - if (cur_k2 != 0) - cur_bits += cur_k2 * getBitWith(max_delta_value - k_end_value); - - - if (cur_bits < min_bits) { - min_bits = cur_bits; - final_k_start_value = k_start_value; - final_x_l_plus = x_l_plus_value; - final_k_end_value = k_end_value; - final_x_u_minus = x_u_minus_value; - } - } else if (x_u_minus_value >= x_u_plus_pow_beta && k_end_value >= x_u_plus_pow_beta) { - break; - } - } - } - - for(int gamma = 0; gamma < beta_max; gamma++){ -// int x_u_plus_pow_beta = (int) (max_delta_value - pow(2,gamma)+1); - x_u_minus_value = x_u_minus_value_list[gamma]; - k_end_value = x_u_plus_value_list[gamma]; - cur_bits = 0; - cur_k2 = block_size - gamma_count_list[gamma]; - - cur_bits += Math.min((cur_k1 + cur_k2) * getBitWith(block_size-1), block_size + 
cur_k1 + cur_k2); - cur_bits += lower_outlier_cost; - if (cur_k1 + cur_k2 != block_size) - cur_bits += (block_size - cur_k1 - cur_k2) * getBitWith(x_u_minus_value - x_l_plus_value); - if (cur_k2 != 0) - cur_bits += cur_k2 * getBitWith(max_delta_value - k_end_value); - - - if (cur_bits < min_bits) { - min_bits = cur_bits; - final_k_start_value = k_start_value; - final_x_l_plus = x_l_plus_value; - final_k_end_value = k_end_value; - final_x_u_minus = x_u_minus_value; - } - - } - -// end_value_i = unique_value_count - 1; -// for(int gamma = 1; gamma <= beta_max; gamma++){ -// for (; end_value_i > start_value_i; end_value_i--) { -// int x_u_plus_pow_beta = (int) (max_delta_value - pow(2,gamma)+1); -// x_u_minus_value = getUniqueValue(sorted_value_list[end_value_i-1], left_shift); -// k_end_value = getUniqueValue(sorted_value_list[end_value_i], left_shift); -// if(x_u_minus_value < x_u_plus_pow_beta && k_end_value >= x_u_plus_pow_beta){ -// cur_bits = 0; -// cur_k2 = block_size - getCount(sorted_value_list[end_value_i-1],mask); -// -// cur_bits += Math.min((cur_k1 + cur_k2) * getBitWith(block_size-1), block_size + cur_k1 + cur_k2); -// cur_bits += cur_k1 * getBitWith(k_start_value); -// if (cur_k1 + cur_k2 != block_size) -// cur_bits += (block_size - cur_k1 - cur_k2) * getBitWith(x_u_minus_value - x_l_plus_value); -// if (cur_k2 != 0) -// cur_bits += cur_k2 * getBitWith(max_delta_value - k_end_value); -// -// -// if (cur_bits < min_bits) { -// min_bits = cur_bits; -// final_k_start_value = k_start_value; -// final_x_l_plus = x_l_plus_value; -// final_k_end_value = k_end_value; -// final_x_u_minus = x_u_minus_value; -// } -// } else if (x_u_minus_value <= x_u_plus_pow_beta && k_end_value <= x_u_plus_pow_beta) { -// break; -// } -// } -// } - - - } - - encode_pos = BOSEncodeBits(ts_block_delta, final_k_start_value, final_x_l_plus, final_k_end_value, final_x_u_minus, - max_delta_value, min_delta, encode_pos , cur_byte); - - return encode_pos; - } - - public static int 
BOSEncoder( - int[] data, int block_size, byte[] encoded_result) { - block_size++; - - - int length_all = data.length; - - int encode_pos = 0; - int2Bytes(length_all,encode_pos,encoded_result); - encode_pos += 4; - int block_num = length_all / block_size; - int2Bytes(block_size,encode_pos,encoded_result); - encode_pos+= 4; - - for (int i = 0; i < block_num; i++) { - - encode_pos = BOSBlockEncoder(data, i, block_size,block_size, encode_pos,encoded_result); - - } - - int remaining_length = length_all - block_num * block_size; - if (remaining_length <= 3) { - for (int i = remaining_length; i > 0; i--) { - int2Bytes(data[data.length - i], encode_pos, encoded_result); - encode_pos += 4; - } - - } else { - - int start = block_num * block_size; - int remaining = length_all-start; - - - encode_pos = BOSBlockEncoder(data, block_num, block_size,remaining, encode_pos,encoded_result); - - -// int[] ts_block = new int[length_all-start]; -// if (length_all - start >= 0) System.arraycopy(data, start, ts_block, 0, length_all - start); -// -// int supple_length; -// if (remaining_length % 8 == 0) { -// supple_length = 1; -// } else if (remaining_length % 8 == 1) { -// supple_length = 0; -// } else { -// supple_length = 9 - remaining_length % 8; -// } -// -// -// encode_pos = BOSBlockEncoder(ts_block, supple_length, encode_pos,encoded_result); - } - - - return encode_pos; - } - public static int EncodeBits(int num, - int bit_width, - int encode_pos, - byte[] cur_byte, - int[] bit_index_list){ - // 找到要插入的位的索引 - int bit_index = bit_index_list[0] ;//cur_byte[encode_pos + 1]; - - // 计算数值的起始位位置 - int remaining_bits = bit_width; - - while (remaining_bits > 0) { - // 计算在当前字节中可以使用的位数 - int available_bits = bit_index; - int bits_to_write = Math.min(available_bits, remaining_bits); - - // 更新 bit_index - bit_index = available_bits - bits_to_write; - - // 计算要写入的位的掩码和数值 - int mask = (1 << bits_to_write) - 1; - int bits = (num >> (remaining_bits - bits_to_write)) & mask; - - // 写入到当前位置 - 
cur_byte[encode_pos] &= (byte) ~(mask << bit_index); // 清除对应位置的位 - cur_byte[encode_pos] |= (byte) (bits << bit_index); - - // 更新位宽和数值 - remaining_bits -= bits_to_write; - if (bit_index == 0) { - bit_index = 8; - encode_pos++; - } - } - bit_index_list[0] = bit_index; -// cur_byte[encode_pos + 1] = (byte) bit_index; - return encode_pos; - } - private static int BOSEncodeBitsImprove(int[] ts_block_delta, - int final_k_start_value, - int final_x_l_plus, - int final_k_end_value, - int final_x_u_minus, - int max_delta_value, - int[] min_delta, - int encode_pos, - byte[] cur_byte) { - int block_size = ts_block_delta.length; - - ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_left_outlier = new ArrayList<>(); - ArrayList<Integer> final_right_outlier = new ArrayList<>(); - ArrayList<Integer> final_normal = new ArrayList<>(); - int k1 = 0; - int k2 = 0; - - - - ArrayList<Integer> bitmap_outlier = new ArrayList<>(); - int index_bitmap_outlier = 0; - int cur_index_bitmap_outlier_bits = 0; - for (int i = 0; i < block_size; i++) { - int cur_value = ts_block_delta[i]; - if ( cur_value<= final_k_start_value) { -// encode_pos = EncodeBits(cur_value,left_bit_width,encode_pos,cur_byte); -// final_left_outlier.add(cur_value); - final_left_outlier_index.add(i); - if (cur_index_bitmap_outlier_bits % 8 != 7) { - index_bitmap_outlier <<= 2; - index_bitmap_outlier += 3; - cur_index_bitmap_outlier_bits += 2; - } else { - index_bitmap_outlier <<= 1; - index_bitmap_outlier += 1; - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 1; - cur_index_bitmap_outlier_bits = 1; - } - k1++; - - - } else if (cur_value >= final_k_end_value) { -// encode_pos = EncodeBits(cur_value- final_k_end_value,right_bit_width,encode_pos,cur_byte); -// final_right_outlier.add(cur_value - final_k_end_value); - final_right_outlier_index.add(i); - if (cur_index_bitmap_outlier_bits % 8 != 7) { 
- index_bitmap_outlier <<= 2; - index_bitmap_outlier += 2; - cur_index_bitmap_outlier_bits += 2; - } else { - index_bitmap_outlier <<= 1; - index_bitmap_outlier += 1; - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 0; - cur_index_bitmap_outlier_bits = 1; - } - k2++; - - } else { -// final_normal.add(cur_value - final_x_l_plus); -// encode_pos = EncodeBits(cur_value- final_x_l_plus,right_bit_width,encode_pos,cur_byte); - index_bitmap_outlier <<= 1; - cur_index_bitmap_outlier_bits += 1; - } - if (cur_index_bitmap_outlier_bits % 8 == 0) { - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 0; - } - } - if (cur_index_bitmap_outlier_bits % 8 != 0) { - - index_bitmap_outlier <<= (8 - cur_index_bitmap_outlier_bits % 8); - - index_bitmap_outlier &= 0xFF; - bitmap_outlier.add(index_bitmap_outlier); - } - - int final_alpha = ((k1 + k2) * getBitWith(block_size-1)) <= (block_size + k1 + k2) ? 1 : 0; - - - int k_byte = (k1 << 1); - k_byte += final_alpha; - k_byte += (k2 << 16); - - int2Bytes(k_byte,encode_pos,cur_byte); - encode_pos += 4; - - - int2Bytes(min_delta[0],encode_pos,cur_byte); - encode_pos += 4; - int2Bytes(min_delta[1],encode_pos,cur_byte); - encode_pos += 4; - - int bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); - intByte2Bytes(bit_width_final,encode_pos,cur_byte); - encode_pos += 1; - int[] bit_index_list = new int[1]; - bit_index_list[0] = 8; - - if(final_k_start_value<0 && final_k_end_value > max_delta_value){ -// int bit_width_final= getBitWith(final_x_u_minus - final_x_l_plus); -// cur_byte[encode_pos+1] = 8; - for (int cur_value : ts_block_delta) { - encode_pos = EncodeBits(cur_value, bit_width_final, encode_pos, cur_byte, bit_index_list); -// final_normal.add(cur_value); - } - if(bit_index_list[0] != 8){ - encode_pos ++; - } -// cur_byte[encode_pos+1] = 0; -// encode_pos = encodeOutlier2Bytes(final_normal, bit_width_final,encode_pos,cur_byte); - return encode_pos; - } - - - int left_bit_width = 
getBitWith(final_k_start_value);//final_left_max - int right_bit_width = getBitWith(max_delta_value - final_k_end_value);//final_right_min - int2Bytes(final_x_l_plus,encode_pos,cur_byte); - encode_pos += 4; - int2Bytes(final_k_end_value,encode_pos,cur_byte); - encode_pos += 4; - -// bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); -// intByte2Bytes(bit_width_final,encode_pos,cur_byte); -// encode_pos += 1; - intByte2Bytes(left_bit_width,encode_pos,cur_byte); - encode_pos += 1; - intByte2Bytes(right_bit_width,encode_pos,cur_byte); - encode_pos += 1; - - if (final_alpha == 0) { // 0 - - for (int i : bitmap_outlier) { - - intByte2Bytes(i,encode_pos,cur_byte); - encode_pos += 1; - } - } else { - encode_pos = encodeOutlier2Bytes(final_left_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); - encode_pos = encodeOutlier2Bytes(final_right_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); - } -// cur_byte[encode_pos+1] = 8; -// bit_index_list[0] = 8; - for (int cur_value : ts_block_delta) { - if (cur_value <= final_k_start_value) { - encode_pos = EncodeBits(cur_value, left_bit_width, encode_pos, cur_byte,bit_index_list); - } else if (cur_value >= final_k_end_value) { - encode_pos = EncodeBits(cur_value - final_k_end_value, right_bit_width, encode_pos, cur_byte,bit_index_list); - } else { - encode_pos = EncodeBits(cur_value - final_x_l_plus, bit_width_final, encode_pos, cur_byte,bit_index_list); - } - } - if(bit_index_list[0] != 8){ - encode_pos ++; - } - -// cur_byte[encode_pos+1] = 0; - -// if(k1==0 && k2==0){ -// intByte2Bytes(bit_width_final,encode_pos,cur_byte); -// encode_pos += 1; -// -// -// } -// else{ -// int2Bytes(final_x_l_plus,encode_pos,cur_byte); -// encode_pos += 4; -// int2Bytes(final_k_end_value,encode_pos,cur_byte); -// encode_pos += 4; -// -// bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); -// intByte2Bytes(bit_width_final,encode_pos,cur_byte); -// encode_pos += 1; -// 
intByte2Bytes(left_bit_width,encode_pos,cur_byte); -// encode_pos += 1; -// intByte2Bytes(right_bit_width,encode_pos,cur_byte); -// encode_pos += 1; -// if (final_alpha == 0) { // 0 -// -// for (int i : bitmap_outlier) { -// -// intByte2Bytes(i,encode_pos,cur_byte); -// encode_pos += 1; -// } -// } else { -// encode_pos = encodeOutlier2Bytes(final_left_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); -// encode_pos = encodeOutlier2Bytes(final_right_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); -// } -// } - - -// if(k1+k2!=block_size) -// encode_pos = encodeOutlier2Bytes(final_normal, bit_width_final,encode_pos,cur_byte); -// if (k1 != 0) -// encode_pos = encodeOutlier2Bytes(final_left_outlier, left_bit_width,encode_pos,cur_byte); -// if (k2 != 0) -// encode_pos = encodeOutlier2Bytes(final_right_outlier, right_bit_width,encode_pos,cur_byte); - return encode_pos; - - } - private static int BOSBlockEncoderImprove(int[] ts_block, int block_i, int block_size, int remaining ,int encode_pos , byte[] cur_byte) { - - int[] min_delta = new int[3]; - int[] ts_block_delta = getAbsDeltaTsBlock(ts_block, block_i, block_size, remaining, min_delta); - - - block_size = remaining-1; - int max_delta_value = min_delta[2]; - - int[] value_list = new int[block_size]; - int unique_value_count = 0; - int[] value_count_list = new int[max_delta_value+1]; - for(int value:ts_block_delta){ - if(value_count_list[value]==0){ - value_count_list[value] = 1; - value_list[unique_value_count] = value; - unique_value_count ++; - }else{ - value_count_list[value] ++; - } - } - - int left_shift = getBitWith(block_size); - int mask = (1 << left_shift) - 1; - long[] sorted_value_list = new long[unique_value_count]; - int count = 0; - - for(int i=0;i<unique_value_count;i++){ - int value = value_list[i]; - sorted_value_list[i] = (((long) value) << left_shift) + value_count_list[value]; - } - Arrays.sort(sorted_value_list); - - for(int i=0;i<unique_value_count;i++){ - count += 
getCount(sorted_value_list[i], mask); - sorted_value_list[i] = (((long)getUniqueValue(sorted_value_list[i], left_shift) ) << left_shift) + count;//new_value_list[i] - } - - - int final_k_start_value = -1; // x_l_minus - int final_x_l_plus = 0; // x_l_plus - int final_k_end_value = max_delta_value+1; // x_u_plus - int final_x_u_minus = max_delta_value; // x_u_minus - - int min_bits = 0; - min_bits += (getBitWith(final_k_end_value - final_k_start_value - 2 ) * (block_size)); - - int cur_k1 = 0; - - int x_l_plus_value = 0; // x_l_plus - int x_u_minus_value = max_delta_value; // x_u_plus - - for (int end_value_i = 1; end_value_i < unique_value_count; end_value_i++) { - - x_u_minus_value = getUniqueValue(sorted_value_list[end_value_i-1], left_shift); - int x_u_plus_value = getUniqueValue(sorted_value_list[end_value_i], left_shift); - int cur_bits = 0; - int cur_k2 = block_size - getCount(sorted_value_list[end_value_i-1],mask); - cur_bits += Math.min((cur_k2 + cur_k1) * getBitWith(block_size-1), block_size + cur_k2 + cur_k1); - if (cur_k1 + cur_k2 != block_size) - cur_bits += (block_size - cur_k2) * getBitWith(x_u_minus_value - x_l_plus_value); // cur_k1 = 0 - if (cur_k2 != 0) - cur_bits += cur_k2 * getBitWith(max_delta_value - x_u_plus_value); - - - if (cur_bits < min_bits) { - min_bits = cur_bits; - final_x_u_minus = x_u_minus_value; - final_k_end_value = x_u_plus_value; - } - } - - int k_start_value = -1; // x_l_minus - - int gamma_max = getBitWith(max_delta_value); - int[] gamma_count_list = new int[gamma_max+1]; - int[] x_u_minus_value_list = new int[gamma_max+1]; - int[] x_u_plus_value_list = new int[gamma_max+1]; - int end_i = unique_value_count - 1; - for(int gamma = 0; gamma <= gamma_max; gamma++) { - int x_u_plus_pow_beta = max_delta_value - (1<< gamma) + 1; - for (; end_i > 0; end_i--) { - x_u_minus_value = getUniqueValue(sorted_value_list[end_i - 1], left_shift); - int x_u_plus_value = getUniqueValue(sorted_value_list[end_i], left_shift); - if 
(x_u_minus_value < x_u_plus_pow_beta && x_u_plus_value >= x_u_plus_pow_beta){ - gamma_count_list[gamma] = getCount(sorted_value_list[end_i-1],mask); - x_u_minus_value_list[gamma] = x_u_minus_value; - x_u_plus_value_list[gamma] = x_u_plus_value; - } else if (x_u_minus_value < x_u_plus_pow_beta) { - break; - } - } - } - for(int gamma = 1; gamma < gamma_max; gamma++) { - if(gamma_count_list[gamma]==0){ - gamma_count_list[gamma] = gamma_count_list[gamma-1]; - x_u_minus_value_list[gamma] = x_u_minus_value_list[gamma-1]; - x_u_plus_value_list[gamma] = x_u_plus_value_list[gamma-1]; - } - } - - for (int start_value_i = 0; start_value_i < unique_value_count-1; start_value_i++) { - long k_start_valueL = sorted_value_list[start_value_i]; - k_start_value = getUniqueValue(k_start_valueL, left_shift) ; - x_l_plus_value = getUniqueValue(sorted_value_list[start_value_i+1], left_shift) ; - - - cur_k1 = getCount(k_start_valueL,mask); - - int k_end_value; - int cur_bits; - int cur_k2; - k_end_value = max_delta_value + 1; - - cur_bits = 0; - cur_k2 = 0; - cur_bits += Math.min((cur_k2 + cur_k1) * getBitWith(block_size-1), block_size + cur_k2 + cur_k1); - cur_bits += cur_k1 * getBitWith(k_start_value); - if (cur_k1 + cur_k2 != block_size) - cur_bits += (block_size - cur_k1) * getBitWith(k_end_value- x_l_plus_value); //cur_k2 =0 - - if (cur_bits < min_bits) { - min_bits = cur_bits; - final_k_start_value = k_start_value; - final_x_l_plus = x_l_plus_value; - final_k_end_value = k_end_value; - final_x_u_minus = max_delta_value; - } - - int beta_max = getBitWith(max_delta_value - x_l_plus_value); - - int lower_outlier_cost = cur_k1 * getBitWith(k_start_value); - - - - for(int gamma = 0; gamma < beta_max; gamma++){ -// int x_u_plus_pow_beta = (int) (max_delta_value - pow(2,gamma)+1); - x_u_minus_value = x_u_minus_value_list[gamma]; - k_end_value = x_u_plus_value_list[gamma]; - cur_bits = 0; - cur_k2 = block_size - gamma_count_list[gamma]; - - cur_bits += Math.min((cur_k1 + cur_k2) * 
getBitWith(block_size-1), block_size + cur_k1 + cur_k2); - cur_bits += lower_outlier_cost; - if (cur_k1 + cur_k2 != block_size) - cur_bits += (block_size - cur_k1 - cur_k2) * getBitWith(x_u_minus_value - x_l_plus_value); - if (cur_k2 != 0) - cur_bits += cur_k2 * getBitWith(max_delta_value - k_end_value); - - - if (cur_bits < min_bits) { - min_bits = cur_bits; - final_k_start_value = k_start_value; - final_x_l_plus = x_l_plus_value; - final_k_end_value = k_end_value; - final_x_u_minus = x_u_minus_value; - } - - } -// end_value_i = unique_value_count - 1; -// for(int gamma = 0; gamma <= beta_max; gamma++){ -// for (; end_value_i > start_value_i; end_value_i--) { -// int x_u_plus_pow_beta = (int) (max_delta_value - pow(2,gamma)+1); -// x_u_minus_value = getUniqueValue(sorted_value_list[end_value_i-1], left_shift); -// k_end_value = getUniqueValue(sorted_value_list[end_value_i], left_shift); -// if(x_u_minus_value < x_u_plus_pow_beta && k_end_value >= x_u_plus_pow_beta){ -// cur_bits = 0; -// cur_k2 = block_size - getCount(sorted_value_list[end_value_i-1],mask); -// -// cur_bits += Math.min((cur_k1 + cur_k2) * getBitWith(block_size-1), block_size + cur_k1 + cur_k2); -// cur_bits += cur_k1 * getBitWith(k_start_value); -// if (cur_k1 + cur_k2 != block_size) -// cur_bits += (block_size - cur_k1 - cur_k2) * getBitWith(x_u_minus_value - x_l_plus_value); -// if (cur_k2 != 0) -// cur_bits += cur_k2 * getBitWith(max_delta_value - k_end_value); -// -// -// if (cur_bits < min_bits) { -// min_bits = cur_bits; -// final_k_start_value = k_start_value; -// final_x_l_plus = x_l_plus_value; -// final_k_end_value = k_end_value; -// final_x_u_minus = x_u_minus_value; -// } -// } else if (x_u_minus_value < x_u_plus_pow_beta && k_end_value < x_u_plus_pow_beta) { -// break; -// } -// } -// } -// - - } - - for(int beta = 0; beta < gamma_max; beta++){ - - int pow_beta = 1<<beta; - int start_value_i = 0; - int end_value_i = start_value_i+1; - - for (; start_value_i < unique_value_count-1; 
start_value_i++) { - long x_l_minusL = sorted_value_list[start_value_i]; - int x_l_minus = getUniqueValue(x_l_minusL, left_shift) ; - int x_l_plus = getUniqueValue(sorted_value_list[start_value_i+1], left_shift) ; - int x_u_plus_pow_beta = pow_beta+x_l_plus; - if(x_u_plus_pow_beta > max_delta_value) break; - - - - cur_k1 = getCount(x_l_minusL,mask); - int lower_outlier_cost = cur_k1 * getBitWith(x_l_minus); - - while ( end_value_i < unique_value_count) { -// if(beta==3 && end_value_i==22) -// { -// System.out.println(x_l_minus); -// System.out.println(x_l_plus); -// } - - int x_u_minus = getUniqueValue(sorted_value_list[end_value_i-1], left_shift); - int x_u_plus = getUniqueValue(sorted_value_list[end_value_i], left_shift); - if(x_u_minus < x_u_plus_pow_beta && x_u_plus >= x_u_plus_pow_beta){ - int cur_bits = 0; - int cur_k2 = block_size - getCount(sorted_value_list[end_value_i-1],mask); - - cur_bits += Math.min((cur_k1 + cur_k2) * getBitWith(block_size-1), block_size + cur_k1 + cur_k2); - cur_bits += lower_outlier_cost; - if (cur_k1 + cur_k2 != block_size) - cur_bits += (block_size - cur_k1 - cur_k2) * getBitWith(x_u_minus - x_l_plus); - if (cur_k2 != 0) - cur_bits += cur_k2 * getBitWith(max_delta_value - x_u_plus); - - - if (cur_bits < min_bits) { - min_bits = cur_bits; - final_k_start_value = x_l_minus; - final_x_l_plus = x_l_plus; - final_k_end_value = x_u_plus; - final_x_u_minus = x_u_minus; - } - break; - } -// else if (x_u_minus >= x_u_plus_pow_beta && x_u_plus >= x_u_plus_pow_beta) { -// break; -// } - - end_value_i++; - } - } - - } - - encode_pos = BOSEncodeBitsImprove(ts_block_delta, final_k_start_value, final_x_l_plus, final_k_end_value, final_x_u_minus, - max_delta_value, min_delta, encode_pos , cur_byte); - - return encode_pos; - } - - public static int BOSEncoderImprove( - int[] data, int block_size, byte[] encoded_result) { - block_size++; - - - int length_all = data.length; - - int encode_pos = 0; - int2Bytes(length_all,encode_pos,encoded_result); - 
encode_pos += 4; - int block_num = length_all / block_size; - int2Bytes(block_size,encode_pos,encoded_result); - encode_pos+= 4; - - for (int i = 0; i < block_num; i++) { - - encode_pos = BOSBlockEncoderImprove(data, i, block_size,block_size, encode_pos,encoded_result); - - } - - int remaining_length = length_all - block_num * block_size; - if (remaining_length <= 3) { - for (int i = remaining_length; i > 0; i--) { - int2Bytes(data[data.length - i], encode_pos, encoded_result); - encode_pos += 4; - } - - } else { - - int start = block_num * block_size; - int remaining = length_all-start; - - - encode_pos = BOSBlockEncoderImprove(data, block_num, block_size,remaining, encode_pos,encoded_result); - - -// int[] ts_block = new int[length_all-start]; -// if (length_all - start >= 0) System.arraycopy(data, start, ts_block, 0, length_all - start); -// -// int supple_length; -// if (remaining_length % 8 == 0) { -// supple_length = 1; -// } else if (remaining_length % 8 == 1) { -// supple_length = 0; -// } else { -// supple_length = 9 - remaining_length % 8; -// } -// -// -// encode_pos = BOSBlockEncoder(ts_block, supple_length, encode_pos,encoded_result); - } - - - return encode_pos; - } - - - public static int BOSBlockDecoder(byte[] encoded, int decode_pos, int[] value_list, int block_size, int[] value_pos_arr) { - int k_byte = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - int k1_byte = (int) (k_byte % pow(2, 16)); - int k1 = k1_byte / 2; - int final_alpha = k1_byte % 2; - - int k2 = (int) (k_byte / pow(2, 16)); - - int value0 = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - value_list[value_pos_arr[0]] =value0; - value_pos_arr[0] ++; - - int min_delta = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - int final_k_start_value = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - int final_k_end_value = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - int bit_width_final = bytes2Integer(encoded, decode_pos, 1); 
- decode_pos += 1; - - int left_bit_width = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - int right_bit_width = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_left_outlier = new ArrayList<>(); - ArrayList<Integer> final_right_outlier = new ArrayList<>(); - ArrayList<Integer> final_normal; - ArrayList<Integer> bitmap_outlier = new ArrayList<>(); - - if (final_alpha == 0) { - int bitmap_bytes = (int) Math.ceil((double) (block_size + k1 + k2) / (double) 8); - - for (int i = 0; i < bitmap_bytes; i++) { - bitmap_outlier.add(bytes2Integer(encoded, decode_pos, 1)); - decode_pos += 1; - } - int bitmap_outlier_i = 0; - int remaining_bits = 8; - int tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - int i = 0; - while (i < block_size) { - if (remaining_bits > 1) { - int bit_i = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_i == 1) { - int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_left_right == 1) { - final_left_outlier_index.add(i); - } else { - final_right_outlier_index.add(i); - } - } - if (remaining_bits == 0) { - remaining_bits = 8; - if (bitmap_outlier_i >= bitmap_bytes) break; - tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - } - } else if (remaining_bits == 1) { - int bit_i = tmp & 0x1; - remaining_bits = 8; - if (bitmap_outlier_i >= bitmap_bytes) break; - tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - if (bit_i == 1) { - int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_left_right == 1) { - final_left_outlier_index.add(i); - } else { - final_right_outlier_index.add(i); - } - } - } - i++; - } - } else { - ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); - final_left_outlier_index = 
decodeOutlier2Bytes(encoded, decode_pos, getBitWith(block_size-1), k1, decode_pos_result_left); - decode_pos = (decode_pos_result_left.get(0)); - ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); - final_right_outlier_index = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(block_size-1), k2, decode_pos_result_right); - decode_pos = (decode_pos_result_right.get(0)); - } - - - ArrayList<Integer> decode_pos_normal = new ArrayList<>(); - final_normal = decodeOutlier2Bytes(encoded, decode_pos, bit_width_final, block_size - k1 - k2, decode_pos_normal); - - decode_pos = decode_pos_normal.get(0); - if (k1 != 0) { - ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); - final_left_outlier = decodeOutlier2Bytes(encoded, decode_pos, left_bit_width, k1, decode_pos_result_left); - decode_pos = decode_pos_result_left.get(0); - } - if (k2 != 0) { - ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); - final_right_outlier = decodeOutlier2Bytes(encoded, decode_pos, right_bit_width, k2, decode_pos_result_right); - decode_pos = decode_pos_result_right.get(0); - } - int left_outlier_i = 0; - int right_outlier_i = 0; - int normal_i = 0; - int pre_v = value0; -// int final_k_end_value = (int) (final_k_start_value + pow(2, bit_width_final)); - - - for (int i = 0; i < block_size; i++) { - int current_delta; - if (left_outlier_i >= k1) { - if (right_outlier_i >= k2) { - current_delta = final_normal.get(normal_i) + final_k_start_value+1; - normal_i++; - } else if (i == final_right_outlier_index.get(right_outlier_i)) { - current_delta = final_right_outlier.get(right_outlier_i) + final_k_end_value; - right_outlier_i++; - } else { - current_delta = final_normal.get(normal_i) + final_k_start_value+1; - normal_i++; - } - } else if (i == final_left_outlier_index.get(left_outlier_i)) { - current_delta = final_left_outlier.get(left_outlier_i); - left_outlier_i++; - } else { - - if (right_outlier_i >= k2) { - current_delta = final_normal.get(normal_i) + 
final_k_start_value+1; - normal_i++; - } else if (i == final_right_outlier_index.get(right_outlier_i)) { - current_delta = final_right_outlier.get(right_outlier_i) + final_k_end_value; - right_outlier_i++; - } else { - current_delta = final_normal.get(normal_i) + final_k_start_value+1; - normal_i++; - } - } - - pre_v = deZigzag(current_delta) + min_delta + pre_v; - value_list[value_pos_arr[0]] =pre_v; - value_pos_arr[0] ++; - } - return decode_pos; - } - - public static void BOSDecoder(byte[] encoded) { - - int decode_pos = 0; - int length_all = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - int block_size = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - int block_num = length_all / block_size; - int remain_length = length_all - block_num * block_size; - - int[] value_list = new int[length_all+block_size]; - block_size --; - - int[] value_pos_arr = new int[1]; - for (int k = 0; k < block_num; k++) { - decode_pos = BOSBlockDecoder(encoded, decode_pos, value_list, block_size,value_pos_arr); - - } - - if (remain_length <= 3) { - for (int i = 0; i < remain_length; i++) { - int value_end = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - value_list[value_pos_arr[0]] = value_end; - value_pos_arr[0]++; - } - } else { - remain_length --; - BOSBlockDecoder(encoded, decode_pos, value_list, remain_length, value_pos_arr); - } - } - - public static int DecodeBits(byte[] cur_byte, int bit_width, int[] decode_pos_list) { - int decode_pos = decode_pos_list[0]; - int bit_index = decode_pos_list[1]; //cur_byte[decode_pos + 1]; - int remaining_bits = bit_width; - int num = 0; - - while (remaining_bits > 0) { - int available_bits = bit_index; - int bits_to_read = Math.min(available_bits, remaining_bits); - - // 计算要读取的位的掩码 - int mask = (1 << bits_to_read) - 1; - int bits = (cur_byte[decode_pos] >> (available_bits - bits_to_read)) & mask; - - // 将读取的位合并到结果中 - num = (num << bits_to_read) | bits; - - // 更新位宽和 bit_index - remaining_bits -= 
bits_to_read; - bit_index = available_bits - bits_to_read; - - if (bit_index == 0) { - bit_index = 8; - decode_pos++; - } - } - decode_pos_list[0] = decode_pos; - decode_pos_list[1] = bit_index; - - return num; - } - public static int BOSBlockDecoderImprove(byte[] encoded, int decode_pos, int[] value_list, int block_size, int[] value_pos_arr) { - - int k_byte = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - int k1_byte = (int) (k_byte % pow(2, 16)); - int k1 = k1_byte / 2; - int final_alpha = k1_byte % 2; - - int k2 = (int) (k_byte / pow(2, 16)); - - int value0 = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - value_list[value_pos_arr[0]] =value0; - value_pos_arr[0] ++; - - int min_delta = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - int bit_width_final = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - int[] decode_list = new int[2]; - decode_list[0]= decode_pos; - decode_list[1]= 8; - - int valuePos = value_pos_arr[0]; - - if(k1==0 && k2==0){ - int pre_v = value0; - for (int i = 0; i < block_size; i++) { - int cur_delta = min_delta + DecodeBits(encoded, bit_width_final, decode_list); - pre_v += deZigzag(cur_delta); - value_list[valuePos++] = pre_v; - } - value_pos_arr[0] = valuePos; - return decode_list[0]; - } - - ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); -// ArrayList<Integer> final_left_outlier = new ArrayList<>(); -// ArrayList<Integer> final_right_outlier = new ArrayList<>(); -// ArrayList<Integer> final_normal= new ArrayList<>();; - ArrayList<Integer> bitmap_outlier = new ArrayList<>(); - int final_k_start_value = 0; - int final_k_end_value = 0; -// int bit_width_final = 0; - int left_bit_width = 0; - int right_bit_width = 0; - - final_k_start_value = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - final_k_end_value = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - -// bit_width_final = 
bytes2Integer(encoded, decode_pos, 1); -// decode_pos += 1; - - left_bit_width = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - right_bit_width = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - if (final_alpha == 0) { - int bitmap_bytes = (int) Math.ceil((double) (block_size + k1 + k2) / (double) 8); - for (int i = 0; i < bitmap_bytes; i++) { - bitmap_outlier.add(bytes2Integer(encoded, decode_pos, 1)); - decode_pos += 1; - } - int bitmap_outlier_i = 0; - int remaining_bits = 8; - int tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - int i = 0; - while (i < block_size ) { - if (remaining_bits > 1) { - int bit_i = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_i == 1) { - int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_left_right == 1) { - final_left_outlier_index.add(i); - } else { - final_right_outlier_index.add(i); - } - } - if (remaining_bits == 0) { - remaining_bits = 8; - if (bitmap_outlier_i >= bitmap_bytes) break; - tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - } - } else if (remaining_bits == 1) { - int bit_i = tmp & 0x1; - remaining_bits = 8; - if (bitmap_outlier_i >= bitmap_bytes) break; - tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - if (bit_i == 1) { - int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_left_right == 1) { - final_left_outlier_index.add(i); - } else { - final_right_outlier_index.add(i); - } - } - } - i++; - } - } else { - ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); - final_left_outlier_index = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(block_size-1), k1, decode_pos_result_left); - decode_pos = (decode_pos_result_left.get(0)); - ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); - final_right_outlier_index = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(block_size-1), k2, decode_pos_result_right); - 
decode_pos = (decode_pos_result_right.get(0)); - } - - - - - -// ArrayList<Integer> decode_pos_normal = new ArrayList<>(); -// final_normal = decodeOutlier2Bytes(encoded, decode_pos, bit_width_final, block_size - k1 - k2, decode_pos_normal); -// -// decode_pos = decode_pos_normal.get(0); -// if (k1 != 0) { -// ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); -// final_left_outlier = decodeOutlier2Bytes(encoded, decode_pos, left_bit_width, k1, decode_pos_result_left); -// decode_pos = decode_pos_result_left.get(0); -// } -// if (k2 != 0) { -// ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); -// final_right_outlier = decodeOutlier2Bytes(encoded, decode_pos, right_bit_width, k2, decode_pos_result_right); -// decode_pos = decode_pos_result_right.get(0); -// } - int left_outlier_i = 0; - int right_outlier_i = 0; - int normal_i = 0; - int pre_v = value0; -// int final_k_end_value = (int) (final_k_start_value + pow(2, bit_width_final)); - -// Precompute constants - int normalOffset = min_delta + final_k_start_value; - int rightOutlierOffset = min_delta + final_k_end_value; - -// Initialize indices and pre-fetch next outlier positions - int leftOutlierNextIndex = (left_outlier_i < k1) ? final_left_outlier_index.get(left_outlier_i) : Integer.MAX_VALUE; - int rightOutlierNextIndex = (right_outlier_i < k2) ? final_right_outlier_index.get(right_outlier_i) : Integer.MAX_VALUE; - decode_list[0]= decode_pos; -// decode_list[1]= 8; - // Use a local variable for the position - for (int i = 0; i < block_size; i++) { - int currentDelta; - if (i == leftOutlierNextIndex) { - // Process left outlier - currentDelta = min_delta + DecodeBits(encoded, left_bit_width, decode_list); // final_left_outlier.get(left_outlier_i); - left_outlier_i++; - leftOutlierNextIndex = (left_outlier_i < k1) ? 
final_left_outlier_index.get(left_outlier_i) : Integer.MAX_VALUE; - } else if (i == rightOutlierNextIndex) { - // Process right outlier - currentDelta = rightOutlierOffset + DecodeBits(encoded, right_bit_width, decode_list);// final_right_outlier.get(right_outlier_i); - right_outlier_i++; - rightOutlierNextIndex = (right_outlier_i < k2) ? final_right_outlier_index.get(right_outlier_i) : Integer.MAX_VALUE; - } else { - // Process normal value - currentDelta = normalOffset + DecodeBits(encoded, bit_width_final, decode_list); - normal_i++; - } - - // Update the cumulative value and store it - pre_v += deZigzag(currentDelta); - value_list[valuePos++] = pre_v; - } - value_pos_arr[0] = valuePos; - if(decode_list[1]!=8){ - return decode_list[0]+1; - }else { - return decode_list[0]; - } -// decode_pos = decode_list[0]; -// Update the position in the array - - -// return decode_pos; - } - public static void BOSDecoderImprove(byte[] encoded) { - - int decode_pos = 0; - int length_all = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - int block_size = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - int block_num = length_all / block_size; - int remain_length = length_all - block_num * block_size; - - int[] value_list = new int[length_all+block_size]; - block_size --; - - int[] value_pos_arr = new int[1]; - for (int k = 0; k < block_num; k++) { - decode_pos = BOSBlockDecoderImprove(encoded, decode_pos, value_list, block_size,value_pos_arr); - - } - - if (remain_length <= 3) { - for (int i = 0; i < remain_length; i++) { - int value_end = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - value_list[value_pos_arr[0]] = value_end; - value_pos_arr[0]++; - } - } else { - remain_length --; - BOSBlockDecoderImprove(encoded, decode_pos, value_list, remain_length, value_pos_arr); - } - } - - public static void main(@org.jetbrains.annotations.NotNull String[] args) throws IOException { - String parent_dir = 
"/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/";// your data path -// String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String output_parent_dir = parent_dir + "icde0802/compression_ratio/sprintz_bos_b"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } - - output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 -// dataset_block_size.add(256); - output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 -// dataset_block_size.add(2048); - 
output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 -// dataset_block_size.add(1024); - -// for (int file_i = 8; file_i < 9; file_i++) { - for (int file_i = 0; file_i < input_path_list.size(); file_i++) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { -// f = tempList[1]; - System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - loader.readHeaders(); - - while (loader.readRecord()) { - - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); - - } - - inputStream.close(); - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; - int repeatTime2 = 500; - - int length = 0; - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { 
- length = BOSEncoder(data2_arr, dataset_block_size.get(file_i), encoded_result); - } - - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - compressed_size += length; - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoder(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - String[] record = { - f.toString(), - "SPRINTZ+BOS-B", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); - - - } - writer.close(); - - } - } - - @Test - public void BOSImproveDecodeTest() throws IOException { - String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/"; // your data path -// String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String output_parent_dir = parent_dir + "icde0802/supply_experiment/R1O4_decode_time/compression_ratio/sprintz_bos_b"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } 
- - output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 -// dataset_block_size.add(1024); - - int repeatTime2 = 100; -// for (int file_i = 9; file_i < 10; file_i++) { -// - for (int file_i = 0; file_i < input_path_list.size(); file_i++) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - 
for (File f : tempList) { -// f=tempList[2]; - System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - - loader.readHeaders(); - while (loader.readRecord()) { -// String value = loader.getValues()[index]; - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); -// data.add(Integer.valueOf(value)); - } - inputStream.close(); - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; - - - int length = 0; - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - length = BOSEncoderImprove(data2_arr, dataset_block_size.get(file_i), encoded_result); - } - - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - compressed_size += length; - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoderImprove(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - - String[] record = { - f.toString(), - "SPRINTZ+BOS-B", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); -// break; - } - writer.close(); - } - } - - @Test - public void BOSImproveEncodeTest() throws IOException { - String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/"; // your data path -// String parent_dir = 
"/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String output_parent_dir = parent_dir + "icde0802/compression_ratio/sprintz_bos_b_improve"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } - - output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 -// dataset_block_size.add(1024); - 
output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 -// dataset_block_size.add(1024); - - int repeatTime2 = 100; -// for (int file_i = 9; file_i < 10; file_i++) { -// - for (int file_i = 0; file_i < input_path_list.size(); file_i++) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { -// f=tempList[2]; - System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - - loader.readHeaders(); - while (loader.readRecord()) { -// String value = loader.getValues()[index]; - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); -// data.add(Integer.valueOf(value)); - } - inputStream.close(); - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; - - - int length = 0; - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - length = 
BOSEncoderImprove(data2_arr, dataset_block_size.get(file_i), encoded_result); - } - - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - compressed_size += length; - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoderImprove(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - - String[] record = { - f.toString(), - "SPRINTZ+BOS-B", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); -// break; - } - writer.close(); - } - } - -}
diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SPRINTZBOSMTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SPRINTZBOSMTest.java deleted file mode 100644 index 7e053c1..0000000 --- a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SPRINTZBOSMTest.java +++ /dev/null
@@ -1,2301 +0,0 @@ -package org.apache.iotdb.tsfile.encoding; - -import com.csvreader.CsvReader; -import com.csvreader.CsvWriter; -import org.apache.commons.compress.archivers.sevenz.SevenZArchiveEntry; -import org.apache.commons.compress.archivers.sevenz.SevenZOutputFile; -import org.apache.iotdb.tsfile.compress.ICompressor; -import org.apache.iotdb.tsfile.file.metadata.enums.CompressionType; -import org.junit.Test; - -import java.io.*; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.util.ArrayList; - -import static java.lang.Math.*; -import static org.apache.iotdb.tsfile.constant.TestConstant.random; - -public class SPRINTZBOSMTest { - - public static int getBitWith(int num) { - if (num == 0) return 1; - else return 32 - Integer.numberOfLeadingZeros(num); - } - public static int getCount(long long1, int mask) { - return ((int) (long1 & mask)); - } - public static int getUniqueValue(long long1, int left_shift) { - return ((int) ((long1) >> left_shift)); - } - - public static int findMedianImprove(int[] arr) { - if (arr == null || arr.length == 0) { - throw new IllegalArgumentException("数组不能为空"); - } - int n = arr.length; - return quickSelect2(arr, 0, n - 1, n / 2); - } - - private static int quickSelect2(int[] arr, int left, int right, int k){ - int pivotV=arr[left+random.nextInt(right-left+1)],tmpV; - int posEqual=left,posSmaller=left; // a[left,posEqual): = pivotV; a[posEqual,posSmaller): < pivotV - for(int i=left;i<=right;i++){ - if(arr[i]==pivotV){ - tmpV=arr[i]; - arr[i]=arr[posSmaller]; - arr[posSmaller]=arr[posEqual]; - arr[posEqual]=tmpV; - posEqual++; - posSmaller++; - }else - if(arr[i]<pivotV){ - tmpV=arr[posSmaller]; - arr[posSmaller]=arr[i]; - arr[i]=tmpV; - posSmaller++; - } - } - if(k+(posEqual-left)<=posSmaller-1)return quickSelect2(arr,posEqual,posSmaller-1,k+(posEqual-left)); - else if(k<=posSmaller-1)return pivotV; - else return quickSelect2(arr,posSmaller,right,k); - } - public static int findMedian(int[] 
arr) { - if (arr == null || arr.length == 0) { - throw new IllegalArgumentException("数组不能为空"); - } - int n = arr.length; - return quickSelect(arr, 0, n - 1, n / 2); - } - - private static int quickSelect(int[] arr, int left, int right, int k) { - if (left == right) { - return arr[left]; - } - - int pivotIndex = partition(arr, left, right); - if (k == pivotIndex) { - return arr[k]; - } else if (k < pivotIndex) { - return quickSelect(arr, left, pivotIndex - 1, k); - } else { - return quickSelect(arr, pivotIndex + 1, right, k); - } - } - - private static int partition(int[] arr, int left, int right) { - int pivot = arr[right]; - int i = left; - for (int j = left; j < right; j++) { - if (arr[j] <= pivot) { - swap(arr, i, j); - i++; - } - } - swap(arr, i, right); - return i; - } - - private static void swap(int[] arr, int i, int j) { - int temp = arr[i]; - arr[i] = arr[j]; - arr[j] = temp; - } - public static int zigzag(int num) { - if (num < 0) return ((-num)<<1)-1; - else return num<<1; - } - - public static int deZigzag(int num) { - if (num % 2 == 0) return num>>1; - else return -((num+1)>>1); - } - - public static void int2Bytes(int integer,int encode_pos , byte[] cur_byte) { - cur_byte[encode_pos] = (byte) (integer >> 24); - cur_byte[encode_pos+1] = (byte) (integer >> 16); - cur_byte[encode_pos+2] = (byte) (integer >> 8); - cur_byte[encode_pos+3] = (byte) (integer); - } - - - public static void intByte2Bytes(int integer, int encode_pos , byte[] cur_byte) { - cur_byte[encode_pos] = (byte) (integer); - } - - private static void long2intBytes(long integer, int encode_pos , byte[] cur_byte) { - cur_byte[encode_pos] = (byte) (integer >> 24); - cur_byte[encode_pos+1] = (byte) (integer >> 16); - cur_byte[encode_pos+2] = (byte) (integer >> 8); - cur_byte[encode_pos+3] = (byte) (integer); - } - - public static int bytes2Integer(byte[] encoded, int start, int num) { - int value = 0; - if (num > 4) { - System.out.println("bytes2Integer error"); - return 0; - } - for (int i = 
0; i < num; i++) { - value <<= 8; - int b = encoded[i + start] & 0xFF; - value |= b; - } - return value; - } - - private static long bytesLong2Integer(byte[] encoded, int decode_pos) { - long value = 0; - for (int i = 0; i < 4; i++) { - value <<= 8; - int b = encoded[i + decode_pos] & 0xFF; - value |= b; - } - return value; - } - - public static void pack8Values(ArrayList<Integer> values, int offset, int width, int encode_pos, byte[] encoded_result) { - int bufIdx = 0; - int valueIdx = offset; - // remaining bits for the current unfinished Integer - int leftBit = 0; - - while (valueIdx < 8 + offset) { - // buffer is used for saving 32 bits as a part of result - int buffer = 0; - // remaining size of bits in the 'buffer' - int leftSize = 32; - - // encode the left bits of current Integer to 'buffer' - if (leftBit > 0) { - buffer |= (values.get(valueIdx) << (32 - leftBit)); - leftSize -= leftBit; - leftBit = 0; - valueIdx++; - } - - while (leftSize >= width && valueIdx < 8 + offset) { - // encode one Integer to the 'buffer' - buffer |= (values.get(valueIdx)<< (leftSize - width)); - leftSize -= width; - valueIdx++; - } - // If the remaining space of the buffer can not save the bits for one Integer, - if (leftSize > 0 && valueIdx < 8 + offset) { - // put the first 'leftSize' bits of the Integer into remaining space of the - // buffer - buffer |= (values.get(valueIdx) >>> (width - leftSize)); - leftBit = width - leftSize; - } - - // put the buffer into the final result - for (int j = 0; j < 4; j++) { - encoded_result[encode_pos] = (byte) ((buffer >>> ((3 - j) * 8)) & 0xFF); - encode_pos ++; - bufIdx++; - if (bufIdx >= width) { - return ; - } - } - } - - } - - public static void unpack8Values(byte[] encoded, int offset,int width, ArrayList<Integer> result_list) { - int byteIdx = offset; - long buffer = 0; - // total bits which have read from 'buf' to 'buffer'. i.e., - // number of available bits to be decoded. 
- int totalBits = 0; - int valueIdx = 0; - - while (valueIdx < 8) { - // If current available bits are not enough to decode one Integer, - // then add next byte from buf to 'buffer' until totalBits >= width - while (totalBits < width) { - buffer = (buffer << 8) | (encoded[byteIdx] & 0xFF); - byteIdx++; - totalBits += 8; - } - - // If current available bits are enough to decode one Integer, - // then decode one Integer one by one until left bits in 'buffer' is - // not enough to decode one Integer. - while (totalBits >= width && valueIdx < 8) { - result_list.add ((int) (buffer >>> (totalBits - width))); - valueIdx++; - totalBits -= width; - buffer = buffer & ((1L << totalBits) - 1); - } - } - } - - public static int bitPacking(ArrayList<Integer> numbers, int start, int bit_width,int encode_pos, byte[] encoded_result) { - int block_num = (numbers.size()-start) / 8; - for(int i=0;i<block_num;i++){ - pack8Values( numbers, start+i*8, bit_width,encode_pos, encoded_result); - encode_pos +=bit_width; - } - - return encode_pos; - - } - - public static ArrayList<Integer> decodeBitPacking( - byte[] encoded, int decode_pos, int bit_width, int block_size) { - ArrayList<Integer> result_list = new ArrayList<>(); - int block_num = (block_size - 1) / 8; - - for (int i = 0; i < block_num; i++) { // bitpacking - unpack8Values( encoded, decode_pos, bit_width, result_list); - decode_pos += bit_width; - } - return result_list; - } - - // ----------------------------------------------------------------- - - public static int[] getAbsDeltaTsBlock( - int[] ts_block, - int i, - int block_size, - int remaining, - int[] min_delta) { - int[] ts_block_delta = new int[remaining-1]; - - int base = i*block_size+1; - int end = i*block_size+remaining; - min_delta[0]=ts_block[base-1]; - int value_delta_min = Integer.MAX_VALUE; - int value_delta_max = Integer.MIN_VALUE; - for (int j = base; j < end; j++) { - int epsilon_v = ts_block[j] - ts_block[j - 1]; - epsilon_v = zigzag(epsilon_v); - if 
(epsilon_v < value_delta_min) { - value_delta_min = epsilon_v; - } - if (epsilon_v > value_delta_max) { - value_delta_max = epsilon_v; - } - ts_block_delta[j-base] =epsilon_v; - - } - for (int j = 0; j < remaining-1; j++) { - ts_block_delta[j] =ts_block_delta[j]-value_delta_min; - - } - min_delta[1] = (value_delta_min); - min_delta[2] = (value_delta_max-value_delta_min); - - - return ts_block_delta; - } - - public static int encodeOutlier2Bytes( - ArrayList<Integer> ts_block_delta, - int bit_width, - int encode_pos, byte[] encoded_result) { - - encode_pos = bitPacking(ts_block_delta, 0, bit_width, encode_pos, encoded_result); - - int n_k = ts_block_delta.size(); - int n_k_b = n_k / 8; - long cur_remaining = 0; // encoded int - int cur_number_bits = 0; // the bit width used of encoded int - for (int i = n_k_b * 8; i < n_k; i++) { - long cur_value = ts_block_delta.get(i); - int cur_bit_width = bit_width; // remaining bit width of current value - - if (cur_number_bits + bit_width >= 32) { - cur_remaining <<= (32 - cur_number_bits); - cur_bit_width = bit_width - 32 + cur_number_bits; - cur_remaining += ((cur_value >> cur_bit_width)); - long2intBytes(cur_remaining,encode_pos,encoded_result); - encode_pos += 4; - cur_remaining = 0; - cur_number_bits = 0; - } - - cur_remaining <<= cur_bit_width; - cur_number_bits += cur_bit_width; - cur_remaining += (((cur_value << (32 - cur_bit_width)) & 0xFFFFFFFFL) >> (32 - cur_bit_width)); - } - cur_remaining <<= (32 - cur_number_bits); - long2intBytes(cur_remaining,encode_pos,encoded_result); - encode_pos += 4; - return encode_pos; - - - } - - - public static ArrayList<Integer> decodeOutlier2Bytes( - byte[] encoded, - int decode_pos, - int bit_width, - int length, - ArrayList<Integer> encoded_pos_result - ) { - - int n_k_b = length / 8; - int remaining = length - n_k_b * 8; - ArrayList<Integer> result_list = new ArrayList<>(decodeBitPacking(encoded, decode_pos, bit_width, n_k_b * 8 + 1)); - decode_pos += n_k_b * bit_width; - - 
ArrayList<Long> int_remaining = new ArrayList<>(); - int int_remaining_size = remaining * bit_width / 32 + 1; - for (int j = 0; j < int_remaining_size; j++) { - int_remaining.add(bytesLong2Integer(encoded, decode_pos)); - decode_pos += 4; - } - - int cur_remaining_bits = 32; // remaining bit width of current value - long cur_number = int_remaining.get(0); - int cur_number_i = 1; - for (int i = n_k_b * 8; i < length; i++) { - if (bit_width < cur_remaining_bits) { - int tmp = (int) (cur_number >> (32 - bit_width)); - result_list.add(tmp); - cur_number <<= bit_width; - cur_number &= 0xFFFFFFFFL; - cur_remaining_bits -= bit_width; - } else { - int tmp = (int) (cur_number >> (32 - cur_remaining_bits)); - int remain_bits = bit_width - cur_remaining_bits; - tmp <<= remain_bits; - - cur_number = int_remaining.get(cur_number_i); - cur_number_i++; - tmp += (cur_number >> (32 - remain_bits)); - result_list.add(tmp); - cur_number <<= remain_bits; - cur_number &= 0xFFFFFFFFL; - cur_remaining_bits = 32 - remain_bits; - } - } - encoded_pos_result.add(decode_pos); - return result_list; - } - - private static void addToArchiveCompression(SevenZOutputFile out, File file, String dir) { - String name = dir + File.separator + file.getName(); - if(dir.equals(".")) { - name = file.getName(); - } - if (file.isFile()){ - SevenZArchiveEntry entry = null; - FileInputStream in = null; - try { - entry = out.createArchiveEntry(file, name); - out.putArchiveEntry(entry); - in = new FileInputStream(file); - byte[] b = new byte[1024]; - int count = 0; - while ((count = in.read(b)) > 0) { - out.write(b, 0, count); - } - } catch (IOException e) { - e.printStackTrace(); - } finally { - try { - out.closeArchiveEntry(); - in.close(); - } catch (IOException e) { - e.printStackTrace(); - } - - } - } else if (file.isDirectory()) { - File[] children = file.listFiles(); - if (children != null){ - for (File child : children){ - addToArchiveCompression(out, child, name); - } - } - } else { - 
System.out.println(file.getName() + " is not supported"); - } - } - public static int EncodeBits(int num, - int bit_width, - int encode_pos, - byte[] cur_byte, - int[] bit_index_list){ - // 找到要插入的位的索引 - int bit_index = bit_index_list[0] ;//cur_byte[encode_pos + 1]; - - // 计算数值的起始位位置 - int remaining_bits = bit_width; - - while (remaining_bits > 0) { - // 计算在当前字节中可以使用的位数 - int available_bits = bit_index; - int bits_to_write = Math.min(available_bits, remaining_bits); - - // 更新 bit_index - bit_index = available_bits - bits_to_write; - - // 计算要写入的位的掩码和数值 - int mask = (1 << bits_to_write) - 1; - int bits = (num >> (remaining_bits - bits_to_write)) & mask; - - // 写入到当前位置 - cur_byte[encode_pos] &= (byte) ~(mask << bit_index); // 清除对应位置的位 - cur_byte[encode_pos] |= (byte) (bits << bit_index); - - // 更新位宽和数值 - remaining_bits -= bits_to_write; - if (bit_index == 0) { - bit_index = 8; - encode_pos++; - } - } - bit_index_list[0] = bit_index; -// cur_byte[encode_pos + 1] = (byte) bit_index; - return encode_pos; - } - private static int BOSEncodeBitsImprove(int[] ts_block_delta, - int final_k_start_value, - int final_x_l_plus, - int final_k_end_value, - int final_x_u_minus, - int max_delta_value, - int[] min_delta, - int encode_pos, - byte[] cur_byte) { - int block_size = ts_block_delta.length; - - ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_left_outlier = new ArrayList<>(); - ArrayList<Integer> final_right_outlier = new ArrayList<>(); - ArrayList<Integer> final_normal = new ArrayList<>(); - int k1 = 0; - int k2 = 0; - - - - ArrayList<Integer> bitmap_outlier = new ArrayList<>(); - int index_bitmap_outlier = 0; - int cur_index_bitmap_outlier_bits = 0; - for (int i = 0; i < block_size; i++) { - int cur_value = ts_block_delta[i]; - if ( cur_value<= final_k_start_value) { -// encode_pos = EncodeBits(cur_value,left_bit_width,encode_pos,cur_byte); -// 
final_left_outlier.add(cur_value); - final_left_outlier_index.add(i); - if (cur_index_bitmap_outlier_bits % 8 != 7) { - index_bitmap_outlier <<= 2; - index_bitmap_outlier += 3; - cur_index_bitmap_outlier_bits += 2; - } else { - index_bitmap_outlier <<= 1; - index_bitmap_outlier += 1; - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 1; - cur_index_bitmap_outlier_bits = 1; - } - k1++; - - - } else if (cur_value >= final_k_end_value) { -// encode_pos = EncodeBits(cur_value- final_k_end_value,right_bit_width,encode_pos,cur_byte); -// final_right_outlier.add(cur_value - final_k_end_value); - final_right_outlier_index.add(i); - if (cur_index_bitmap_outlier_bits % 8 != 7) { - index_bitmap_outlier <<= 2; - index_bitmap_outlier += 2; - cur_index_bitmap_outlier_bits += 2; - } else { - index_bitmap_outlier <<= 1; - index_bitmap_outlier += 1; - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 0; - cur_index_bitmap_outlier_bits = 1; - } - k2++; - - } else { -// final_normal.add(cur_value - final_x_l_plus); -// encode_pos = EncodeBits(cur_value- final_x_l_plus,right_bit_width,encode_pos,cur_byte); - index_bitmap_outlier <<= 1; - cur_index_bitmap_outlier_bits += 1; - } - if (cur_index_bitmap_outlier_bits % 8 == 0) { - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 0; - } - } - if (cur_index_bitmap_outlier_bits % 8 != 0) { - - index_bitmap_outlier <<= (8 - cur_index_bitmap_outlier_bits % 8); - - index_bitmap_outlier &= 0xFF; - bitmap_outlier.add(index_bitmap_outlier); - } - - int final_alpha = ((k1 + k2) * getBitWith(block_size-1)) <= (block_size + k1 + k2) ? 
1 : 0; - - - int k_byte = (k1 << 1); - k_byte += final_alpha; - k_byte += (k2 << 16); - - int2Bytes(k_byte,encode_pos,cur_byte); - encode_pos += 4; - - - int2Bytes(min_delta[0],encode_pos,cur_byte); - encode_pos += 4; - int2Bytes(min_delta[1],encode_pos,cur_byte); - encode_pos += 4; - - int bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); - intByte2Bytes(bit_width_final,encode_pos,cur_byte); - encode_pos += 1; - int[] bit_index_list = new int[1]; - bit_index_list[0] = 8; - - if(final_k_start_value<0 && final_k_end_value > max_delta_value){ -// int bit_width_final= getBitWith(final_x_u_minus - final_x_l_plus); -// cur_byte[encode_pos+1] = 8; - for (int cur_value : ts_block_delta) { - encode_pos = EncodeBits(cur_value, bit_width_final, encode_pos, cur_byte, bit_index_list); -// final_normal.add(cur_value); - } - if(bit_index_list[0] != 8){ - encode_pos ++; - } -// cur_byte[encode_pos+1] = 0; -// encode_pos = encodeOutlier2Bytes(final_normal, bit_width_final,encode_pos,cur_byte); - return encode_pos; - } - - - int left_bit_width = getBitWith(final_k_start_value);//final_left_max - int right_bit_width = getBitWith(max_delta_value - final_k_end_value);//final_right_min - int2Bytes(final_x_l_plus,encode_pos,cur_byte); - encode_pos += 4; - int2Bytes(final_k_end_value,encode_pos,cur_byte); - encode_pos += 4; - -// bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); -// intByte2Bytes(bit_width_final,encode_pos,cur_byte); -// encode_pos += 1; - intByte2Bytes(left_bit_width,encode_pos,cur_byte); - encode_pos += 1; - intByte2Bytes(right_bit_width,encode_pos,cur_byte); - encode_pos += 1; - - if (final_alpha == 0) { // 0 - - for (int i : bitmap_outlier) { - - intByte2Bytes(i,encode_pos,cur_byte); - encode_pos += 1; - } - } else { - encode_pos = encodeOutlier2Bytes(final_left_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); - encode_pos = encodeOutlier2Bytes(final_right_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); - } -// 
cur_byte[encode_pos+1] = 8; -// bit_index_list[0] = 8; - for (int cur_value : ts_block_delta) { - if (cur_value <= final_k_start_value) { - encode_pos = EncodeBits(cur_value, left_bit_width, encode_pos, cur_byte,bit_index_list); - } else if (cur_value >= final_k_end_value) { - encode_pos = EncodeBits(cur_value - final_k_end_value, right_bit_width, encode_pos, cur_byte,bit_index_list); - } else { - encode_pos = EncodeBits(cur_value - final_x_l_plus, bit_width_final, encode_pos, cur_byte,bit_index_list); - } - } - if(bit_index_list[0] != 8){ - encode_pos ++; - } - -// cur_byte[encode_pos+1] = 0; - -// if(k1==0 && k2==0){ -// intByte2Bytes(bit_width_final,encode_pos,cur_byte); -// encode_pos += 1; -// -// -// } -// else{ -// int2Bytes(final_x_l_plus,encode_pos,cur_byte); -// encode_pos += 4; -// int2Bytes(final_k_end_value,encode_pos,cur_byte); -// encode_pos += 4; -// -// bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); -// intByte2Bytes(bit_width_final,encode_pos,cur_byte); -// encode_pos += 1; -// intByte2Bytes(left_bit_width,encode_pos,cur_byte); -// encode_pos += 1; -// intByte2Bytes(right_bit_width,encode_pos,cur_byte); -// encode_pos += 1; -// if (final_alpha == 0) { // 0 -// -// for (int i : bitmap_outlier) { -// -// intByte2Bytes(i,encode_pos,cur_byte); -// encode_pos += 1; -// } -// } else { -// encode_pos = encodeOutlier2Bytes(final_left_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); -// encode_pos = encodeOutlier2Bytes(final_right_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); -// } -// } - - -// if(k1+k2!=block_size) -// encode_pos = encodeOutlier2Bytes(final_normal, bit_width_final,encode_pos,cur_byte); -// if (k1 != 0) -// encode_pos = encodeOutlier2Bytes(final_left_outlier, left_bit_width,encode_pos,cur_byte); -// if (k2 != 0) -// encode_pos = encodeOutlier2Bytes(final_right_outlier, right_bit_width,encode_pos,cur_byte); - return encode_pos; - - } - private static int BOSBlockEncoderImprove(int[] ts_block, 
int block_i, int block_size, int remaining ,int encode_pos , byte[] cur_byte) { - - int[] min_delta = new int[3]; - int[] ts_block_delta = getAbsDeltaTsBlock(ts_block, block_i, block_size, remaining, min_delta); - - - block_size = remaining-1; - int max_delta_value = min_delta[2]; - - int max_bit_width = getBitWith(max_delta_value) + 1; - - - int[] findMedianArray = new int[block_size]; - System.arraycopy(ts_block_delta, 0, findMedianArray, 0, block_size); - - int median = findMedianImprove(findMedianArray); - - // int xl= median; - // int xu = median; - // xl = 2 median - xu - // xl = xu - 2 ^ beta - int left_number = 0; - int right_number = 0; - - int length_outlier = block_size; -// for(int value:findMedianArray){ -// if(value<=median) left_number++; -// if (value >= median) right_number ++; -// } - - - int final_k_start_value = -1; // x_l_minus - int final_x_l_plus = 0; // x_l_plus - int final_k_end_value = max_delta_value+1; // x_u_plus - int final_x_u_minus = max_delta_value; // x_u_minus - - int min_bits = 0; - min_bits += (getBitWith(final_k_end_value - final_k_start_value - 2) * (block_size)); - - - int[] count_left = new int[max_bit_width]; - int[] count_right = new int[max_bit_width]; - int count_0 = 0; - - - for(int i=0;i<length_outlier;i++){ - int cur_value = ts_block_delta[i]; - if(cur_value > median){ - int beta = getBitWith(cur_value - median) ; - count_right[beta] ++; - } else if (cur_value < median) { - int beta = getBitWith(median - cur_value) ; - count_left[beta] ++; - }else{ - count_0 ++; - } - - - } - - - for(int beta = max_bit_width - 1; beta > 0 ; beta --){ - left_number += count_left[beta]; - right_number += count_right[beta]; - int pow_beta = (int) pow(2,beta-1); - int xu = min(max_delta_value+1, median + pow_beta) ; - int xl = max(median - pow_beta,-1); - int cur_bits = Math.min((left_number + right_number) * getBitWith(block_size - 1), block_size + left_number + right_number); - cur_bits += left_number * getBitWith(xl); - cur_bits += 
right_number * getBitWith(max_delta_value - xu); - cur_bits += (block_size - left_number - right_number) * getBitWith(xu - xl - 2); - if (cur_bits < min_bits) { - min_bits = cur_bits; - - final_k_start_value = xl; - final_x_l_plus = xl + 1; - final_k_end_value = xu; - final_x_u_minus = xu -1; - } - - } - - encode_pos = BOSEncodeBitsImprove(ts_block_delta, final_k_start_value, final_x_l_plus, final_k_end_value, final_x_u_minus, - max_delta_value, min_delta, encode_pos , cur_byte); - - return encode_pos; - } - - public static int BOSEncoderImprove( - int[] data, int block_size, byte[] encoded_result) { - block_size++; - - - int length_all = data.length; - - int encode_pos = 0; - int2Bytes(length_all,encode_pos,encoded_result); - encode_pos += 4; - int block_num = length_all / block_size; - int2Bytes(block_size,encode_pos,encoded_result); - encode_pos+= 4; - - for (int i = 0; i < block_num; i++) { - - encode_pos = BOSBlockEncoderImprove(data, i, block_size,block_size, encode_pos,encoded_result); - - } - - int remaining_length = length_all - block_num * block_size; - if (remaining_length <= 3) { - for (int i = remaining_length; i > 0; i--) { - int2Bytes(data[data.length - i], encode_pos, encoded_result); - encode_pos += 4; - } - - } else { - - int start = block_num * block_size; - int remaining = length_all-start; - - - encode_pos = BOSBlockEncoderImprove(data, block_num, block_size,remaining, encode_pos,encoded_result); - - -// int[] ts_block = new int[length_all-start]; -// if (length_all - start >= 0) System.arraycopy(data, start, ts_block, 0, length_all - start); -// -// int supple_length; -// if (remaining_length % 8 == 0) { -// supple_length = 1; -// } else if (remaining_length % 8 == 1) { -// supple_length = 0; -// } else { -// supple_length = 9 - remaining_length % 8; -// } -// -// -// encode_pos = BOSBlockEncoder(ts_block, supple_length, encode_pos,encoded_result); - } - - - return encode_pos; - } - - private static int BOSEncodeBits(int[] ts_block_delta, - 
int final_k_start_value, - int final_x_l_plus, - int final_k_end_value, - int final_x_u_minus, - int max_delta_value, - int[] min_delta, - int encode_pos, - byte[] cur_byte) { - int block_size = ts_block_delta.length; - - ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_left_outlier = new ArrayList<>(); - ArrayList<Integer> final_right_outlier = new ArrayList<>(); - ArrayList<Integer> final_normal = new ArrayList<>(); - int k1 = 0; - int k2 = 0; - - ArrayList<Integer> bitmap_outlier = new ArrayList<>(); - int index_bitmap_outlier = 0; - int cur_index_bitmap_outlier_bits = 0; - for (int i = 0; i < block_size; i++) { - int cur_value = ts_block_delta[i]; - if ( cur_value<= final_k_start_value) { - final_left_outlier.add(cur_value); - final_left_outlier_index.add(i); - if (cur_index_bitmap_outlier_bits % 8 != 7) { - index_bitmap_outlier <<= 2; - index_bitmap_outlier += 3; - cur_index_bitmap_outlier_bits += 2; - } else { - index_bitmap_outlier <<= 1; - index_bitmap_outlier += 1; - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 1; - cur_index_bitmap_outlier_bits = 1; - } - k1++; - - - } else if (cur_value >= final_k_end_value) { - final_right_outlier.add(cur_value - final_k_end_value); - final_right_outlier_index.add(i); - if (cur_index_bitmap_outlier_bits % 8 != 7) { - index_bitmap_outlier <<= 2; - index_bitmap_outlier += 2; - cur_index_bitmap_outlier_bits += 2; - } else { - index_bitmap_outlier <<= 1; - index_bitmap_outlier += 1; - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 0; - cur_index_bitmap_outlier_bits = 1; - } - k2++; - - } else { - final_normal.add(cur_value - final_k_start_value-1); - index_bitmap_outlier <<= 1; - cur_index_bitmap_outlier_bits += 1; - } - if (cur_index_bitmap_outlier_bits % 8 == 0) { - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 0; - } - } - if 
(cur_index_bitmap_outlier_bits % 8 != 0) { - - index_bitmap_outlier <<= (8 - cur_index_bitmap_outlier_bits % 8); - index_bitmap_outlier &= 0xFF; - bitmap_outlier.add(index_bitmap_outlier); - } - - int final_alpha = ((k1 + k2) * getBitWith(block_size-1)) <= (block_size + k1 + k2) ? 1 : 0; - - - int k_byte = (k1 << 1); - k_byte += final_alpha; - k_byte += (k2 << 16); - - - int2Bytes(k_byte,encode_pos,cur_byte); - encode_pos += 4; - - int2Bytes(min_delta[0],encode_pos,cur_byte); - encode_pos += 4; - int2Bytes(min_delta[1],encode_pos,cur_byte); - encode_pos += 4; - - int bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); - int left_bit_width = getBitWith(final_k_start_value);//final_left_max - int right_bit_width = getBitWith(max_delta_value - final_k_end_value);//final_right_min - - if(k1==0 && k2==0){ - bit_width_final = getBitWith(max_delta_value); - intByte2Bytes(bit_width_final,encode_pos,cur_byte); - encode_pos += 1; - -// encode_pos = encodeOutlier2Bytes(final_normal, bit_width_final,encode_pos,cur_byte); -// return encode_pos; - } - else{ - int2Bytes(final_x_l_plus,encode_pos,cur_byte); - encode_pos += 4; - int2Bytes(final_k_end_value,encode_pos,cur_byte); - encode_pos += 4; - - bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); - intByte2Bytes(bit_width_final,encode_pos,cur_byte); - encode_pos += 1; - intByte2Bytes(left_bit_width,encode_pos,cur_byte); - encode_pos += 1; - intByte2Bytes(right_bit_width,encode_pos,cur_byte); - encode_pos += 1; - if (final_alpha == 0) { // 0 - - for (int i : bitmap_outlier) { - - intByte2Bytes(i,encode_pos,cur_byte); - encode_pos += 1; - } - } else { - encode_pos = encodeOutlier2Bytes(final_left_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); - encode_pos = encodeOutlier2Bytes(final_right_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); - } - } - - encode_pos = encodeOutlier2Bytes(final_normal, bit_width_final,encode_pos,cur_byte); - if (k1 != 0) - encode_pos = 
encodeOutlier2Bytes(final_left_outlier, left_bit_width,encode_pos,cur_byte); - if (k2 != 0) - encode_pos = encodeOutlier2Bytes(final_right_outlier, right_bit_width,encode_pos,cur_byte); - return encode_pos; - - } - - - private static int BOSBlockEncoder(int[] ts_block, int block_i, int block_size, int remaining ,int encode_pos , byte[] cur_byte) { - - int[] min_delta = new int[3]; - int[] ts_block_delta = getAbsDeltaTsBlock(ts_block, block_i, block_size, remaining, min_delta); - - - block_size = remaining-1; - int max_delta_value = min_delta[2]; - - int max_bit_width = getBitWith(max_delta_value) + 1; - - - int[] findMedianArray = new int[block_size]; - System.arraycopy(ts_block_delta, 0, findMedianArray, 0, block_size); - - int median = findMedian(findMedianArray); - - // int xl= median; - // int xu = median; - // xl = 2 median - xu - // xl = xu - 2 ^ beta - int left_number = 0; - int right_number = 0; - - int length_outlier = block_size; -// for(int value:findMedianArray){ -// if(value<=median) left_number++; -// if (value >= median) right_number ++; -// } - - - int final_k_start_value = -1; // x_l_minus - int final_x_l_plus = 0; // x_l_plus - int final_k_end_value = max_delta_value+1; // x_u_plus - int final_x_u_minus = max_delta_value; // x_u_minus - - int min_bits = 0; - min_bits += (getBitWith(final_k_end_value - final_k_start_value - 2) * (block_size)); - - - int[] count_left = new int[max_bit_width]; - int[] count_right = new int[max_bit_width]; - int count_0 = 0; - - - for(int i=0;i<length_outlier;i++){ - int cur_value = ts_block_delta[i]; - if(cur_value > median){ - int beta = getBitWith(cur_value - median) ; - count_right[beta] ++; - } else if (cur_value < median) { - int beta = getBitWith(median - cur_value) ; - count_left[beta] ++; - }else{ - count_0 ++; - } - - - } - - - for(int beta = max_bit_width - 1; beta > 0 ; beta --){ - left_number += count_left[beta]; - right_number += count_right[beta]; - int pow_beta = (int) pow(2,beta-1); - int xu = 
min(max_delta_value+1, median + pow_beta) ; - int xl = max(median - pow_beta,-1); - int cur_bits = Math.min((left_number + right_number) * getBitWith(block_size - 1), block_size + left_number + right_number); - cur_bits += left_number * getBitWith(xl); - cur_bits += right_number * getBitWith(max_delta_value - xu); - cur_bits += (block_size - left_number - right_number) * getBitWith(xu - xl - 2); - if (cur_bits < min_bits) { - min_bits = cur_bits; - - final_k_start_value = xl; - final_x_l_plus = xl + 1; - final_k_end_value = xu; - final_x_u_minus = xu -1; - } - - } - - encode_pos = BOSEncodeBits(ts_block_delta, final_k_start_value, final_x_l_plus, final_k_end_value, final_x_u_minus, - max_delta_value, min_delta, encode_pos , cur_byte); - - return encode_pos; - } - - - public static int BOSEncoder( - int[] data, int block_size, byte[] encoded_result) { - block_size++; - - - int length_all = data.length; - - int encode_pos = 0; - int2Bytes(length_all,encode_pos,encoded_result); - encode_pos += 4; - int block_num = length_all / block_size; - int2Bytes(block_size,encode_pos,encoded_result); - encode_pos+= 4; - - for (int i = 0; i < block_num; i++) { - - encode_pos = BOSBlockEncoder(data, i, block_size,block_size, encode_pos,encoded_result); - - } - - int remaining_length = length_all - block_num * block_size; - if (remaining_length <= 3) { - for (int i = remaining_length; i > 0; i--) { - int2Bytes(data[data.length - i], encode_pos, encoded_result); - encode_pos += 4; - } - - } else { - - int start = block_num * block_size; - int remaining = length_all-start; - - - encode_pos = BOSBlockEncoder(data, block_num, block_size,remaining, encode_pos,encoded_result); - - -// int[] ts_block = new int[length_all-start]; -// if (length_all - start >= 0) System.arraycopy(data, start, ts_block, 0, length_all - start); -// -// int supple_length; -// if (remaining_length % 8 == 0) { -// supple_length = 1; -// } else if (remaining_length % 8 == 1) { -// supple_length = 0; -// } else { 
-// supple_length = 9 - remaining_length % 8; -// } -// -// -// encode_pos = BOSBlockEncoder(ts_block, supple_length, encode_pos,encoded_result); - } - - - return encode_pos; - } - - - public static int BOSBlockDecoder(byte[] encoded, int decode_pos, int[] value_list, int block_size, int[] value_pos_arr) { - int k_byte = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - int k1_byte = (int) (k_byte % pow(2, 16)); - int k1 = k1_byte / 2; - int final_alpha = k1_byte % 2; - - int k2 = (int) (k_byte / pow(2, 16)); - - int value0 = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - value_list[value_pos_arr[0]] =value0; - value_pos_arr[0] ++; - - int min_delta = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_left_outlier = new ArrayList<>(); - ArrayList<Integer> final_right_outlier = new ArrayList<>(); - ArrayList<Integer> final_normal= new ArrayList<>();; - ArrayList<Integer> bitmap_outlier = new ArrayList<>(); - int final_k_start_value = 0; - int final_k_end_value = 0; - int bit_width_final = 0; - int left_bit_width = 0; - int right_bit_width = 0; - - if(k1!=0 || k2 != 0){ - final_k_start_value = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - final_k_end_value = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - bit_width_final = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - left_bit_width = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - right_bit_width = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - if (final_alpha == 0) { - int bitmap_bytes = (int) Math.ceil((double) (block_size + k1 + k2) / (double) 8); - for (int i = 0; i < bitmap_bytes; i++) { - bitmap_outlier.add(bytes2Integer(encoded, decode_pos, 1)); - decode_pos += 1; - } - int bitmap_outlier_i = 0; - int remaining_bits = 8; - int tmp = 
bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - int i = 0; - while (i < block_size ) { - if (remaining_bits > 1) { - int bit_i = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_i == 1) { - int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_left_right == 1) { - final_left_outlier_index.add(i); - } else { - final_right_outlier_index.add(i); - } - } - if (remaining_bits == 0) { - remaining_bits = 8; - if (bitmap_outlier_i >= bitmap_bytes) break; - tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - } - } else if (remaining_bits == 1) { - int bit_i = tmp & 0x1; - remaining_bits = 8; - if (bitmap_outlier_i >= bitmap_bytes) break; - tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - if (bit_i == 1) { - int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_left_right == 1) { - final_left_outlier_index.add(i); - } else { - final_right_outlier_index.add(i); - } - } - } - i++; - } - } else { - ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); - final_left_outlier_index = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(block_size-1), k1, decode_pos_result_left); - decode_pos = (decode_pos_result_left.get(0)); - ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); - final_right_outlier_index = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(block_size-1), k2, decode_pos_result_right); - decode_pos = (decode_pos_result_right.get(0)); - } - }else { - bit_width_final = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - } - - - ArrayList<Integer> decode_pos_normal = new ArrayList<>(); - final_normal = decodeOutlier2Bytes(encoded, decode_pos, bit_width_final, block_size - k1 - k2, decode_pos_normal); - - decode_pos = decode_pos_normal.get(0); - if (k1 != 0) { - ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); - final_left_outlier = decodeOutlier2Bytes(encoded, decode_pos, 
left_bit_width, k1, decode_pos_result_left); - decode_pos = decode_pos_result_left.get(0); - } - if (k2 != 0) { - ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); - final_right_outlier = decodeOutlier2Bytes(encoded, decode_pos, right_bit_width, k2, decode_pos_result_right); - decode_pos = decode_pos_result_right.get(0); - } - int left_outlier_i = 0; - int right_outlier_i = 0; - int normal_i = 0; - int pre_v = value0; -// int final_k_end_value = (int) (final_k_start_value + pow(2, bit_width_final)); - - - for (int i = 0; i < block_size; i++) { - int current_delta; - if (left_outlier_i >= k1) { - if (right_outlier_i >= k2) { - current_delta = final_normal.get(normal_i) + final_k_start_value+1; - normal_i++; - } else if (i == final_right_outlier_index.get(right_outlier_i)) { - current_delta = final_right_outlier.get(right_outlier_i) + final_k_end_value; - right_outlier_i++; - } else { - current_delta = final_normal.get(normal_i) + final_k_start_value+1; - normal_i++; - } - } else if (i == final_left_outlier_index.get(left_outlier_i)) { - current_delta = final_left_outlier.get(left_outlier_i); - left_outlier_i++; - } else { - - if (right_outlier_i >= k2) { - current_delta = final_normal.get(normal_i) + final_k_start_value+1; - normal_i++; - } else if (i == final_right_outlier_index.get(right_outlier_i)) { - current_delta = final_right_outlier.get(right_outlier_i) + final_k_end_value; - right_outlier_i++; - } else { - current_delta = final_normal.get(normal_i) + final_k_start_value+1; - normal_i++; - } - } - - pre_v = deZigzag(current_delta) + min_delta + pre_v; - value_list[value_pos_arr[0]] =pre_v; - value_pos_arr[0] ++; - } - return decode_pos; - } - - public static int DecodeBits(byte[] cur_byte, int bit_width, int[] decode_pos_list) { - int decode_pos = decode_pos_list[0]; - int bit_index = decode_pos_list[1]; //cur_byte[decode_pos + 1]; - int remaining_bits = bit_width; - int num = 0; - - while (remaining_bits > 0) { - int available_bits = 
bit_index; - int bits_to_read = Math.min(available_bits, remaining_bits); - - // 计算要读取的位的掩码 - int mask = (1 << bits_to_read) - 1; - int bits = (cur_byte[decode_pos] >> (available_bits - bits_to_read)) & mask; - - // 将读取的位合并到结果中 - num = (num << bits_to_read) | bits; - - // 更新位宽和 bit_index - remaining_bits -= bits_to_read; - bit_index = available_bits - bits_to_read; - - if (bit_index == 0) { - bit_index = 8; - decode_pos++; - } - } - decode_pos_list[0] = decode_pos; - decode_pos_list[1] = bit_index; - - return num; - } - public static int BOSBlockDecoderImprove(byte[] encoded, int decode_pos, int[] value_list, int block_size, int[] value_pos_arr) { - - int k_byte = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - int k1_byte = (int) (k_byte % pow(2, 16)); - int k1 = k1_byte / 2; - int final_alpha = k1_byte % 2; - - int k2 = (int) (k_byte / pow(2, 16)); - - int value0 = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - value_list[value_pos_arr[0]] =value0; - value_pos_arr[0] ++; - - int min_delta = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - int bit_width_final = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - int[] decode_list = new int[2]; - decode_list[0]= decode_pos; - decode_list[1]= 8; - - int valuePos = value_pos_arr[0]; - - if(k1==0 && k2==0){ - int pre_v = value0; - for (int i = 0; i < block_size; i++) { - int cur_delta = min_delta + DecodeBits(encoded, bit_width_final, decode_list); - pre_v += deZigzag(cur_delta); - value_list[valuePos++] = pre_v; - } - value_pos_arr[0] = valuePos; - return decode_list[0]; - } - - ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); -// ArrayList<Integer> final_left_outlier = new ArrayList<>(); -// ArrayList<Integer> final_right_outlier = new ArrayList<>(); -// ArrayList<Integer> final_normal= new ArrayList<>();; - ArrayList<Integer> bitmap_outlier = new ArrayList<>(); - int 
final_k_start_value = 0; - int final_k_end_value = 0; -// int bit_width_final = 0; - int left_bit_width = 0; - int right_bit_width = 0; - - final_k_start_value = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - final_k_end_value = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - -// bit_width_final = bytes2Integer(encoded, decode_pos, 1); -// decode_pos += 1; - - left_bit_width = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - right_bit_width = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - if (final_alpha == 0) { - int bitmap_bytes = (int) Math.ceil((double) (block_size + k1 + k2) / (double) 8); - for (int i = 0; i < bitmap_bytes; i++) { - bitmap_outlier.add(bytes2Integer(encoded, decode_pos, 1)); - decode_pos += 1; - } - int bitmap_outlier_i = 0; - int remaining_bits = 8; - int tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - int i = 0; - while (i < block_size ) { - if (remaining_bits > 1) { - int bit_i = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_i == 1) { - int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_left_right == 1) { - final_left_outlier_index.add(i); - } else { - final_right_outlier_index.add(i); - } - } - if (remaining_bits == 0) { - remaining_bits = 8; - if (bitmap_outlier_i >= bitmap_bytes) break; - tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - } - } else if (remaining_bits == 1) { - int bit_i = tmp & 0x1; - remaining_bits = 8; - if (bitmap_outlier_i >= bitmap_bytes) break; - tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - if (bit_i == 1) { - int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_left_right == 1) { - final_left_outlier_index.add(i); - } else { - final_right_outlier_index.add(i); - } - } - } - i++; - } - } else { - ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); - final_left_outlier_index = 
decodeOutlier2Bytes(encoded, decode_pos, getBitWith(block_size-1), k1, decode_pos_result_left); - decode_pos = (decode_pos_result_left.get(0)); - ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); - final_right_outlier_index = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(block_size-1), k2, decode_pos_result_right); - decode_pos = (decode_pos_result_right.get(0)); - } - - - - - -// ArrayList<Integer> decode_pos_normal = new ArrayList<>(); -// final_normal = decodeOutlier2Bytes(encoded, decode_pos, bit_width_final, block_size - k1 - k2, decode_pos_normal); -// -// decode_pos = decode_pos_normal.get(0); -// if (k1 != 0) { -// ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); -// final_left_outlier = decodeOutlier2Bytes(encoded, decode_pos, left_bit_width, k1, decode_pos_result_left); -// decode_pos = decode_pos_result_left.get(0); -// } -// if (k2 != 0) { -// ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); -// final_right_outlier = decodeOutlier2Bytes(encoded, decode_pos, right_bit_width, k2, decode_pos_result_right); -// decode_pos = decode_pos_result_right.get(0); -// } - int left_outlier_i = 0; - int right_outlier_i = 0; - int normal_i = 0; - int pre_v = value0; -// int final_k_end_value = (int) (final_k_start_value + pow(2, bit_width_final)); - -// Precompute constants - int normalOffset = min_delta + final_k_start_value; - int rightOutlierOffset = min_delta + final_k_end_value; - -// Initialize indices and pre-fetch next outlier positions - int leftOutlierNextIndex = (left_outlier_i < k1) ? final_left_outlier_index.get(left_outlier_i) : Integer.MAX_VALUE; - int rightOutlierNextIndex = (right_outlier_i < k2) ? 
final_right_outlier_index.get(right_outlier_i) : Integer.MAX_VALUE; - decode_list[0]= decode_pos; -// decode_list[1]= 8; - // Use a local variable for the position - for (int i = 0; i < block_size; i++) { - int currentDelta; - if (i == leftOutlierNextIndex) { - // Process left outlier - currentDelta = min_delta + DecodeBits(encoded, left_bit_width, decode_list); // final_left_outlier.get(left_outlier_i); - left_outlier_i++; - leftOutlierNextIndex = (left_outlier_i < k1) ? final_left_outlier_index.get(left_outlier_i) : Integer.MAX_VALUE; - } else if (i == rightOutlierNextIndex) { - // Process right outlier - currentDelta = rightOutlierOffset + DecodeBits(encoded, right_bit_width, decode_list);// final_right_outlier.get(right_outlier_i); - right_outlier_i++; - rightOutlierNextIndex = (right_outlier_i < k2) ? final_right_outlier_index.get(right_outlier_i) : Integer.MAX_VALUE; - } else { - // Process normal value - currentDelta = normalOffset + DecodeBits(encoded, bit_width_final, decode_list); - normal_i++; - } - - // Update the cumulative value and store it - pre_v += deZigzag(currentDelta); - value_list[valuePos++] = pre_v; - } - value_pos_arr[0] = valuePos; - if(decode_list[1]!=8){ - return decode_list[0]+1; - }else { - return decode_list[0]; - } -// decode_pos = decode_list[0]; -// Update the position in the array - - -// return decode_pos; - } - public static void BOSDecoderImprove(byte[] encoded) { - - int decode_pos = 0; - int length_all = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - int block_size = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - int block_num = length_all / block_size; - int remain_length = length_all - block_num * block_size; - - int[] value_list = new int[length_all+block_size]; - block_size --; - - int[] value_pos_arr = new int[1]; - for (int k = 0; k < block_num; k++) { - decode_pos = BOSBlockDecoderImprove(encoded, decode_pos, value_list, block_size,value_pos_arr); - - } - - if (remain_length <= 3) { - for 
(int i = 0; i < remain_length; i++) { - int value_end = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - value_list[value_pos_arr[0]] = value_end; - value_pos_arr[0]++; - } - } else { - remain_length --; - BOSBlockDecoderImprove(encoded, decode_pos, value_list, remain_length, value_pos_arr); - } - } - - public static void BOSDecoder(byte[] encoded) { - - int decode_pos = 0; - int length_all = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - int block_size = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - int block_num = length_all / block_size; - int remain_length = length_all - block_num * block_size; - - int[] value_list = new int[length_all+block_size]; - block_size --; - - int[] value_pos_arr = new int[1]; - for (int k = 0; k < block_num; k++) { - decode_pos = BOSBlockDecoder(encoded, decode_pos, value_list, block_size,value_pos_arr); - - } - - if (remain_length <= 3) { - for (int i = 0; i < remain_length; i++) { - int value_end = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - value_list[value_pos_arr[0]] = value_end; - value_pos_arr[0]++; - } - } else { - remain_length --; - BOSBlockDecoder(encoded, decode_pos, value_list, remain_length, value_pos_arr); - } - } - - public static void main(@org.jetbrains.annotations.NotNull String[] args) throws IOException { - String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/";// your data path -// String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String output_parent_dir = parent_dir + "icde0802/compression_ratio/sprintz_bos_m"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - 
dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } - - output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 -// dataset_block_size.add(256); - output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 -// dataset_block_size.add(1024); - -// for (int file_i = 8; file_i < 9; file_i++) { - for (int file_i = 0; file_i < input_path_list.size(); file_i++) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - 
String Output = output_path_list.get(file_i); - - - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { -// f = tempList[1]; - System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - loader.readHeaders(); - - while (loader.readRecord()) { - - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); - - } - - inputStream.close(); - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; - int repeatTime2 = 100; - - int length = 0; - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - length = BOSEncoder(data2_arr, dataset_block_size.get(file_i), encoded_result); - } - - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - compressed_size += length; - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoder(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - String[] record = { - f.toString(), - "SPRINTZ+BOS-M", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - 
String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); - - - } - writer.close(); - - } - } - - @Test - public void compressTest() throws IOException { -// String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/";// your data path - String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String output_parent_dir = parent_dir + "icde0802/supply_experiment/R3O2_compare_compression/compression_ratio/sprintz_m_c"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } - - output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 -// dataset_block_size.add(256); - output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 
-// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 -// dataset_block_size.add(1024); - - int repeatTime2 = 1; -// for (int file_i = 8; file_i < 9; file_i++) { - CompressionType[] compressList = { - CompressionType.LZ4, - CompressionType.LZMA2, - }; - - for (int file_i = input_path_list.size()-1; file_i >=0 ; file_i--) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", - "Compress Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { - System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - loader.readHeaders(); - while (loader.readRecord()) { - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); - } - inputStream.close(); - - int[] data2_arr = new 
int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - long encodeTime = 0; - long decodeTime = 0; - int length = 0; - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - length = BOSEncoder(data2_arr, dataset_block_size.get(file_i), encoded_result); - } - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - - for (CompressionType comp : compressList) { - double ratio = 0; - double compressed_size = 0; - ICompressor compressor = ICompressor.getCompressor(comp); - byte[] compressed = compressor.compress(encoded_result); - - // test compression ratio and compressed size - compressed_size += compressed.length; - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoder(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - - String[] record = { - f.toString(), - "SPRINTZ+BOS-M", - comp.toString(), - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); - } - double ratio = 0; - double compressed_size = 0; - File outfile = new File(parent_dir + "icde0802/example.bin"); - - // 使用FileOutputStream将byte数组写入文件 - try (FileOutputStream fos = new FileOutputStream(outfile)) { - fos.write(encoded_result); - } catch (IOException e2) { - // 处理可能的I/O异常 - e2.printStackTrace(); - } - - File input = new File(parent_dir + "icde0802/example.bin"); - File output = new File(parent_dir + "icde0802/example.7z"); - SevenZOutputFile out = new SevenZOutputFile(output); - - addToArchiveCompression(out, input, "."); - out.closeArchiveEntry(); - - long compressed = output.length(); - - - // test compression ratio and compressed size 
- compressed_size += compressed; - double ratioTmp = - (double) compressed / (double) (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - - - String[] record = { - f.toString(), - "SPRINTZ+BOS-M", - "7ZIP", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); - } - writer.close(); - } - } - - @Test - public void ExpTest() throws IOException {// String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/";// your data path - String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String output_parent_dir = parent_dir + "icde0802/compression_ratio/sprintz_exp_test"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - dataset_name.add("Synthetic_Exp_100"); - dataset_name.add("Synthetic_Exp_1000"); - dataset_name.add("Synthetic_Exp_10000"); - dataset_name.add("Synthetic_Exp_100000"); - dataset_name.add("Synthetic_Exp_1000000"); - dataset_name.add("Synthetic_Normal_100"); - dataset_name.add("Synthetic_Normal_1000"); - dataset_name.add("Synthetic_Normal_10000"); - dataset_name.add("Synthetic_Normal_100000"); - dataset_name.add("Synthetic_Normal_1000000"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } - - output_path_list.add(output_parent_dir + "/Exp_100.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Exp_1000.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Exp_10000.csv");// 2 - output_path_list.add(output_parent_dir + "/Exp_100000.csv");// 2 - 
output_path_list.add(output_parent_dir + "/Exp_1000000.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Normal_100.csv"); // 3 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Normal_1000.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Normal_10000.csv");//5 - output_path_list.add(output_parent_dir + "/Normal_100000.csv");//5 - output_path_list.add(output_parent_dir + "/Normal_1000000.csv");//5 -// dataset_block_size.add(2048); - -// for (int file_i = 8; file_i < 9; file_i++) { - for (int file_i = 0; file_i < input_path_list.size(); file_i++) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { -// f = tempList[1]; - System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - loader.readHeaders(); - - while (loader.readRecord()) { - - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); - - } - - inputStream.close(); - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; - int repeatTime2 = 
1; - - int length = 0; - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - length = BOSEncoder(data2_arr, dataset_block_size.get(file_i), encoded_result); - } - - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - compressed_size += length; - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoder(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - String[] record = { - f.toString(), - "SPRINTZ+BOS-M", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); - - - } - writer.close(); - - } - } - - @Test - public void BOSImproveDecodeTest() throws IOException { - String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/"; // your data path -// String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String output_parent_dir = parent_dir + "icde0802/supply_experiment/R1O4_decode_time/compression_ratio/sprintz_bos_m"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - for 
(String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } - - output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 -// dataset_block_size.add(1024); - - int repeatTime2 = 100; -// for (int file_i = 9; file_i < 10; file_i++) { -// - for (int file_i = 0; file_i < input_path_list.size(); file_i++) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - 
"Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { -// f=tempList[2]; - System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - - loader.readHeaders(); - while (loader.readRecord()) { -// String value = loader.getValues()[index]; - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); -// data.add(Integer.valueOf(value)); - } - inputStream.close(); - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; - - - int length = 0; - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - length = BOSEncoderImprove(data2_arr, dataset_block_size.get(file_i), encoded_result); - } - - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - compressed_size += length; - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoderImprove(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - - String[] record = { - f.toString(), - "SPRINTZ+BOS-M", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); -// break; - } - writer.close(); - } - } - - @Test - public void BOSImproveEncodeTest() throws IOException { - String parent_dir = 
"/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/"; // your data path -// String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String output_parent_dir = parent_dir + "icde0802/compression_ratio/sprintz_bos_m_improve"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } - - output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 -// dataset_block_size.add(2048); - 
output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 -// dataset_block_size.add(1024); - - int repeatTime2 = 100; -// for (int file_i = 9; file_i < 10; file_i++) { -// - for (int file_i = 0; file_i < input_path_list.size(); file_i++) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { -// f=tempList[2]; - System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - - loader.readHeaders(); - while (loader.readRecord()) { -// String value = loader.getValues()[index]; - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); -// data.add(Integer.valueOf(value)); - } - inputStream.close(); - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; - - - int length = 0; - - long s 
= System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - length = BOSEncoderImprove(data2_arr, dataset_block_size.get(file_i), encoded_result); - } - - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - compressed_size += length; - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoderImprove(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - - String[] record = { - f.toString(), - "SPRINTZ+BOS-M", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); -// break; - } - writer.close(); - } - } - -}
diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SPRINTZBOSVTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SPRINTZBOSVTest.java deleted file mode 100644 index da8590d..0000000 --- a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SPRINTZBOSVTest.java +++ /dev/null
@@ -1,1807 +0,0 @@ -package org.apache.iotdb.tsfile.encoding; - -import com.csvreader.CsvReader; -import com.csvreader.CsvWriter; -import org.junit.Test; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.util.ArrayList; -import java.util.Arrays; - -import static java.lang.Math.*; - -public class SPRINTZBOSVTest { - - public static int getBitWith(int num) { - if (num == 0) return 1; - else return 32 - Integer.numberOfLeadingZeros(num); - } - public static int getCount(long long1, int mask) { - return ((int) (long1 & mask)); - } - public static int getUniqueValue(long long1, int left_shift) { - return ((int) ((long1) >> left_shift)); - } - - public static int zigzag(int num) { - if (num < 0) return ((-num)<<1)-1; - else return num<<1; - } - - public static int deZigzag(int num) { - if (num % 2 == 0) return num>>1; - else return -((num+1)>>1); - } - - public static void int2Bytes(int integer,int encode_pos , byte[] cur_byte) { - cur_byte[encode_pos] = (byte) (integer >> 24); - cur_byte[encode_pos+1] = (byte) (integer >> 16); - cur_byte[encode_pos+2] = (byte) (integer >> 8); - cur_byte[encode_pos+3] = (byte) (integer); - } - - - public static void intByte2Bytes(int integer, int encode_pos , byte[] cur_byte) { - cur_byte[encode_pos] = (byte) (integer); - } - - private static void long2intBytes(long integer, int encode_pos , byte[] cur_byte) { - cur_byte[encode_pos] = (byte) (integer >> 24); - cur_byte[encode_pos+1] = (byte) (integer >> 16); - cur_byte[encode_pos+2] = (byte) (integer >> 8); - cur_byte[encode_pos+3] = (byte) (integer); - } - - public static int bytes2Integer(byte[] encoded, int start, int num) { - int value = 0; - if (num > 4) { - System.out.println("bytes2Integer error"); - return 0; - } - for (int i = 0; i < num; i++) { - value <<= 8; - int b = encoded[i + start] & 0xFF; - value |= b; - } - return value; - } - - private static long 
bytesLong2Integer(byte[] encoded, int decode_pos) { - long value = 0; - for (int i = 0; i < 4; i++) { - value <<= 8; - int b = encoded[i + decode_pos] & 0xFF; - value |= b; - } - return value; - } - - public static void pack8Values(ArrayList<Integer> values, int offset, int width, int encode_pos, byte[] encoded_result) { - int bufIdx = 0; - int valueIdx = offset; - // remaining bits for the current unfinished Integer - int leftBit = 0; - - while (valueIdx < 8 + offset) { - // buffer is used for saving 32 bits as a part of result - int buffer = 0; - // remaining size of bits in the 'buffer' - int leftSize = 32; - - // encode the left bits of current Integer to 'buffer' - if (leftBit > 0) { - buffer |= (values.get(valueIdx) << (32 - leftBit)); - leftSize -= leftBit; - leftBit = 0; - valueIdx++; - } - - while (leftSize >= width && valueIdx < 8 + offset) { - // encode one Integer to the 'buffer' - buffer |= (values.get(valueIdx)<< (leftSize - width)); - leftSize -= width; - valueIdx++; - } - // If the remaining space of the buffer can not save the bits for one Integer, - if (leftSize > 0 && valueIdx < 8 + offset) { - // put the first 'leftSize' bits of the Integer into remaining space of the - // buffer - buffer |= (values.get(valueIdx) >>> (width - leftSize)); - leftBit = width - leftSize; - } - - // put the buffer into the final result - for (int j = 0; j < 4; j++) { - encoded_result[encode_pos] = (byte) ((buffer >>> ((3 - j) * 8)) & 0xFF); - encode_pos ++; - bufIdx++; - if (bufIdx >= width) { - return ; - } - } - } - - } - - public static void unpack8Values(byte[] encoded, int offset,int width, ArrayList<Integer> result_list) { - int byteIdx = offset; - long buffer = 0; - // total bits which have read from 'buf' to 'buffer'. i.e., - // number of available bits to be decoded. 
- int totalBits = 0; - int valueIdx = 0; - - while (valueIdx < 8) { - // If current available bits are not enough to decode one Integer, - // then add next byte from buf to 'buffer' until totalBits >= width - while (totalBits < width) { - buffer = (buffer << 8) | (encoded[byteIdx] & 0xFF); - byteIdx++; - totalBits += 8; - } - - // If current available bits are enough to decode one Integer, - // then decode one Integer one by one until left bits in 'buffer' is - // not enough to decode one Integer. - while (totalBits >= width && valueIdx < 8) { - result_list.add ((int) (buffer >>> (totalBits - width))); - valueIdx++; - totalBits -= width; - buffer = buffer & ((1L << totalBits) - 1); - } - } - } - - public static int bitPacking(ArrayList<Integer> numbers, int start, int bit_width,int encode_pos, byte[] encoded_result) { - int block_num = (numbers.size()-start) / 8; - for(int i=0;i<block_num;i++){ - pack8Values( numbers, start+i*8, bit_width,encode_pos, encoded_result); - encode_pos +=bit_width; - } - - return encode_pos; - - } - - public static ArrayList<Integer> decodeBitPacking( - byte[] encoded, int decode_pos, int bit_width, int block_size) { - ArrayList<Integer> result_list = new ArrayList<>(); - int block_num = (block_size - 1) / 8; - - for (int i = 0; i < block_num; i++) { // bitpacking - unpack8Values( encoded, decode_pos, bit_width, result_list); - decode_pos += bit_width; - } - return result_list; - } - - // ----------------------------------------------------------------- - - - public static int[] getAbsDeltaTsBlock( - int[] ts_block, - int i, - int block_size, - int remaining, - int[] min_delta) { - int[] ts_block_delta = new int[remaining-1]; - - int base = i*block_size+1; - int end = i*block_size+remaining; - min_delta[0]=ts_block[base-1]; - int value_delta_min = Integer.MAX_VALUE; - int value_delta_max = Integer.MIN_VALUE; - for (int j = base; j < end; j++) { - int epsilon_v = ts_block[j] - ts_block[j - 1]; - epsilon_v = zigzag(epsilon_v); - if 
(epsilon_v < value_delta_min) { - value_delta_min = epsilon_v; - } - if (epsilon_v > value_delta_max) { - value_delta_max = epsilon_v; - } - ts_block_delta[j-base] =epsilon_v; - - } - for (int j = 0; j < remaining-1; j++) { - ts_block_delta[j] =ts_block_delta[j]-value_delta_min; - - } - min_delta[1] = (value_delta_min); - min_delta[2] = (value_delta_max-value_delta_min); - - - return ts_block_delta; - } - public static int encodeOutlier2Bytes( - ArrayList<Integer> ts_block_delta, - int bit_width, - int encode_pos, byte[] encoded_result) { - - encode_pos = bitPacking(ts_block_delta, 0, bit_width, encode_pos, encoded_result); - - int n_k = ts_block_delta.size(); - int n_k_b = n_k / 8; - long cur_remaining = 0; // encoded int - int cur_number_bits = 0; // the bit width used of encoded int - for (int i = n_k_b * 8; i < n_k; i++) { - long cur_value = ts_block_delta.get(i); - int cur_bit_width = bit_width; // remaining bit width of current value - - if (cur_number_bits + bit_width >= 32) { - cur_remaining <<= (32 - cur_number_bits); - cur_bit_width = bit_width - 32 + cur_number_bits; - cur_remaining += ((cur_value >> cur_bit_width)); - long2intBytes(cur_remaining,encode_pos,encoded_result); - encode_pos += 4; - cur_remaining = 0; - cur_number_bits = 0; - } - - cur_remaining <<= cur_bit_width; - cur_number_bits += cur_bit_width; - cur_remaining += (((cur_value << (32 - cur_bit_width)) & 0xFFFFFFFFL) >> (32 - cur_bit_width)); - } - cur_remaining <<= (32 - cur_number_bits); - long2intBytes(cur_remaining,encode_pos,encoded_result); - encode_pos += 4; - return encode_pos; - - - } - - - public static ArrayList<Integer> decodeOutlier2Bytes( - byte[] encoded, - int decode_pos, - int bit_width, - int length, - ArrayList<Integer> encoded_pos_result - ) { - - int n_k_b = length / 8; - int remaining = length - n_k_b * 8; - ArrayList<Integer> result_list = new ArrayList<>(decodeBitPacking(encoded, decode_pos, bit_width, n_k_b * 8 + 1)); - decode_pos += n_k_b * bit_width; - - 
ArrayList<Long> int_remaining = new ArrayList<>(); - int int_remaining_size = remaining * bit_width / 32 + 1; - for (int j = 0; j < int_remaining_size; j++) { - int_remaining.add(bytesLong2Integer(encoded, decode_pos)); - decode_pos += 4; - } - - int cur_remaining_bits = 32; // remaining bit width of current value - long cur_number = int_remaining.get(0); - int cur_number_i = 1; - for (int i = n_k_b * 8; i < length; i++) { - if (bit_width < cur_remaining_bits) { - int tmp = (int) (cur_number >> (32 - bit_width)); - result_list.add(tmp); - cur_number <<= bit_width; - cur_number &= 0xFFFFFFFFL; - cur_remaining_bits -= bit_width; - } else { - int tmp = (int) (cur_number >> (32 - cur_remaining_bits)); - int remain_bits = bit_width - cur_remaining_bits; - tmp <<= remain_bits; - - cur_number = int_remaining.get(cur_number_i); - cur_number_i++; - tmp += (cur_number >> (32 - remain_bits)); - result_list.add(tmp); - cur_number <<= remain_bits; - cur_number &= 0xFFFFFFFFL; - cur_remaining_bits = 32 - remain_bits; - } - } - encoded_pos_result.add(decode_pos); - return result_list; - } - - private static int BOSEncodeBits(int[] ts_block_delta, - int final_k_start_value, - int final_x_l_plus, - int final_k_end_value, - int final_x_u_minus, - int max_delta_value, - int[] min_delta, - int encode_pos, - byte[] cur_byte) { - int block_size = ts_block_delta.length; - - ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_left_outlier = new ArrayList<>(); - ArrayList<Integer> final_right_outlier = new ArrayList<>(); - ArrayList<Integer> final_normal = new ArrayList<>(); - int k1 = 0; - int k2 = 0; - - ArrayList<Integer> bitmap_outlier = new ArrayList<>(); - int index_bitmap_outlier = 0; - int cur_index_bitmap_outlier_bits = 0; - for (int i = 0; i < block_size; i++) { - int cur_value = ts_block_delta[i]; - if ( cur_value<= final_k_start_value) { - 
final_left_outlier.add(cur_value); - final_left_outlier_index.add(i); - if (cur_index_bitmap_outlier_bits % 8 != 7) { - index_bitmap_outlier <<= 2; - index_bitmap_outlier += 3; - cur_index_bitmap_outlier_bits += 2; - } else { - index_bitmap_outlier <<= 1; - index_bitmap_outlier += 1; - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 1; - cur_index_bitmap_outlier_bits = 1; - } - k1++; - - - } else if (cur_value >= final_k_end_value) { - final_right_outlier.add(cur_value - final_k_end_value); - final_right_outlier_index.add(i); - if (cur_index_bitmap_outlier_bits % 8 != 7) { - index_bitmap_outlier <<= 2; - index_bitmap_outlier += 2; - cur_index_bitmap_outlier_bits += 2; - } else { - index_bitmap_outlier <<= 1; - index_bitmap_outlier += 1; - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 0; - cur_index_bitmap_outlier_bits = 1; - } - k2++; - - } else { - final_normal.add(cur_value - final_k_start_value-1); - index_bitmap_outlier <<= 1; - cur_index_bitmap_outlier_bits += 1; - } - if (cur_index_bitmap_outlier_bits % 8 == 0) { - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 0; - } - } - if (cur_index_bitmap_outlier_bits % 8 != 0) { - - index_bitmap_outlier <<= (8 - cur_index_bitmap_outlier_bits % 8); - index_bitmap_outlier &= 0xFF; - bitmap_outlier.add(index_bitmap_outlier); - } - - int final_alpha = ((k1 + k2) * getBitWith(block_size-1)) <= (block_size + k1 + k2) ? 
1 : 0; - - - int k_byte = (k1 << 1); - k_byte += final_alpha; - k_byte += (k2 << 16); - - - int2Bytes(k_byte,encode_pos,cur_byte); - encode_pos += 4; - - int2Bytes(min_delta[0],encode_pos,cur_byte); - encode_pos += 4; - int2Bytes(min_delta[1],encode_pos,cur_byte); - encode_pos += 4; - - int bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); - int left_bit_width = getBitWith(final_k_start_value);//final_left_max - int right_bit_width = getBitWith(max_delta_value - final_k_end_value);//final_right_min - - if(k1==0 && k2==0){ - bit_width_final = getBitWith(max_delta_value); - intByte2Bytes(bit_width_final,encode_pos,cur_byte); - encode_pos += 1; - -// encode_pos = encodeOutlier2Bytes(final_normal, bit_width_final,encode_pos,cur_byte); -// return encode_pos; - } - else{ - int2Bytes(final_x_l_plus,encode_pos,cur_byte); - encode_pos += 4; - int2Bytes(final_k_end_value,encode_pos,cur_byte); - encode_pos += 4; - - bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); - intByte2Bytes(bit_width_final,encode_pos,cur_byte); - encode_pos += 1; - intByte2Bytes(left_bit_width,encode_pos,cur_byte); - encode_pos += 1; - intByte2Bytes(right_bit_width,encode_pos,cur_byte); - encode_pos += 1; - if (final_alpha == 0) { // 0 - - for (int i : bitmap_outlier) { - - intByte2Bytes(i,encode_pos,cur_byte); - encode_pos += 1; - } - } else { - encode_pos = encodeOutlier2Bytes(final_left_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); - encode_pos = encodeOutlier2Bytes(final_right_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); - } - } - - encode_pos = encodeOutlier2Bytes(final_normal, bit_width_final,encode_pos,cur_byte); - if (k1 != 0) - encode_pos = encodeOutlier2Bytes(final_left_outlier, left_bit_width,encode_pos,cur_byte); - if (k2 != 0) - encode_pos = encodeOutlier2Bytes(final_right_outlier, right_bit_width,encode_pos,cur_byte); - return encode_pos; - - } - - - private static int BOSBlockEncoder(int[] ts_block, int block_i, int block_size, 
int remaining ,int encode_pos , byte[] cur_byte) { - - int[] min_delta = new int[3]; - int[] ts_block_delta = getAbsDeltaTsBlock(ts_block, block_i, block_size, remaining, min_delta); - - - block_size = remaining-1; - int max_delta_value = min_delta[2]; - - int[] value_list = new int[block_size]; - int unique_value_count = 0; - int[] value_count_list = new int[max_delta_value+1]; - for(int value:ts_block_delta){ - if(value_count_list[value]==0){ - value_count_list[value] = 1; - value_list[unique_value_count] = value; - unique_value_count ++; - }else{ - value_count_list[value] ++; - } - } - - int left_shift = getBitWith(block_size); - int mask = (1 << left_shift) - 1; - long[] sorted_value_list = new long[unique_value_count]; - int count = 0; - - for(int i=0;i<unique_value_count;i++){ - int value = value_list[i]; - sorted_value_list[i] = (((long) value) << left_shift) + value_count_list[value]; - } - Arrays.sort(sorted_value_list); - - for(int i=0;i<unique_value_count;i++){ - count += getCount(sorted_value_list[i], mask); - sorted_value_list[i] = (((long)getUniqueValue(sorted_value_list[i], left_shift) ) << left_shift) + count;//new_value_list[i] - } - - - int final_k_start_value = -1; // x_l_minus - int final_x_l_plus = 0; // x_l_plus - int final_k_end_value = max_delta_value+1; // x_u_plus - int final_x_u_minus = max_delta_value; // x_u_minus - - int min_bits = 0; - min_bits += (getBitWith(final_k_end_value - final_k_start_value - 2 ) * (block_size)); - - int cur_k1 = 0; - - int x_l_plus_value = 0; // x_l_plus - int x_u_minus_value = max_delta_value; // x_u_plus - - for (int end_value_i = 1; end_value_i < unique_value_count; end_value_i++) { - - x_u_minus_value = getUniqueValue(sorted_value_list[end_value_i-1], left_shift); - int x_u_plus_value = getUniqueValue(sorted_value_list[end_value_i], left_shift); - int cur_bits = 0; - int cur_k2 = block_size - getCount(sorted_value_list[end_value_i-1],mask); - cur_bits += Math.min((cur_k2 + cur_k1) * 
getBitWith(block_size-1), block_size + cur_k2 + cur_k1); - if (cur_k1 + cur_k2 != block_size) - cur_bits += (block_size - cur_k2) * getBitWith(x_u_minus_value - x_l_plus_value); // cur_k1 = 0 - if (cur_k2 != 0) - cur_bits += cur_k2 * getBitWith(max_delta_value - x_u_plus_value); - - - if (cur_bits < min_bits) { - min_bits = cur_bits; - final_x_u_minus = x_u_minus_value; - final_k_end_value = x_u_plus_value; - } - } - - int k_start_value = -1; // x_l_minus - - for (int start_value_i = 0; start_value_i < unique_value_count-1; start_value_i++) { - long k_start_valueL = sorted_value_list[start_value_i]; - k_start_value = getUniqueValue(k_start_valueL, left_shift) ; - x_l_plus_value = getUniqueValue(sorted_value_list[start_value_i+1], left_shift) ; - - - cur_k1 = getCount(k_start_valueL,mask); - - int k_end_value; - int cur_bits; - int cur_k2; - k_end_value = max_delta_value + 1; - - cur_bits = 0; - cur_k2 = 0; - cur_bits += Math.min((cur_k2 + cur_k1) * getBitWith(block_size-1), block_size + cur_k2 + cur_k1); - cur_bits += cur_k1 * getBitWith(k_start_value); - if (cur_k1 + cur_k2 != block_size) - cur_bits += (block_size - cur_k1) * getBitWith(k_end_value- x_l_plus_value); //cur_k2 =0 - - if (cur_bits < min_bits) { - min_bits = cur_bits; - final_k_start_value = k_start_value; - final_x_l_plus = x_l_plus_value; - final_k_end_value = k_end_value; - final_x_u_minus = max_delta_value; - } - - for (int end_value_i = start_value_i + 1; end_value_i < unique_value_count; end_value_i++) { - - x_u_minus_value = getUniqueValue(sorted_value_list[end_value_i-1], left_shift); - k_end_value = getUniqueValue(sorted_value_list[end_value_i], left_shift); - - cur_bits = 0; - cur_k2 = block_size - getCount(sorted_value_list[end_value_i-1],mask); - - cur_bits += Math.min((cur_k1 + cur_k2) * getBitWith(block_size-1), block_size + cur_k1 + cur_k2); - cur_bits += cur_k1 * getBitWith(k_start_value); - if (cur_k1 + cur_k2 != block_size) - cur_bits += (block_size - cur_k1 - cur_k2) * 
getBitWith(x_u_minus_value - x_l_plus_value); - if (cur_k2 != 0) - cur_bits += cur_k2 * getBitWith(max_delta_value - k_end_value); - - - if (cur_bits < min_bits) { - min_bits = cur_bits; - final_k_start_value = k_start_value; - final_x_l_plus = x_l_plus_value; - final_k_end_value = k_end_value; - final_x_u_minus = x_u_minus_value; - } - - } - } - - encode_pos = BOSEncodeBits(ts_block_delta, final_k_start_value, final_x_l_plus, final_k_end_value, final_x_u_minus, - max_delta_value, min_delta, encode_pos , cur_byte); - - return encode_pos; - } - - public static int BOSEncoder( - int[] data, int block_size, byte[] encoded_result) { - block_size++; - - - int length_all = data.length; - - int encode_pos = 0; - int2Bytes(length_all,encode_pos,encoded_result); - encode_pos += 4; - int block_num = length_all / block_size; - int2Bytes(block_size,encode_pos,encoded_result); - encode_pos+= 4; - - for (int i = 0; i < block_num; i++) { - - encode_pos = BOSBlockEncoder(data, i, block_size,block_size, encode_pos,encoded_result); - - } - - int remaining_length = length_all - block_num * block_size; - if (remaining_length <= 3) { - for (int i = remaining_length; i > 0; i--) { - int2Bytes(data[data.length - i], encode_pos, encoded_result); - encode_pos += 4; - } - - } else { - - int start = block_num * block_size; - int remaining = length_all-start; - - - encode_pos = BOSBlockEncoder(data, block_num, block_size,remaining, encode_pos,encoded_result); - - -// int[] ts_block = new int[length_all-start]; -// if (length_all - start >= 0) System.arraycopy(data, start, ts_block, 0, length_all - start); -// -// int supple_length; -// if (remaining_length % 8 == 0) { -// supple_length = 1; -// } else if (remaining_length % 8 == 1) { -// supple_length = 0; -// } else { -// supple_length = 9 - remaining_length % 8; -// } -// -// -// encode_pos = BOSBlockEncoder(ts_block, supple_length, encode_pos,encoded_result); - } - - - return encode_pos; - } - public static int EncodeBits(int num, - int 
bit_width, - int encode_pos, - byte[] cur_byte, - int[] bit_index_list){ - // 找到要插入的位的索引 - int bit_index = bit_index_list[0] ;//cur_byte[encode_pos + 1]; - - // 计算数值的起始位位置 - int remaining_bits = bit_width; - - while (remaining_bits > 0) { - // 计算在当前字节中可以使用的位数 - int available_bits = bit_index; - int bits_to_write = Math.min(available_bits, remaining_bits); - - // 更新 bit_index - bit_index = available_bits - bits_to_write; - - // 计算要写入的位的掩码和数值 - int mask = (1 << bits_to_write) - 1; - int bits = (num >> (remaining_bits - bits_to_write)) & mask; - - // 写入到当前位置 - cur_byte[encode_pos] &= (byte) ~(mask << bit_index); // 清除对应位置的位 - cur_byte[encode_pos] |= (byte) (bits << bit_index); - - // 更新位宽和数值 - remaining_bits -= bits_to_write; - if (bit_index == 0) { - bit_index = 8; - encode_pos++; - } - } - bit_index_list[0] = bit_index; -// cur_byte[encode_pos + 1] = (byte) bit_index; - return encode_pos; - } - private static int BOSEncodeBitsImprove(int[] ts_block_delta, - int final_k_start_value, - int final_x_l_plus, - int final_k_end_value, - int final_x_u_minus, - int max_delta_value, - int[] min_delta, - int encode_pos, - byte[] cur_byte) { - int block_size = ts_block_delta.length; - - ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_left_outlier = new ArrayList<>(); - ArrayList<Integer> final_right_outlier = new ArrayList<>(); - ArrayList<Integer> final_normal = new ArrayList<>(); - int k1 = 0; - int k2 = 0; - - - - ArrayList<Integer> bitmap_outlier = new ArrayList<>(); - int index_bitmap_outlier = 0; - int cur_index_bitmap_outlier_bits = 0; - for (int i = 0; i < block_size; i++) { - int cur_value = ts_block_delta[i]; - if ( cur_value<= final_k_start_value) { -// encode_pos = EncodeBits(cur_value,left_bit_width,encode_pos,cur_byte); -// final_left_outlier.add(cur_value); - final_left_outlier_index.add(i); - if (cur_index_bitmap_outlier_bits % 8 != 7) { - 
index_bitmap_outlier <<= 2; - index_bitmap_outlier += 3; - cur_index_bitmap_outlier_bits += 2; - } else { - index_bitmap_outlier <<= 1; - index_bitmap_outlier += 1; - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 1; - cur_index_bitmap_outlier_bits = 1; - } - k1++; - - - } else if (cur_value >= final_k_end_value) { -// encode_pos = EncodeBits(cur_value- final_k_end_value,right_bit_width,encode_pos,cur_byte); -// final_right_outlier.add(cur_value - final_k_end_value); - final_right_outlier_index.add(i); - if (cur_index_bitmap_outlier_bits % 8 != 7) { - index_bitmap_outlier <<= 2; - index_bitmap_outlier += 2; - cur_index_bitmap_outlier_bits += 2; - } else { - index_bitmap_outlier <<= 1; - index_bitmap_outlier += 1; - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 0; - cur_index_bitmap_outlier_bits = 1; - } - k2++; - - } else { -// final_normal.add(cur_value - final_x_l_plus); -// encode_pos = EncodeBits(cur_value- final_x_l_plus,right_bit_width,encode_pos,cur_byte); - index_bitmap_outlier <<= 1; - cur_index_bitmap_outlier_bits += 1; - } - if (cur_index_bitmap_outlier_bits % 8 == 0) { - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 0; - } - } - if (cur_index_bitmap_outlier_bits % 8 != 0) { - - index_bitmap_outlier <<= (8 - cur_index_bitmap_outlier_bits % 8); - - index_bitmap_outlier &= 0xFF; - bitmap_outlier.add(index_bitmap_outlier); - } - - int final_alpha = ((k1 + k2) * getBitWith(block_size-1)) <= (block_size + k1 + k2) ? 
1 : 0; - - - int k_byte = (k1 << 1); - k_byte += final_alpha; - k_byte += (k2 << 16); - - int2Bytes(k_byte,encode_pos,cur_byte); - encode_pos += 4; - - - int2Bytes(min_delta[0],encode_pos,cur_byte); - encode_pos += 4; - int2Bytes(min_delta[1],encode_pos,cur_byte); - encode_pos += 4; - - int bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); - intByte2Bytes(bit_width_final,encode_pos,cur_byte); - encode_pos += 1; - int[] bit_index_list = new int[1]; - bit_index_list[0] = 8; - - if(final_k_start_value<0 && final_k_end_value > max_delta_value){ -// int bit_width_final= getBitWith(final_x_u_minus - final_x_l_plus); -// cur_byte[encode_pos+1] = 8; - for (int cur_value : ts_block_delta) { - encode_pos = EncodeBits(cur_value, bit_width_final, encode_pos, cur_byte, bit_index_list); -// final_normal.add(cur_value); - } - if(bit_index_list[0] != 8){ - encode_pos ++; - } -// cur_byte[encode_pos+1] = 0; -// encode_pos = encodeOutlier2Bytes(final_normal, bit_width_final,encode_pos,cur_byte); - return encode_pos; - } - - - int left_bit_width = getBitWith(final_k_start_value);//final_left_max - int right_bit_width = getBitWith(max_delta_value - final_k_end_value);//final_right_min - int2Bytes(final_x_l_plus,encode_pos,cur_byte); - encode_pos += 4; - int2Bytes(final_k_end_value,encode_pos,cur_byte); - encode_pos += 4; - -// bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); -// intByte2Bytes(bit_width_final,encode_pos,cur_byte); -// encode_pos += 1; - intByte2Bytes(left_bit_width,encode_pos,cur_byte); - encode_pos += 1; - intByte2Bytes(right_bit_width,encode_pos,cur_byte); - encode_pos += 1; - - if (final_alpha == 0) { // 0 - - for (int i : bitmap_outlier) { - - intByte2Bytes(i,encode_pos,cur_byte); - encode_pos += 1; - } - } else { - encode_pos = encodeOutlier2Bytes(final_left_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); - encode_pos = encodeOutlier2Bytes(final_right_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); - } -// 
cur_byte[encode_pos+1] = 8; -// bit_index_list[0] = 8; - for (int cur_value : ts_block_delta) { - if (cur_value <= final_k_start_value) { - encode_pos = EncodeBits(cur_value, left_bit_width, encode_pos, cur_byte,bit_index_list); - } else if (cur_value >= final_k_end_value) { - encode_pos = EncodeBits(cur_value - final_k_end_value, right_bit_width, encode_pos, cur_byte,bit_index_list); - } else { - encode_pos = EncodeBits(cur_value - final_x_l_plus, bit_width_final, encode_pos, cur_byte,bit_index_list); - } - } - if(bit_index_list[0] != 8){ - encode_pos ++; - } - -// cur_byte[encode_pos+1] = 0; - -// if(k1==0 && k2==0){ -// intByte2Bytes(bit_width_final,encode_pos,cur_byte); -// encode_pos += 1; -// -// -// } -// else{ -// int2Bytes(final_x_l_plus,encode_pos,cur_byte); -// encode_pos += 4; -// int2Bytes(final_k_end_value,encode_pos,cur_byte); -// encode_pos += 4; -// -// bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); -// intByte2Bytes(bit_width_final,encode_pos,cur_byte); -// encode_pos += 1; -// intByte2Bytes(left_bit_width,encode_pos,cur_byte); -// encode_pos += 1; -// intByte2Bytes(right_bit_width,encode_pos,cur_byte); -// encode_pos += 1; -// if (final_alpha == 0) { // 0 -// -// for (int i : bitmap_outlier) { -// -// intByte2Bytes(i,encode_pos,cur_byte); -// encode_pos += 1; -// } -// } else { -// encode_pos = encodeOutlier2Bytes(final_left_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); -// encode_pos = encodeOutlier2Bytes(final_right_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); -// } -// } - - -// if(k1+k2!=block_size) -// encode_pos = encodeOutlier2Bytes(final_normal, bit_width_final,encode_pos,cur_byte); -// if (k1 != 0) -// encode_pos = encodeOutlier2Bytes(final_left_outlier, left_bit_width,encode_pos,cur_byte); -// if (k2 != 0) -// encode_pos = encodeOutlier2Bytes(final_right_outlier, right_bit_width,encode_pos,cur_byte); - return encode_pos; - - } - - private static int BOSBlockEncoderImprove(int[] ts_block, 
int block_i, int block_size, int remaining ,int encode_pos , byte[] cur_byte) { - - int[] min_delta = new int[3]; - int[] ts_block_delta = getAbsDeltaTsBlock(ts_block, block_i, block_size, remaining, min_delta); - - - block_size = remaining-1; - int max_delta_value = min_delta[2]; - - int[] value_list = new int[block_size]; - int unique_value_count = 0; - int[] value_count_list = new int[max_delta_value+1]; - for(int value:ts_block_delta){ - if(value_count_list[value]==0){ - value_count_list[value] = 1; - value_list[unique_value_count] = value; - unique_value_count ++; - }else{ - value_count_list[value] ++; - } - } - - int left_shift = getBitWith(block_size); - int mask = (1 << left_shift) - 1; - long[] sorted_value_list = new long[unique_value_count]; - int count = 0; - - for(int i=0;i<unique_value_count;i++){ - int value = value_list[i]; - sorted_value_list[i] = (((long) value) << left_shift) + value_count_list[value]; - } - Arrays.sort(sorted_value_list); - - for(int i=0;i<unique_value_count;i++){ - count += getCount(sorted_value_list[i], mask); - sorted_value_list[i] = (((long)getUniqueValue(sorted_value_list[i], left_shift) ) << left_shift) + count;//new_value_list[i] - } - - - int final_k_start_value = -1; // x_l_minus - int final_x_l_plus = 0; // x_l_plus - int final_k_end_value = max_delta_value+1; // x_u_plus - int final_x_u_minus = max_delta_value; // x_u_minus - - int min_bits = 0; - min_bits += (getBitWith(final_k_end_value - final_k_start_value - 2 ) * (block_size)); - - int cur_k1 = 0; - - int x_l_plus_value = 0; // x_l_plus - int x_u_minus_value = max_delta_value; // x_u_plus - - for (int end_value_i = 1; end_value_i < unique_value_count; end_value_i++) { - - x_u_minus_value = getUniqueValue(sorted_value_list[end_value_i-1], left_shift); - int x_u_plus_value = getUniqueValue(sorted_value_list[end_value_i], left_shift); - int cur_bits = 0; - int cur_k2 = block_size - getCount(sorted_value_list[end_value_i-1],mask); - cur_bits += Math.min((cur_k2 + 
cur_k1) * getBitWith(block_size-1), block_size + cur_k2 + cur_k1); - if (cur_k1 + cur_k2 != block_size) - cur_bits += (block_size - cur_k2) * getBitWith(x_u_minus_value - x_l_plus_value); // cur_k1 = 0 - if (cur_k2 != 0) - cur_bits += cur_k2 * getBitWith(max_delta_value - x_u_plus_value); - - - if (cur_bits < min_bits) { - min_bits = cur_bits; - final_x_u_minus = x_u_minus_value; - final_k_end_value = x_u_plus_value; - } - } - - int k_start_value = -1; // x_l_minus - - for (int start_value_i = 0; start_value_i < unique_value_count-1; start_value_i++) { - long k_start_valueL = sorted_value_list[start_value_i]; - k_start_value = getUniqueValue(k_start_valueL, left_shift) ; - x_l_plus_value = getUniqueValue(sorted_value_list[start_value_i+1], left_shift) ; - - - cur_k1 = getCount(k_start_valueL,mask); - - int k_end_value; - int cur_bits; - int cur_k2; - k_end_value = max_delta_value + 1; - - cur_bits = 0; - cur_k2 = 0; - cur_bits += Math.min((cur_k2 + cur_k1) * getBitWith(block_size-1), block_size + cur_k2 + cur_k1); - cur_bits += cur_k1 * getBitWith(k_start_value); - if (cur_k1 + cur_k2 != block_size) - cur_bits += (block_size - cur_k1) * getBitWith(k_end_value- x_l_plus_value); //cur_k2 =0 - - if (cur_bits < min_bits) { - min_bits = cur_bits; - final_k_start_value = k_start_value; - final_x_l_plus = x_l_plus_value; - final_k_end_value = k_end_value; - final_x_u_minus = max_delta_value; - } - - for (int end_value_i = start_value_i + 1; end_value_i < unique_value_count; end_value_i++) { - - x_u_minus_value = getUniqueValue(sorted_value_list[end_value_i-1], left_shift); - k_end_value = getUniqueValue(sorted_value_list[end_value_i], left_shift); - - cur_bits = 0; - cur_k2 = block_size - getCount(sorted_value_list[end_value_i-1],mask); - - cur_bits += Math.min((cur_k1 + cur_k2) * getBitWith(block_size-1), block_size + cur_k1 + cur_k2); - cur_bits += cur_k1 * getBitWith(k_start_value); - if (cur_k1 + cur_k2 != block_size) - cur_bits += (block_size - cur_k1 - cur_k2) * 
getBitWith(x_u_minus_value - x_l_plus_value); - if (cur_k2 != 0) - cur_bits += cur_k2 * getBitWith(max_delta_value - k_end_value); - - - if (cur_bits < min_bits) { - min_bits = cur_bits; - final_k_start_value = k_start_value; - final_x_l_plus = x_l_plus_value; - final_k_end_value = k_end_value; - final_x_u_minus = x_u_minus_value; - } - - } - } - - encode_pos = BOSEncodeBitsImprove(ts_block_delta, final_k_start_value, final_x_l_plus, final_k_end_value, final_x_u_minus, - max_delta_value, min_delta, encode_pos , cur_byte); - - return encode_pos; - } - - public static int BOSEncoderImprove( - int[] data, int block_size, byte[] encoded_result) { - block_size++; - - - int length_all = data.length; - - int encode_pos = 0; - int2Bytes(length_all,encode_pos,encoded_result); - encode_pos += 4; - int block_num = length_all / block_size; - int2Bytes(block_size,encode_pos,encoded_result); - encode_pos+= 4; - - for (int i = 0; i < block_num; i++) { - - encode_pos = BOSBlockEncoderImprove(data, i, block_size,block_size, encode_pos,encoded_result); - - } - - int remaining_length = length_all - block_num * block_size; - if (remaining_length <= 3) { - for (int i = remaining_length; i > 0; i--) { - int2Bytes(data[data.length - i], encode_pos, encoded_result); - encode_pos += 4; - } - - } else { - - int start = block_num * block_size; - int remaining = length_all-start; - - - encode_pos = BOSBlockEncoderImprove(data, block_num, block_size,remaining, encode_pos,encoded_result); - - -// int[] ts_block = new int[length_all-start]; -// if (length_all - start >= 0) System.arraycopy(data, start, ts_block, 0, length_all - start); -// -// int supple_length; -// if (remaining_length % 8 == 0) { -// supple_length = 1; -// } else if (remaining_length % 8 == 1) { -// supple_length = 0; -// } else { -// supple_length = 9 - remaining_length % 8; -// } -// -// -// encode_pos = BOSBlockEncoder(ts_block, supple_length, encode_pos,encoded_result); - } - - - return encode_pos; - } - public static int 
BOSBlockDecoder(byte[] encoded, int decode_pos, int[] value_list, int block_size, int[] value_pos_arr) { - int k_byte = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - int k1_byte = (int) (k_byte % pow(2, 16)); - int k1 = k1_byte / 2; - int final_alpha = k1_byte % 2; - - int k2 = (int) (k_byte / pow(2, 16)); - - int value0 = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - value_list[value_pos_arr[0]] =value0; - value_pos_arr[0] ++; - - int min_delta = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_left_outlier = new ArrayList<>(); - ArrayList<Integer> final_right_outlier = new ArrayList<>(); - ArrayList<Integer> final_normal= new ArrayList<>();; - ArrayList<Integer> bitmap_outlier = new ArrayList<>(); - int final_k_start_value = 0; - int final_k_end_value = 0; - int bit_width_final = 0; - int left_bit_width = 0; - int right_bit_width = 0; - - if(k1!=0 || k2 != 0){ - final_k_start_value = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - final_k_end_value = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - bit_width_final = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - left_bit_width = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - right_bit_width = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - if (final_alpha == 0) { - int bitmap_bytes = (int) Math.ceil((double) (block_size + k1 + k2) / (double) 8); - for (int i = 0; i < bitmap_bytes; i++) { - bitmap_outlier.add(bytes2Integer(encoded, decode_pos, 1)); - decode_pos += 1; - } - int bitmap_outlier_i = 0; - int remaining_bits = 8; - int tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - int i = 0; - while (i < block_size ) { - if (remaining_bits > 1) { - int bit_i = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_i == 
1) { - int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_left_right == 1) { - final_left_outlier_index.add(i); - } else { - final_right_outlier_index.add(i); - } - } - if (remaining_bits == 0) { - remaining_bits = 8; - if (bitmap_outlier_i >= bitmap_bytes) break; - tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - } - } else if (remaining_bits == 1) { - int bit_i = tmp & 0x1; - remaining_bits = 8; - if (bitmap_outlier_i >= bitmap_bytes) break; - tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - if (bit_i == 1) { - int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_left_right == 1) { - final_left_outlier_index.add(i); - } else { - final_right_outlier_index.add(i); - } - } - } - i++; - } - } else { - ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); - final_left_outlier_index = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(block_size-1), k1, decode_pos_result_left); - decode_pos = (decode_pos_result_left.get(0)); - ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); - final_right_outlier_index = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(block_size-1), k2, decode_pos_result_right); - decode_pos = (decode_pos_result_right.get(0)); - } - }else { - bit_width_final = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - } - - - ArrayList<Integer> decode_pos_normal = new ArrayList<>(); - final_normal = decodeOutlier2Bytes(encoded, decode_pos, bit_width_final, block_size - k1 - k2, decode_pos_normal); - - decode_pos = decode_pos_normal.get(0); - if (k1 != 0) { - ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); - final_left_outlier = decodeOutlier2Bytes(encoded, decode_pos, left_bit_width, k1, decode_pos_result_left); - decode_pos = decode_pos_result_left.get(0); - } - if (k2 != 0) { - ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); - final_right_outlier = decodeOutlier2Bytes(encoded, 
decode_pos, right_bit_width, k2, decode_pos_result_right); - decode_pos = decode_pos_result_right.get(0); - } - int left_outlier_i = 0; - int right_outlier_i = 0; - int normal_i = 0; - int pre_v = value0; -// int final_k_end_value = (int) (final_k_start_value + pow(2, bit_width_final)); - - - for (int i = 0; i < block_size; i++) { - int current_delta; - if (left_outlier_i >= k1) { - if (right_outlier_i >= k2) { - current_delta = final_normal.get(normal_i) + final_k_start_value+1; - normal_i++; - } else if (i == final_right_outlier_index.get(right_outlier_i)) { - current_delta = final_right_outlier.get(right_outlier_i) + final_k_end_value; - right_outlier_i++; - } else { - current_delta = final_normal.get(normal_i) + final_k_start_value+1; - normal_i++; - } - } else if (i == final_left_outlier_index.get(left_outlier_i)) { - current_delta = final_left_outlier.get(left_outlier_i); - left_outlier_i++; - } else { - - if (right_outlier_i >= k2) { - current_delta = final_normal.get(normal_i) + final_k_start_value+1; - normal_i++; - } else if (i == final_right_outlier_index.get(right_outlier_i)) { - current_delta = final_right_outlier.get(right_outlier_i) + final_k_end_value; - right_outlier_i++; - } else { - current_delta = final_normal.get(normal_i) + final_k_start_value+1; - normal_i++; - } - } - - pre_v = deZigzag(current_delta) + min_delta + pre_v; - value_list[value_pos_arr[0]] =pre_v; - value_pos_arr[0] ++; - } - return decode_pos; - } - - public static void BOSDecoder(byte[] encoded) { - - int decode_pos = 0; - int length_all = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - int block_size = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - int block_num = length_all / block_size; - int remain_length = length_all - block_num * block_size; - - int[] value_list = new int[length_all+block_size]; - block_size --; - - int[] value_pos_arr = new int[1]; - for (int k = 0; k < block_num; k++) { - decode_pos = BOSBlockDecoder(encoded, decode_pos, 
value_list, block_size,value_pos_arr); - - } - - if (remain_length <= 3) { - for (int i = 0; i < remain_length; i++) { - int value_end = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - value_list[value_pos_arr[0]] = value_end; - value_pos_arr[0]++; - } - } else { - remain_length --; - BOSBlockDecoder(encoded, decode_pos, value_list, remain_length, value_pos_arr); - } - } - - public static int DecodeBits(byte[] cur_byte, int bit_width, int[] decode_pos_list) { - int decode_pos = decode_pos_list[0]; - int bit_index = decode_pos_list[1]; //cur_byte[decode_pos + 1]; - int remaining_bits = bit_width; - int num = 0; - - while (remaining_bits > 0) { - int available_bits = bit_index; - int bits_to_read = Math.min(available_bits, remaining_bits); - - // 计算要读取的位的掩码 - int mask = (1 << bits_to_read) - 1; - int bits = (cur_byte[decode_pos] >> (available_bits - bits_to_read)) & mask; - - // 将读取的位合并到结果中 - num = (num << bits_to_read) | bits; - - // 更新位宽和 bit_index - remaining_bits -= bits_to_read; - bit_index = available_bits - bits_to_read; - - if (bit_index == 0) { - bit_index = 8; - decode_pos++; - } - } - decode_pos_list[0] = decode_pos; - decode_pos_list[1] = bit_index; - - return num; - } - public static int BOSBlockDecoderImprove(byte[] encoded, int decode_pos, int[] value_list, int block_size, int[] value_pos_arr) { - - int k_byte = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - int k1_byte = (int) (k_byte % pow(2, 16)); - int k1 = k1_byte / 2; - int final_alpha = k1_byte % 2; - - int k2 = (int) (k_byte / pow(2, 16)); - - int value0 = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - value_list[value_pos_arr[0]] =value0; - value_pos_arr[0] ++; - - int min_delta = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - int bit_width_final = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - int[] decode_list = new int[2]; - decode_list[0]= decode_pos; - decode_list[1]= 8; - - int valuePos = value_pos_arr[0]; - - if(k1==0 
&& k2==0){ - int pre_v = value0; - for (int i = 0; i < block_size; i++) { - int cur_delta = min_delta + DecodeBits(encoded, bit_width_final, decode_list); - pre_v += deZigzag(cur_delta); - value_list[valuePos++] = pre_v; - } - value_pos_arr[0] = valuePos; - return decode_list[0]; - } - - ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); -// ArrayList<Integer> final_left_outlier = new ArrayList<>(); -// ArrayList<Integer> final_right_outlier = new ArrayList<>(); -// ArrayList<Integer> final_normal= new ArrayList<>();; - ArrayList<Integer> bitmap_outlier = new ArrayList<>(); - int final_k_start_value = 0; - int final_k_end_value = 0; -// int bit_width_final = 0; - int left_bit_width = 0; - int right_bit_width = 0; - - final_k_start_value = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - final_k_end_value = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - -// bit_width_final = bytes2Integer(encoded, decode_pos, 1); -// decode_pos += 1; - - left_bit_width = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - right_bit_width = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - if (final_alpha == 0) { - int bitmap_bytes = (int) Math.ceil((double) (block_size + k1 + k2) / (double) 8); - for (int i = 0; i < bitmap_bytes; i++) { - bitmap_outlier.add(bytes2Integer(encoded, decode_pos, 1)); - decode_pos += 1; - } - int bitmap_outlier_i = 0; - int remaining_bits = 8; - int tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - int i = 0; - while (i < block_size ) { - if (remaining_bits > 1) { - int bit_i = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_i == 1) { - int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_left_right == 1) { - final_left_outlier_index.add(i); - } else { - final_right_outlier_index.add(i); - } - } - if (remaining_bits == 0) { - remaining_bits = 8; - if 
(bitmap_outlier_i >= bitmap_bytes) break; - tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - } - } else if (remaining_bits == 1) { - int bit_i = tmp & 0x1; - remaining_bits = 8; - if (bitmap_outlier_i >= bitmap_bytes) break; - tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - if (bit_i == 1) { - int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_left_right == 1) { - final_left_outlier_index.add(i); - } else { - final_right_outlier_index.add(i); - } - } - } - i++; - } - } else { - ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); - final_left_outlier_index = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(block_size-1), k1, decode_pos_result_left); - decode_pos = (decode_pos_result_left.get(0)); - ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); - final_right_outlier_index = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(block_size-1), k2, decode_pos_result_right); - decode_pos = (decode_pos_result_right.get(0)); - } - - - - - -// ArrayList<Integer> decode_pos_normal = new ArrayList<>(); -// final_normal = decodeOutlier2Bytes(encoded, decode_pos, bit_width_final, block_size - k1 - k2, decode_pos_normal); -// -// decode_pos = decode_pos_normal.get(0); -// if (k1 != 0) { -// ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); -// final_left_outlier = decodeOutlier2Bytes(encoded, decode_pos, left_bit_width, k1, decode_pos_result_left); -// decode_pos = decode_pos_result_left.get(0); -// } -// if (k2 != 0) { -// ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); -// final_right_outlier = decodeOutlier2Bytes(encoded, decode_pos, right_bit_width, k2, decode_pos_result_right); -// decode_pos = decode_pos_result_right.get(0); -// } - int left_outlier_i = 0; - int right_outlier_i = 0; - int normal_i = 0; - int pre_v = value0; -// int final_k_end_value = (int) (final_k_start_value + pow(2, bit_width_final)); - -// Precompute constants - int 
normalOffset = min_delta + final_k_start_value; - int rightOutlierOffset = min_delta + final_k_end_value; - -// Initialize indices and pre-fetch next outlier positions - int leftOutlierNextIndex = (left_outlier_i < k1) ? final_left_outlier_index.get(left_outlier_i) : Integer.MAX_VALUE; - int rightOutlierNextIndex = (right_outlier_i < k2) ? final_right_outlier_index.get(right_outlier_i) : Integer.MAX_VALUE; - decode_list[0]= decode_pos; -// decode_list[1]= 8; - // Use a local variable for the position - for (int i = 0; i < block_size; i++) { - int currentDelta; - if (i == leftOutlierNextIndex) { - // Process left outlier - currentDelta = min_delta + DecodeBits(encoded, left_bit_width, decode_list); // final_left_outlier.get(left_outlier_i); - left_outlier_i++; - leftOutlierNextIndex = (left_outlier_i < k1) ? final_left_outlier_index.get(left_outlier_i) : Integer.MAX_VALUE; - } else if (i == rightOutlierNextIndex) { - // Process right outlier - currentDelta = rightOutlierOffset + DecodeBits(encoded, right_bit_width, decode_list);// final_right_outlier.get(right_outlier_i); - right_outlier_i++; - rightOutlierNextIndex = (right_outlier_i < k2) ? 
final_right_outlier_index.get(right_outlier_i) : Integer.MAX_VALUE; - } else { - // Process normal value - currentDelta = normalOffset + DecodeBits(encoded, bit_width_final, decode_list); - normal_i++; - } - - // Update the cumulative value and store it - pre_v += deZigzag(currentDelta); - value_list[valuePos++] = pre_v; - } - value_pos_arr[0] = valuePos; - if(decode_list[1]!=8){ - return decode_list[0]+1; - }else { - return decode_list[0]; - } -// decode_pos = decode_list[0]; -// Update the position in the array - - -// return decode_pos; - } - public static void BOSDecoderImprove(byte[] encoded) { - - int decode_pos = 0; - int length_all = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - int block_size = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - int block_num = length_all / block_size; - int remain_length = length_all - block_num * block_size; - - int[] value_list = new int[length_all+block_size]; - block_size --; - - int[] value_pos_arr = new int[1]; - for (int k = 0; k < block_num; k++) { - decode_pos = BOSBlockDecoderImprove(encoded, decode_pos, value_list, block_size,value_pos_arr); - - } - - if (remain_length <= 3) { - for (int i = 0; i < remain_length; i++) { - int value_end = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - value_list[value_pos_arr[0]] = value_end; - value_pos_arr[0]++; - } - } else { - remain_length --; - BOSBlockDecoderImprove(encoded, decode_pos, value_list, remain_length, value_pos_arr); - } - } - - public static void main(@org.jetbrains.annotations.NotNull String[] args) throws IOException { - String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/";// your data path -// String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String output_parent_dir = parent_dir + "icde0802/compression_ratio/sprintz_bos"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> 
output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } - - output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 -// dataset_block_size.add(256); - output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 -// 
dataset_block_size.add(1024); - -// for (int file_i = 8; file_i < 9; file_i++) { - for (int file_i = 0; file_i < input_path_list.size(); file_i++) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { -// f = tempList[1]; - System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - loader.readHeaders(); - - while (loader.readRecord()) { - - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); - - } - - inputStream.close(); - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; - int repeatTime2 = 100; - - int length = 0; - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - length = BOSEncoder(data2_arr, dataset_block_size.get(file_i), encoded_result); - } - - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - compressed_size += length; - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoder(encoded_result); - e 
= System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - String[] record = { - f.toString(), - "SPRINTZ+BOS-V", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); - - - } - writer.close(); - - } - } - - - @Test - public void BOSImproveTimeTest() throws IOException { - String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/"; // your data path -// String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String output_parent_dir = parent_dir + "icde0802/supply_experiment/R1O4_decode_time/compression_ratio/sprintz_bos_v"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } - - output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + 
"/YZ-Electricity_ratio.csv"); // 3 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 -// dataset_block_size.add(1024); - - int repeatTime2 = 100; -// for (int file_i = 9; file_i < 10; file_i++) { -// - for (int file_i = 0; file_i < input_path_list.size(); file_i++) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { -// f=tempList[2]; - System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - - loader.readHeaders(); - while (loader.readRecord()) { -// String 
value = loader.getValues()[index]; - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); -// data.add(Integer.valueOf(value)); - } - inputStream.close(); - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; - - - int length = 0; - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - length = BOSEncoderImprove(data2_arr, dataset_block_size.get(file_i), encoded_result); - } - - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - compressed_size += length; - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoderImprove(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - - String[] record = { - f.toString(), - "SPRINTZ+BOS-V", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); -// break; - } - writer.close(); - } - } - -}
diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SPRINTZBPTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SPRINTZBPTest.java index ecd724a..0688e6a 100644 --- a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SPRINTZBPTest.java +++ b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SPRINTZBPTest.java
@@ -9,22 +9,31 @@ import java.io.InputStream; import java.nio.charset.StandardCharsets; import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; import java.util.ArrayList; +import java.util.List; +import java.util.stream.Stream; import static java.lang.Math.pow; public class SPRINTZBPTest { public static int getBitWith(int num) { - if (num == 0) return 1; - else return 32 - Integer.numberOfLeadingZeros(num); + if (num == 0) + return 1; + else + return 32 - Integer.numberOfLeadingZeros(num); } + public static int getCount(long long1, int mask) { return ((int) (long1 & mask)); } + public static int getUniqueValue(long long1, int left_shift) { return ((int) ((long1) >> left_shift)); } + public static int findMedian(int[] arr) { if (arr == null || arr.length == 0) { throw new IllegalArgumentException("数组不能为空"); @@ -66,33 +75,37 @@ arr[i] = arr[j]; arr[j] = temp; } + public static int zigzag(int num) { - if (num < 0) return ((-num)<<1)-1; - else return num<<1; + if (num < 0) + return ((-num) << 1) - 1; + else + return num << 1; } public static int deZigzag(int num) { - if (num % 2 == 0) return num>>1; - else return -((num+1)>>1); + if (num % 2 == 0) + return num >> 1; + else + return -((num + 1) >> 1); } - public static void int2Bytes(int integer,int encode_pos , byte[] cur_byte) { + public static void int2Bytes(int integer, int encode_pos, byte[] cur_byte) { cur_byte[encode_pos] = (byte) (integer >> 24); - cur_byte[encode_pos+1] = (byte) (integer >> 16); - cur_byte[encode_pos+2] = (byte) (integer >> 8); - cur_byte[encode_pos+3] = (byte) (integer); + cur_byte[encode_pos + 1] = (byte) (integer >> 16); + cur_byte[encode_pos + 2] = (byte) (integer >> 8); + cur_byte[encode_pos + 3] = (byte) (integer); } - - public static void intByte2Bytes(int integer, int encode_pos , byte[] cur_byte) { + public static void intByte2Bytes(int integer, int encode_pos, byte[] cur_byte) { cur_byte[encode_pos] = (byte) (integer); } - private static void 
long2intBytes(long integer, int encode_pos , byte[] cur_byte) { + private static void long2intBytes(long integer, int encode_pos, byte[] cur_byte) { cur_byte[encode_pos] = (byte) (integer >> 24); - cur_byte[encode_pos+1] = (byte) (integer >> 16); - cur_byte[encode_pos+2] = (byte) (integer >> 8); - cur_byte[encode_pos+3] = (byte) (integer); + cur_byte[encode_pos + 1] = (byte) (integer >> 16); + cur_byte[encode_pos + 2] = (byte) (integer >> 8); + cur_byte[encode_pos + 3] = (byte) (integer); } public static int bytes2Integer(byte[] encoded, int start, int num) { @@ -119,7 +132,8 @@ return value; } - public static void pack8Values(ArrayList<Integer> values, int offset, int width, int encode_pos, byte[] encoded_result) { + public static void pack8Values(ArrayList<Integer> values, int offset, int width, int encode_pos, + byte[] encoded_result) { int bufIdx = 0; int valueIdx = offset; // remaining bits for the current unfinished Integer @@ -141,7 +155,7 @@ while (leftSize >= width && valueIdx < 8 + offset) { // encode one Integer to the 'buffer' - buffer |= (values.get(valueIdx)<< (leftSize - width)); + buffer |= (values.get(valueIdx) << (leftSize - width)); leftSize -= width; valueIdx++; } @@ -156,17 +170,17 @@ // put the buffer into the final result for (int j = 0; j < 4; j++) { encoded_result[encode_pos] = (byte) ((buffer >>> ((3 - j) * 8)) & 0xFF); - encode_pos ++; + encode_pos++; bufIdx++; if (bufIdx >= width) { - return ; + return; } } } } - public static void unpack8Values(byte[] encoded, int offset,int width, ArrayList<Integer> result_list) { + public static void unpack8Values(byte[] encoded, int offset, int width, ArrayList<Integer> result_list) { int byteIdx = offset; long buffer = 0; // total bits which have read from 'buf' to 'buffer'. i.e., @@ -187,7 +201,7 @@ // then decode one Integer one by one until left bits in 'buffer' is // not enough to decode one Integer. 
while (totalBits >= width && valueIdx < 8) { - result_list.add ((int) (buffer >>> (totalBits - width))); + result_list.add((int) (buffer >>> (totalBits - width))); valueIdx++; totalBits -= width; buffer = buffer & ((1L << totalBits) - 1); @@ -195,11 +209,12 @@ } } - public static int bitPacking(ArrayList<Integer> numbers, int start, int bit_width,int encode_pos, byte[] encoded_result) { - int block_num = (numbers.size()-start) / 8; - for(int i=0;i<block_num;i++){ - pack8Values( numbers, start+i*8, bit_width,encode_pos, encoded_result); - encode_pos +=bit_width; + public static int bitPacking(ArrayList<Integer> numbers, int start, int bit_width, int encode_pos, + byte[] encoded_result) { + int block_num = (numbers.size() - start) / 8; + for (int i = 0; i < block_num; i++) { + pack8Values(numbers, start + i * 8, bit_width, encode_pos, encoded_result); + encode_pos += bit_width; } return encode_pos; @@ -212,7 +227,7 @@ int block_num = (block_size - 1) / 8; for (int i = 0; i < block_num; i++) { // bitpacking - unpack8Values( encoded, decode_pos, bit_width, result_list); + unpack8Values(encoded, decode_pos, bit_width, result_list); decode_pos += bit_width; } return result_list; @@ -226,11 +241,11 @@ int block_size, int remaining, int[] min_delta) { - int[] ts_block_delta = new int[remaining-1]; + int[] ts_block_delta = new int[remaining - 1]; - int base = i*block_size+1; - int end = i*block_size+remaining; - min_delta[0]=ts_block[base-1]; + int base = i * block_size + 1; + int end = i * block_size + remaining; + min_delta[0] = ts_block[base - 1]; int value_delta_min = Integer.MAX_VALUE; int value_delta_max = Integer.MIN_VALUE; for (int j = base; j < end; j++) { @@ -242,16 +257,15 @@ if (epsilon_v > value_delta_max) { value_delta_max = epsilon_v; } - ts_block_delta[j-base] =epsilon_v; + ts_block_delta[j - base] = epsilon_v; } - for (int j = 0; j < remaining-1; j++) { - ts_block_delta[j] =ts_block_delta[j]-value_delta_min; + for (int j = 0; j < remaining - 1; j++) { + 
ts_block_delta[j] = ts_block_delta[j] - value_delta_min; } min_delta[1] = (value_delta_min); - min_delta[2] = (value_delta_max-value_delta_min); - + min_delta[2] = (value_delta_max - value_delta_min); return ts_block_delta; } @@ -259,7 +273,7 @@ public static int encodeOutlier2Bytes( ArrayList<Integer> ts_block_delta, int bit_width, - int encode_pos, byte[] encoded_result) { + int encode_pos, byte[] encoded_result) { encode_pos = bitPacking(ts_block_delta, 0, bit_width, encode_pos, encoded_result); @@ -275,7 +289,7 @@ cur_remaining <<= (32 - cur_number_bits); cur_bit_width = bit_width - 32 + cur_number_bits; cur_remaining += ((cur_value >> cur_bit_width)); - long2intBytes(cur_remaining,encode_pos,encoded_result); + long2intBytes(cur_remaining, encode_pos, encoded_result); encode_pos += 4; cur_remaining = 0; cur_number_bits = 0; @@ -286,25 +300,23 @@ cur_remaining += (((cur_value << (32 - cur_bit_width)) & 0xFFFFFFFFL) >> (32 - cur_bit_width)); } cur_remaining <<= (32 - cur_number_bits); - long2intBytes(cur_remaining,encode_pos,encoded_result); + long2intBytes(cur_remaining, encode_pos, encoded_result); encode_pos += 4; return encode_pos; - } - public static ArrayList<Integer> decodeOutlier2Bytes( byte[] encoded, int decode_pos, int bit_width, int length, - ArrayList<Integer> encoded_pos_result - ) { + ArrayList<Integer> encoded_pos_result) { int n_k_b = length / 8; int remaining = length - n_k_b * 8; - ArrayList<Integer> result_list = new ArrayList<>(decodeBitPacking(encoded, decode_pos, bit_width, n_k_b * 8 + 1)); + ArrayList<Integer> result_list = new ArrayList<>( + decodeBitPacking(encoded, decode_pos, bit_width, n_k_b * 8 + 1)); decode_pos += n_k_b * bit_width; ArrayList<Long> int_remaining = new ArrayList<>(); @@ -343,14 +355,14 @@ } private static int BOSEncodeBits(int[] ts_block_delta, - int final_k_start_value, - int final_x_l_plus, - int final_k_end_value, - int final_x_u_minus, - int max_delta_value, - int[] min_delta, - int encode_pos, - byte[] cur_byte) 
{ + int final_k_start_value, + int final_x_l_plus, + int final_k_end_value, + int final_x_u_minus, + int max_delta_value, + int[] min_delta, + int encode_pos, + byte[] cur_byte) { int block_size = ts_block_delta.length; ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); @@ -366,7 +378,7 @@ int cur_index_bitmap_outlier_bits = 0; for (int i = 0; i < block_size; i++) { int cur_value = ts_block_delta[i]; - if ( cur_value<= final_k_start_value) { + if (cur_value <= final_k_start_value) { final_left_outlier.add(cur_value); final_left_outlier_index.add(i); if (cur_index_bitmap_outlier_bits % 8 != 7) { @@ -382,7 +394,6 @@ } k1++; - } else if (cur_value >= final_k_end_value) { final_right_outlier.add(cur_value - final_k_end_value); final_right_outlier_index.add(i); @@ -400,7 +411,7 @@ k2++; } else { - final_normal.add(cur_value - final_k_start_value-1); + final_normal.add(cur_value - final_k_start_value - 1); index_bitmap_outlier <<= 1; cur_index_bitmap_outlier_bits += 1; } @@ -416,105 +427,102 @@ bitmap_outlier.add(index_bitmap_outlier); } - int final_alpha = ((k1 + k2) * getBitWith(block_size-1)) <= (block_size + k1 + k2) ? 1 : 0; - + int final_alpha = ((k1 + k2) * getBitWith(block_size - 1)) <= (block_size + k1 + k2) ? 
1 : 0; int k_byte = (k1 << 1); k_byte += final_alpha; k_byte += (k2 << 16); - - int2Bytes(k_byte,encode_pos,cur_byte); + int2Bytes(k_byte, encode_pos, cur_byte); encode_pos += 4; - int2Bytes(min_delta[0],encode_pos,cur_byte); + int2Bytes(min_delta[0], encode_pos, cur_byte); encode_pos += 4; - int2Bytes(min_delta[1],encode_pos,cur_byte); + int2Bytes(min_delta[1], encode_pos, cur_byte); encode_pos += 4; int bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); - int left_bit_width = getBitWith(final_k_start_value);//final_left_max - int right_bit_width = getBitWith(max_delta_value - final_k_end_value);//final_right_min + int left_bit_width = getBitWith(final_k_start_value);// final_left_max + int right_bit_width = getBitWith(max_delta_value - final_k_end_value);// final_right_min - if(k1==0 && k2==0){ + if (k1 == 0 && k2 == 0) { bit_width_final = getBitWith(max_delta_value); - intByte2Bytes(bit_width_final,encode_pos,cur_byte); + intByte2Bytes(bit_width_final, encode_pos, cur_byte); encode_pos += 1; -// encode_pos = encodeOutlier2Bytes(final_normal, bit_width_final,encode_pos,cur_byte); -// return encode_pos; - } - else{ - int2Bytes(final_x_l_plus,encode_pos,cur_byte); + // encode_pos = encodeOutlier2Bytes(final_normal, + // bit_width_final,encode_pos,cur_byte); + // return encode_pos; + } else { + int2Bytes(final_x_l_plus, encode_pos, cur_byte); encode_pos += 4; - int2Bytes(final_k_end_value,encode_pos,cur_byte); + int2Bytes(final_k_end_value, encode_pos, cur_byte); encode_pos += 4; bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); - intByte2Bytes(bit_width_final,encode_pos,cur_byte); + intByte2Bytes(bit_width_final, encode_pos, cur_byte); encode_pos += 1; - intByte2Bytes(left_bit_width,encode_pos,cur_byte); + intByte2Bytes(left_bit_width, encode_pos, cur_byte); encode_pos += 1; - intByte2Bytes(right_bit_width,encode_pos,cur_byte); + intByte2Bytes(right_bit_width, encode_pos, cur_byte); encode_pos += 1; if (final_alpha == 0) { // 0 for (int i 
: bitmap_outlier) { - intByte2Bytes(i,encode_pos,cur_byte); + intByte2Bytes(i, encode_pos, cur_byte); encode_pos += 1; } } else { - encode_pos = encodeOutlier2Bytes(final_left_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); - encode_pos = encodeOutlier2Bytes(final_right_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); + encode_pos = encodeOutlier2Bytes(final_left_outlier_index, getBitWith(block_size - 1), encode_pos, + cur_byte); + encode_pos = encodeOutlier2Bytes(final_right_outlier_index, getBitWith(block_size - 1), encode_pos, + cur_byte); } } - encode_pos = encodeOutlier2Bytes(final_normal, bit_width_final,encode_pos,cur_byte); + encode_pos = encodeOutlier2Bytes(final_normal, bit_width_final, encode_pos, cur_byte); if (k1 != 0) - encode_pos = encodeOutlier2Bytes(final_left_outlier, left_bit_width,encode_pos,cur_byte); + encode_pos = encodeOutlier2Bytes(final_left_outlier, left_bit_width, encode_pos, cur_byte); if (k2 != 0) - encode_pos = encodeOutlier2Bytes(final_right_outlier, right_bit_width,encode_pos,cur_byte); + encode_pos = encodeOutlier2Bytes(final_right_outlier, right_bit_width, encode_pos, cur_byte); return encode_pos; } - - private static int BOSBlockEncoder(int[] ts_block, int block_i, int block_size, int remaining ,int encode_pos , byte[] cur_byte) { + private static int BOSBlockEncoder(int[] ts_block, int block_i, int block_size, int remaining, int encode_pos, + byte[] cur_byte) { int[] min_delta = new int[3]; int[] ts_block_delta = getAbsDeltaTsBlock(ts_block, block_i, block_size, remaining, min_delta); - - block_size = remaining-1; + block_size = remaining - 1; int max_delta_value = min_delta[2]; - - int2Bytes(min_delta[0],encode_pos,cur_byte); + int2Bytes(min_delta[0], encode_pos, cur_byte); encode_pos += 4; - int2Bytes(min_delta[1],encode_pos,cur_byte); + int2Bytes(min_delta[1], encode_pos, cur_byte); encode_pos += 4; int bit_width_final = getBitWith(max_delta_value); - 
intByte2Bytes(bit_width_final,encode_pos,cur_byte); + intByte2Bytes(bit_width_final, encode_pos, cur_byte); encode_pos += 1; ArrayList<Integer> final_normal = new ArrayList<>(); - for(int i : ts_block_delta){ + for (int i : ts_block_delta) { final_normal.add(i); } - encode_pos = encodeOutlier2Bytes(final_normal, bit_width_final,encode_pos,cur_byte); - + encode_pos = encodeOutlier2Bytes(final_normal, bit_width_final, encode_pos, cur_byte); return encode_pos; } public static int EncodeBits(int num, - int bit_width, - int encode_pos, - byte[] cur_byte, - int[] bit_index_list){ + int bit_width, + int encode_pos, + byte[] cur_byte, + int[] bit_index_list) { // 找到要插入的位的索引 - int bit_index = bit_index_list[0] ;//cur_byte[encode_pos + 1]; + int bit_index = bit_index_list[0];// cur_byte[encode_pos + 1]; // 计算数值的起始位位置 int remaining_bits = bit_width; @@ -543,33 +551,37 @@ } } bit_index_list[0] = bit_index; -// cur_byte[encode_pos + 1] = (byte) bit_index; + // cur_byte[encode_pos + 1] = (byte) bit_index; return encode_pos; } - private static int BOSBlockEncoderImprove(int[] ts_block, int block_i, int block_size, int remaining ,int encode_pos , byte[] cur_byte) { + + private static int BOSBlockEncoderImprove(int[] ts_block, int block_i, int block_size, int remaining, + int encode_pos, byte[] cur_byte) { int[] min_delta = new int[3]; int[] ts_block_delta = getAbsDeltaTsBlock(ts_block, block_i, block_size, remaining, min_delta); - int2Bytes(min_delta[0],encode_pos,cur_byte); + int2Bytes(min_delta[0], encode_pos, cur_byte); encode_pos += 4; - int2Bytes(min_delta[1],encode_pos,cur_byte); + int2Bytes(min_delta[1], encode_pos, cur_byte); encode_pos += 4; int bit_width_final = getBitWith(min_delta[2]); - intByte2Bytes(bit_width_final,encode_pos,cur_byte); + intByte2Bytes(bit_width_final, encode_pos, cur_byte); encode_pos += 1; -// ArrayList<Integer> final_normal = new ArrayList<>(); + // ArrayList<Integer> final_normal = new ArrayList<>(); int[] bit_index_list = new int[1]; 
bit_index_list[0] = 8; - for(int value:ts_block_delta){ + for (int value : ts_block_delta) { encode_pos = EncodeBits(value, bit_width_final, encode_pos, cur_byte, bit_index_list); } - if(bit_index_list[0] != 8){ - encode_pos ++; + if (bit_index_list[0] != 8) { + encode_pos++; } -// encode_pos = encodeOutlier2Bytes(final_normal, bit_width_final,encode_pos,cur_byte); + // encode_pos = encodeOutlier2Bytes(final_normal, + // bit_width_final,encode_pos,cur_byte); return encode_pos; } + public static int BOSEncoderImprove( int[] data, int block_size, byte[] encoded_result) { block_size++; @@ -577,15 +589,15 @@ int length_all = data.length; int encode_pos = 0; - int2Bytes(length_all,encode_pos,encoded_result); + int2Bytes(length_all, encode_pos, encoded_result); encode_pos += 4; int block_num = length_all / block_size; - int2Bytes(block_size,encode_pos,encoded_result); - encode_pos+= 4; + int2Bytes(block_size, encode_pos, encoded_result); + encode_pos += 4; for (int i = 0; i < block_num; i++) { - encode_pos = BOSBlockEncoderImprove(data, i, block_size, block_size,encode_pos,encoded_result); + encode_pos = BOSBlockEncoderImprove(data, i, block_size, block_size, encode_pos, encoded_result); } int remaining_length = length_all - block_num * block_size; @@ -595,35 +607,33 @@ encode_pos += 4; } - } - else { + } else { int start = block_num * block_size; - int remaining = length_all-start; - encode_pos = BOSBlockEncoderImprove(data, block_num, block_size,remaining, encode_pos,encoded_result); + int remaining = length_all - start; + encode_pos = BOSBlockEncoderImprove(data, block_num, block_size, remaining, encode_pos, encoded_result); } - return encode_pos; } + public static int BOSEncoder( int[] data, int block_size, byte[] encoded_result) { block_size++; - int length_all = data.length; int encode_pos = 0; - int2Bytes(length_all,encode_pos,encoded_result); + int2Bytes(length_all, encode_pos, encoded_result); encode_pos += 4; int block_num = length_all / block_size; - 
int2Bytes(block_size,encode_pos,encoded_result); - encode_pos+= 4; + int2Bytes(block_size, encode_pos, encoded_result); + encode_pos += 4; for (int i = 0; i < block_num; i++) { - encode_pos = BOSBlockEncoder(data, i, block_size,block_size, encode_pos,encoded_result); + encode_pos = BOSBlockEncoder(data, i, block_size, block_size, encode_pos, encoded_result); } @@ -637,64 +647,62 @@ } else { int start = block_num * block_size; - int remaining = length_all-start; + int remaining = length_all - start; + encode_pos = BOSBlockEncoder(data, block_num, block_size, remaining, encode_pos, encoded_result); - encode_pos = BOSBlockEncoder(data, block_num, block_size,remaining, encode_pos,encoded_result); - - -// int[] ts_block = new int[length_all-start]; -// if (length_all - start >= 0) System.arraycopy(data, start, ts_block, 0, length_all - start); -// -// int supple_length; -// if (remaining_length % 8 == 0) { -// supple_length = 1; -// } else if (remaining_length % 8 == 1) { -// supple_length = 0; -// } else { -// supple_length = 9 - remaining_length % 8; -// } -// -// -// encode_pos = BOSBlockEncoder(ts_block, supple_length, encode_pos,encoded_result); + // int[] ts_block = new int[length_all-start]; + // if (length_all - start >= 0) System.arraycopy(data, start, ts_block, 0, + // length_all - start); + // + // int supple_length; + // if (remaining_length % 8 == 0) { + // supple_length = 1; + // } else if (remaining_length % 8 == 1) { + // supple_length = 0; + // } else { + // supple_length = 9 - remaining_length % 8; + // } + // + // + // encode_pos = BOSBlockEncoder(ts_block, supple_length, + // encode_pos,encoded_result); } - return encode_pos; } + public static int BOSBlockDecoder(byte[] encoded, int decode_pos, int[] value_list, int block_size, + int[] value_pos_arr) { - public static int BOSBlockDecoder(byte[] encoded, int decode_pos, int[] value_list, int block_size, int[] value_pos_arr) { - - ArrayList<Integer> final_normal= new ArrayList<>();; + 
ArrayList<Integer> final_normal = new ArrayList<>(); + ; ArrayList<Integer> bitmap_outlier = new ArrayList<>(); int bit_width_final = 0; int value0 = bytes2Integer(encoded, decode_pos, 4); decode_pos += 4; - value_list[value_pos_arr[0]] =value0; - value_pos_arr[0] ++; + value_list[value_pos_arr[0]] = value0; + value_pos_arr[0]++; int min_delta = bytes2Integer(encoded, decode_pos, 4); decode_pos += 4; - bit_width_final = bytes2Integer(encoded, decode_pos, 1); decode_pos += 1; ArrayList<Integer> decode_pos_normal = new ArrayList<>(); - final_normal = decodeOutlier2Bytes(encoded, decode_pos, bit_width_final, block_size , decode_pos_normal); + final_normal = decodeOutlier2Bytes(encoded, decode_pos, bit_width_final, block_size, decode_pos_normal); decode_pos = decode_pos_normal.get(0); int normal_i = 0; int pre_v = value0; - for (int i = 0; i < block_size; i++) { - int current_delta= final_normal.get(normal_i); - pre_v = deZigzag(current_delta + min_delta) + pre_v; - value_list[value_pos_arr[0]] =pre_v; - value_pos_arr[0] ++; + int current_delta = final_normal.get(normal_i); + pre_v = deZigzag(current_delta + min_delta) + pre_v; + value_list[value_pos_arr[0]] = pre_v; + value_pos_arr[0]++; } return decode_pos; } @@ -710,15 +718,15 @@ int block_num = length_all / block_size; int remain_length = length_all - block_num * block_size; - int[] value_list = new int[length_all+block_size]; - block_size --; + int[] value_list = new int[length_all + block_size]; + block_size--; int[] value_pos_arr = new int[1]; -// System.out.println(length_all); -// System.out.println(encoded.length); + // System.out.println(length_all); + // System.out.println(encoded.length); for (int k = 0; k < block_num; k++) { -// System.out.println(k); - decode_pos = BOSBlockDecoder(encoded, decode_pos, value_list, block_size,value_pos_arr); + // System.out.println(k); + decode_pos = BOSBlockDecoder(encoded, decode_pos, value_list, block_size, value_pos_arr); } @@ -730,14 +738,14 @@ value_pos_arr[0]++; } } 
else { - remain_length --; + remain_length--; BOSBlockDecoder(encoded, decode_pos, value_list, remain_length, value_pos_arr); } } public static int DecodeBits(byte[] cur_byte, int bit_width, int[] decode_pos_list) { int decode_pos = decode_pos_list[0]; - int bit_index = decode_pos_list[1]; //cur_byte[decode_pos + 1]; + int bit_index = decode_pos_list[1]; // cur_byte[decode_pos + 1]; int remaining_bits = bit_width; int num = 0; @@ -766,13 +774,14 @@ return num; } - public static int BOSBlockDecoderImprove(byte[] encoded, int decode_pos, int[] value_list, int block_size, int[] value_pos_arr) { + public static int BOSBlockDecoderImprove(byte[] encoded, int decode_pos, int[] value_list, int block_size, + int[] value_pos_arr) { int value0 = bytes2Integer(encoded, decode_pos, 4); decode_pos += 4; - value_list[value_pos_arr[0]] =value0; - value_pos_arr[0] ++; + value_list[value_pos_arr[0]] = value0; + value_pos_arr[0]++; int min_delta = bytes2Integer(encoded, decode_pos, 4); decode_pos += 4; @@ -781,38 +790,40 @@ decode_pos += 1; int[] decode_list = new int[2]; - decode_list[0]= decode_pos; - decode_list[1]= 8; + decode_list[0] = decode_pos; + decode_list[1] = 8; int pre_v = value0; for (int i = 0; i < block_size; i++) { - int cur_delta = min_delta + DecodeBits(encoded, bit_width_final, decode_list); + int cur_delta = min_delta + DecodeBits(encoded, bit_width_final, decode_list); pre_v += deZigzag(cur_delta); value_list[value_pos_arr[0]++] = pre_v; } - if(decode_list[1]!=8){ - return decode_list[0]+1; - }else { + if (decode_list[1] != 8) { + return decode_list[0] + 1; + } else { return decode_list[0]; } -// value_pos_arr[0] = valuePos; -// return decode_list[0]; + // value_pos_arr[0] = valuePos; + // return decode_list[0]; -// ArrayList<Integer> decode_pos_normal = new ArrayList<>(); -// ArrayList<Integer> final_normal = decodeOutlier2Bytes(encoded, decode_pos, bit_width_final, block_size, decode_pos_normal); -// -// decode_pos = decode_pos_normal.get(0); -// int normal_i 
= 0; -//// int pre_v = value0; -// -// for (int i = 0; i < block_size; i++) { -// int current_delta = min_delta + final_normal.get(normal_i) ; -// pre_v = current_delta + pre_v; -// value_list[value_pos_arr[0]] = pre_v; -// value_pos_arr[0]++; -// } -// -// return decode_pos; + // ArrayList<Integer> decode_pos_normal = new ArrayList<>(); + // ArrayList<Integer> final_normal = decodeOutlier2Bytes(encoded, decode_pos, + // bit_width_final, block_size, decode_pos_normal); + // + // decode_pos = decode_pos_normal.get(0); + // int normal_i = 0; + //// int pre_v = value0; + // + // for (int i = 0; i < block_size; i++) { + // int current_delta = min_delta + final_normal.get(normal_i) ; + // pre_v = current_delta + pre_v; + // value_list[value_pos_arr[0]] = pre_v; + // value_pos_arr[0]++; + // } + // + // return decode_pos; } + public static void BOSDecoderImprove(byte[] encoded) { int decode_pos = 0; @@ -821,20 +832,16 @@ int block_size = bytes2Integer(encoded, decode_pos, 4); decode_pos += 4; - - int block_num = length_all / block_size; int remain_length = length_all - block_num * block_size; - - int[] value_list = new int[length_all+block_size]; + int[] value_list = new int[length_all + block_size]; block_size--; int[] value_pos_arr = new int[1]; for (int k = 0; k < block_num; k++) { - - decode_pos = BOSBlockDecoderImprove(encoded, decode_pos, value_list, block_size,value_pos_arr); + decode_pos = BOSBlockDecoderImprove(encoded, decode_pos, value_list, block_size, value_pos_arr); } @@ -846,94 +853,225 @@ value_pos_arr[0]++; } } else { - remain_length --; + remain_length--; BOSBlockDecoderImprove(encoded, decode_pos, value_list, remain_length, value_pos_arr); } } - public static void main(@org.jetbrains.annotations.NotNull String[] args) throws IOException { - int repeatTime2 = 100; - String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/";// your data path -// String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - 
String output_parent_dir = parent_dir + "icde0802/compression_ratio/sprintz"; + public static int getDecimalPrecision(String str) { + // 查找小数点的位置 + int decimalIndex = str.indexOf("."); + + // 如果没有小数点,精度为0 + if (decimalIndex == -1) { + return 0; + } + + // 获取小数点后的部分并返回其长度 + return str.substring(decimalIndex + 1).length(); + } + + public static String extractFileName(String path) { + if (path == null || path.isEmpty()) { + return ""; + } + + File file = new File(path); + String fileName = file.getName(); + + int dotIndex = fileName.lastIndexOf('.'); + + if (dotIndex == -1 || dotIndex == 0) { + return fileName; + } + + return fileName.substring(0, dotIndex); + } + + @Test + public void testSubcolumn() throws IOException { + String parent_dir = "D:/github/xjz17/subcolumn/"; + + String input_parent_dir = parent_dir + "dataset/"; + + String output_parent_dir = "D:/encoding-subcolumn/result/"; + // String output_parent_dir = parent_dir + "result/"; + + String outputPath = output_parent_dir + "sprintz.csv"; + + int block_size = 1024; + + int repeatTime = 100; + + // repeatTime = 1; + + List<String> integerDatasets = new ArrayList<>(); + integerDatasets.add("Wine-Tasting"); + + CsvWriter writer = new CsvWriter(outputPath, ',', StandardCharsets.UTF_8); + writer.setRecordDelimiter('\n'); + + String[] head = { + "Dataset", + "Encoding Algorithm", + "Encoding Time", + "Decoding Time", + "Points", + "Compressed Size", + "Compression Ratio" + }; + writer.writeRecord(head); // write header to output file + File directory = new File(input_parent_dir); + // File[] csvFiles = directory.listFiles(); + File[] csvFiles = directory.listFiles((dir, name) -> name.endsWith(".csv")); + + for (File file : csvFiles) { + // f = tempList[1]; + // System.out.println(f); + String datasetName = extractFileName(file.toString()); + System.out.println(datasetName); + InputStream inputStream = Files.newInputStream(file.toPath()); + + CsvReader loader = new CsvReader(inputStream, 
StandardCharsets.UTF_8); + ArrayList<Float> data1 = new ArrayList<>(); + // ArrayList<Integer> data2 = new ArrayList<>(); + + // loader.readHeaders(); + + int max_decimal = 0; + while (loader.readRecord()) { + String f_str = loader.getValues()[0]; + if (f_str.isEmpty()) { + continue; + } + int cur_decimal = getDecimalPrecision(f_str); + if (cur_decimal > max_decimal) { + max_decimal = cur_decimal; + } + // String value = loader.getValues()[index]; + data1.add(Float.valueOf(f_str)); + // data2.add(Integer.valueOf(loader.getValues()[1])); + // data.add(Integer.valueOf(value)); + } + + inputStream.close(); + int[] data2_arr = new int[data1.size()]; + int max_mul = (int) Math.pow(10, max_decimal); + for (int i = 0; i < data1.size(); i++) { + data2_arr[i] = (int) (data1.get(i) * max_mul); + } + + System.out.println(max_decimal); + byte[] encoded_result = new byte[data2_arr.length * 4]; + long encodeTime = 0; + long decodeTime = 0; + double ratio = 0; + double compressed_size = 0; + + int length = 0; + + long s = System.nanoTime(); + for (int repeat = 0; repeat < repeatTime; repeat++) { + length = BOSEncoder(data2_arr, block_size, encoded_result); + } + + long e = System.nanoTime(); + encodeTime += ((e - s) / repeatTime); + compressed_size += length; + + double ratioTmp; + + if (integerDatasets.contains(datasetName)) { + ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); + } else { + ratioTmp = compressed_size / (double) (data1.size() * Long.BYTES); + } + + ratio += ratioTmp; + + s = System.nanoTime(); + + for (int repeat = 0; repeat < repeatTime; repeat++) { + BOSDecoder(encoded_result); + } + + e = System.nanoTime(); + decodeTime += ((e - s) / repeatTime); + + String[] record = { + datasetName, + "SPRINTZ", + String.valueOf(encodeTime), + String.valueOf(decodeTime), + String.valueOf(data1.size()), + String.valueOf(compressed_size), + String.valueOf(ratio) + }; + writer.writeRecord(record); + System.out.println(ratio); + + } + writer.close(); + + } + 
+ @Test + public void testTransData() throws IOException { + String parent_dir = "D:/github/xjz17/subcolumn/"; + + String output_parent_dir = "D:/encoding-subcolumn/trans_data_result/"; + // String output_parent_dir = parent_dir + "trans_data_result/"; + String input_parent_dir = parent_dir + "trans_data/"; + ArrayList<String> input_path_list = new ArrayList<>(); ArrayList<String> output_path_list = new ArrayList<>(); ArrayList<String> dataset_name = new ArrayList<>(); ArrayList<Integer> dataset_block_size = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); + try (Stream<Path> paths = Files.walk(Paths.get(input_parent_dir))) { + paths.filter(Files::isDirectory) + .filter(path -> !path.equals(Paths.get(input_parent_dir))) + .forEach(dir -> { + String name = dir.getFileName().toString(); + dataset_name.add(name); + input_path_list.add(dir.toString()); + dataset_block_size.add(1024); + }); } - output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 -// dataset_block_size.add(256); - output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 -// dataset_block_size.add(1024); - 
output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 -// dataset_block_size.add(1024); + String outputPath = output_parent_dir + "sprintz.csv"; + CsvWriter writer = new CsvWriter(outputPath, ',', StandardCharsets.UTF_8); + writer.setRecordDelimiter('\n'); -// for (int file_i = 7; file_i < 8; file_i++) { + String[] head = { + "Dataset", + "Encoding Algorithm", + "Encoding Time", + "Decoding Time", + "Points", + "Compressed Size", + "Compression Ratio" + }; + writer.writeRecord(head); + + int repeatTime = 100; + for (int file_i = 0; file_i < input_path_list.size(); file_i++) { String inputPath = input_path_list.get(file_i); System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - File file = new File(inputPath); File[] tempList = file.listFiles(); - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; + long totalEncodeTime = 0; + long totalDecodeTime = 0; + double totalCompressedSize = 0; + int totalPoints = 0; for (File f : tempList) { -// f = tempList[1]; - System.out.println(f); + String datasetName = 
extractFileName(f.toString()); InputStream inputStream = Files.newInputStream(f.toPath()); CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); @@ -941,62 +1079,67 @@ ArrayList<Integer> data2 = new ArrayList<>(); loader.readHeaders(); - while (loader.readRecord()) { - + // String value = loader.getValues()[index]; data1.add(Integer.valueOf(loader.getValues()[0])); data2.add(Integer.valueOf(loader.getValues()[1])); - + // data.add(Integer.valueOf(value)); } - inputStream.close(); int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ + for (int i = 0; i < data2.size(); i++) { data2_arr[i] = data2.get(i); } - byte[] encoded_result = new byte[data2_arr.length*4]; + byte[] encoded_result = new byte[data2_arr.length * 4]; long encodeTime = 0; long decodeTime = 0; double ratio = 0; double compressed_size = 0; - int length = 0; long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - length = BOSEncoder(data2_arr, dataset_block_size.get(file_i), encoded_result); + for (int repeat = 0; repeat < repeatTime; repeat++) { + length = BOSEncoder(data2_arr, dataset_block_size.get(file_i), encoded_result); } long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); + encodeTime += ((e - s) / repeatTime); compressed_size += length; double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); ratio += ratioTmp; s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) + + for (int repeat = 0; repeat < repeatTime; repeat++) { BOSDecoder(encoded_result); + } + e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); + decodeTime += ((e - s) / repeatTime); - String[] record = { - f.toString(), - "SPRINTZ", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); - - + totalEncodeTime += encodeTime; + 
totalDecodeTime += decodeTime; + totalCompressedSize += compressed_size; + totalPoints += data1.size(); + } - writer.close(); + double compressionRatio = totalCompressedSize / (totalPoints * Integer.BYTES); + + String[] record = { + dataset_name.get(file_i), + "SPRINTZ", + String.valueOf(totalEncodeTime), + String.valueOf(totalDecodeTime), + String.valueOf(totalPoints), + String.valueOf(totalCompressedSize), + String.valueOf(compressionRatio) + }; + + writer.writeRecord(record); + System.out.println(compressionRatio); } + writer.close(); } - }
diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SPRINTZSubcolumnTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SPRINTZSubcolumnTest.java new file mode 100644 index 0000000..a8c8dc9 --- /dev/null +++ b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SPRINTZSubcolumnTest.java
@@ -0,0 +1,549 @@
+package org.apache.iotdb.tsfile.encoding;
+
+import java.io.File;
+import java.io.FileFilter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import org.junit.Assume;
+import org.junit.Test;
+
+import com.csvreader.CsvReader;
+import com.csvreader.CsvWriter;
+
+import static org.junit.Assert.assertArrayEquals;
+
+/**
+ * SPRINTZ-style delta coding whose residuals are packed by the sub-column coder in
+ * {@link SubcolumnTest}, together with two data-driven benchmarks that report timing and size.
+ *
+ * <p>Stream layout written by {@link #Encoder}: value count (4 bytes, big-endian), block size
+ * (4 bytes, big-endian), one encoded block per full block, then the tail. A tail of at most three
+ * values is stored raw (4 bytes each); a longer tail is encoded as one short block.
+ *
+ * <p>The benchmarks take their locations from the system properties {@code subcolumn.parent.dir}
+ * and {@code subcolumn.output.dir}; when unset they fall back to fixed default paths.
+ * A benchmark is skipped, not failed, when its input directory does not exist.
+ */
+public class SPRINTZSubcolumnTest {
+
+  /** System property naming the directory that holds {@code dataset/} and {@code trans_data/}. */
+  private static final String PARENT_DIR_PROPERTY = "subcolumn.parent.dir";
+
+  /** System property naming the directory below which the result CSV files are written. */
+  private static final String OUTPUT_DIR_PROPERTY = "subcolumn.output.dir";
+
+  private static final String DEFAULT_PARENT_DIR = "D:/github/xjz17/subcolumn/";
+  private static final String DEFAULT_OUTPUT_DIR = "D:/encoding-subcolumn/";
+
+  private static final String ALGORITHM_NAME = "SPRINTZ+Sub-columns";
+  private static final String RESULT_FILE_NAME = "sprintz_subcolumn.csv";
+
+  /** Number of timed repetitions; reported times are the per-repetition average in nanoseconds. */
+  private static final int REPEAT_TIME = 100;
+
+  /** A tail of at most this many values is stored raw instead of as a block. */
+  private static final int MAX_RAW_TAIL = 3;
+
+  /** Bytes taken by the stream header (value count + block size). */
+  private static final int HEADER_BYTES = 2 * Integer.BYTES;
+
+  /** Datasets whose raw width is counted as 4 bytes per value; every other dataset counts 8. */
+  private static final List<String> INTEGER_DATASETS = Collections.singletonList("Wine-Tasting");
+
+  private static final String[] CSV_HEAD = {
+    "Dataset",
+    "Encoding Algorithm",
+    "Encoding Time",
+    "Decoding Time",
+    "Points",
+    "Compressed Size",
+    "Compression Ratio"
+  };
+
+  /** Timing and size figures of one benchmark run. */
+  private static final class Measurement {
+    final long encodeNanos;
+    final long decodeNanos;
+    final int compressedSize;
+
+    Measurement(long encodeNanos, long decodeNanos, int compressedSize) {
+      this.encodeNanos = encodeNanos;
+      this.decodeNanos = decodeNanos;
+      this.compressedSize = compressedSize;
+    }
+  }
+
+  /** Stores {@code value} big-endian at {@code pos} and returns the position just after it. */
+  private static int writeInt(int value, byte[] target, int pos) {
+    target[pos] = (byte) (value >> 24);
+    target[pos + 1] = (byte) (value >> 16);
+    target[pos + 2] = (byte) (value >> 8);
+    target[pos + 3] = (byte) value;
+    return pos + Integer.BYTES;
+  }
+
+  /** Reads the big-endian int stored at {@code pos}. */
+  private static int readInt(byte[] source, int pos) {
+    return ((source[pos] & 0xFF) << 24)
+        | ((source[pos + 1] & 0xFF) << 16)
+        | ((source[pos + 2] & 0xFF) << 8)
+        | (source[pos + 3] & 0xFF);
+  }
+
+  /**
+   * Encodes {@code data} into {@code encoded_result}.
+   *
+   * @param data values to encode
+   * @param block_size number of values per block; must be positive
+   * @param encoded_result destination buffer; it must have room for the 8-byte stream header
+   *     and the 8-byte header of every block on top of the packed payload
+   * @return number of bytes written
+   * @throws IllegalArgumentException if {@code block_size} is not positive
+   */
+  public static int Encoder(int[] data, int block_size, byte[] encoded_result) {
+    if (block_size <= 0) {
+      throw new IllegalArgumentException("block_size must be positive: " + block_size);
+    }
+    int encodePos = writeInt(data.length, encoded_result, 0);
+    encodePos = writeInt(block_size, encoded_result, encodePos);
+
+    int numBlocks = data.length / block_size;
+    int remainder = data.length % block_size;
+
+    // beta[0] is replaced while block 0 is encoded and then reused by every later block.
+    int[] beta = {2};
+
+    for (int i = 0; i < numBlocks; i++) {
+      encodePos = BlockEncoder(data, i, block_size, block_size, encodePos, encoded_result, beta);
+    }
+
+    if (remainder <= MAX_RAW_TAIL) {
+      int tailStart = numBlocks * block_size;
+      for (int i = 0; i < remainder; i++) {
+        encodePos = writeInt(data[tailStart + i], encoded_result, encodePos);
+      }
+    } else {
+      encodePos =
+          BlockEncoder(data, numBlocks, block_size, remainder, encodePos, encoded_result, beta);
+    }
+
+    return encodePos;
+  }
+
+  /**
+   * Decodes a stream produced by {@link #Encoder}.
+   *
+   * @param encoded_result encoded bytes, starting with the 8-byte header
+   * @return the decoded values
+   * @throws IllegalArgumentException if the header holds a negative count or a non-positive
+   *     block size
+   */
+  public static int[] Decoder(byte[] encoded_result) {
+    int dataLength = readInt(encoded_result, 0);
+    int blockSize = readInt(encoded_result, Integer.BYTES);
+    if (dataLength < 0 || blockSize <= 0) {
+      throw new IllegalArgumentException(
+          "corrupt header: length=" + dataLength + ", block size=" + blockSize);
+    }
+    int decodePos = HEADER_BYTES;
+
+    int numBlocks = dataLength / blockSize;
+    int remainder = dataLength % blockSize;
+    int[] data = new int[dataLength];
+
+    for (int i = 0; i < numBlocks; i++) {
+      decodePos = BlockDecoder(encoded_result, i, blockSize, blockSize, decodePos, data);
+    }
+
+    if (remainder <= MAX_RAW_TAIL) {
+      int tailStart = numBlocks * blockSize;
+      for (int i = 0; i < remainder; i++) {
+        data[tailStart + i] = readInt(encoded_result, decodePos);
+        decodePos += Integer.BYTES;
+      }
+    } else {
+      BlockDecoder(encoded_result, numBlocks, blockSize, remainder, decodePos, data);
+    }
+
+    return data;
+  }
+
+  /**
+   * Maps a signed value to an unsigned-style code: 0, -1, 1, -2, ... become 0, 1, 2, 3, ...
+   *
+   * @param num value to map
+   * @return the zigzag code; for large magnitudes it occupies all 32 bits and reads as negative
+   */
+  public static int zigzag(int num) {
+    return (num << 1) ^ (num >> 31);
+  }
+
+  /**
+   * Inverse of {@link #zigzag} for every 32-bit code.
+   *
+   * <p>The code is treated as unsigned (logical shift), so {@code Integer.MAX_VALUE} and codes
+   * with the top bit set decode correctly; {@code -((num + 1) >> 1)} would overflow on them.
+   *
+   * @param num zigzag code
+   * @return the original signed value
+   */
+  public static int deZigzag(int num) {
+    return (num >>> 1) ^ -(num & 1);
+  }
+
+  /**
+   * Computes the zigzag-coded first-order deltas of one block, shifted so the smallest is zero.
+   *
+   * @param ts_block full input series
+   * @param i index of the block
+   * @param block_size nominal block size, used to locate the block start
+   * @param remaining number of values in this block; must be at least 1
+   * @param min_delta output array of length 3: {@code [0]} first value of the block,
+   *     {@code [1]} smallest zigzag delta, {@code [2]} largest minus smallest zigzag delta
+   * @return {@code remaining - 1} shifted deltas
+   */
+  public static int[] getAbsDeltaTsBlock(
+      int[] ts_block, int i, int block_size, int remaining, int[] min_delta) {
+    int[] deltas = new int[remaining - 1];
+    int base = i * block_size + 1;
+    int end = i * block_size + remaining;
+
+    min_delta[0] = ts_block[base - 1];
+
+    int minDelta = Integer.MAX_VALUE;
+    int maxDelta = Integer.MIN_VALUE;
+    for (int j = base; j < end; j++) {
+      int delta = zigzag(ts_block[j] - ts_block[j - 1]);
+      minDelta = Math.min(minDelta, delta);
+      maxDelta = Math.max(maxDelta, delta);
+      deltas[j - base] = delta;
+    }
+    for (int j = 0; j < deltas.length; j++) {
+      deltas[j] -= minDelta;
+    }
+
+    min_delta[1] = minDelta;
+    min_delta[2] = maxDelta - minDelta;
+    return deltas;
+  }
+
+  /**
+   * Encodes one block: first value, smallest zigzag delta, then the sub-column packed deltas.
+   *
+   * @param data full input series
+   * @param block_index index of the block within {@code data}
+   * @param block_size nominal block size
+   * @param remainder number of values in this block
+   * @param encode_pos write position in {@code encoded_result}
+   * @param encoded_result destination buffer
+   * @param beta single-element holder passed through to {@code SubcolumnTest}; recomputed here
+   *     for block 0 only (presumably the sub-column width - confirm in {@code SubcolumnTest})
+   * @return write position after the block
+   */
+  public static int BlockEncoder(
+      int[] data,
+      int block_index,
+      int block_size,
+      int remainder,
+      int encode_pos,
+      byte[] encoded_result,
+      int[] beta) {
+    int[] min_delta = new int[3];
+    int[] data_delta = getAbsDeltaTsBlock(data, block_index, block_size, remainder, min_delta);
+
+    encode_pos = writeInt(min_delta[0], encoded_result, encode_pos);
+    encode_pos = writeInt(min_delta[1], encoded_result, encode_pos);
+
+    if (block_index == 0) {
+      int maxValue = 0;
+      for (int delta : data_delta) {
+        maxValue = Math.max(maxValue, delta);
+      }
+      int m = SubcolumnTest.bitWidth(maxValue);
+      beta[0] = SubcolumnTest.Subcolumn(data_delta, remainder - 1, m, block_size);
+    }
+
+    return SubcolumnTest.SubcolumnEncoder(
+        data_delta, encode_pos, encoded_result, beta, block_size);
+  }
+
+  /**
+   * Decodes one block written by {@link #BlockEncoder} into {@code data}.
+   *
+   * @param encoded_result encoded bytes
+   * @param block_index index of the block within {@code data}
+   * @param block_size nominal block size
+   * @param remainder number of values in this block
+   * @param encode_pos read position in {@code encoded_result}
+   * @param data destination for the decoded values
+   * @return read position after the block
+   */
+  public static int BlockDecoder(
+      byte[] encoded_result,
+      int block_index,
+      int block_size,
+      int remainder,
+      int encode_pos,
+      int[] data) {
+    int firstValue = readInt(encoded_result, encode_pos);
+    int minDelta = readInt(encoded_result, encode_pos + Integer.BYTES);
+    encode_pos += 2 * Integer.BYTES;
+
+    int[] data_delta = new int[remainder - 1];
+    encode_pos =
+        SubcolumnTest.SubcolumnDecoder(encoded_result, encode_pos, data_delta, block_size);
+
+    int start = block_index * block_size;
+    data[start] = firstValue;
+    // Undo the min shift and the zigzag mapping, then integrate the deltas.
+    for (int i = 0; i < data_delta.length; i++) {
+      data[start + i + 1] = data[start + i] + deZigzag(data_delta[i] + minDelta);
+    }
+
+    return encode_pos;
+  }
+
+  /**
+   * Counts the characters after the first {@code '.'} in {@code str}.
+   *
+   * @param str textual number
+   * @return number of characters after the decimal point, or 0 when there is none
+   */
+  public static int getDecimalPrecision(String str) {
+    int decimalIndex = str.indexOf('.');
+    return decimalIndex == -1 ? 0 : str.length() - decimalIndex - 1;
+  }
+
+  /**
+   * Returns the last path component without its extension.
+   *
+   * @param path file path; may be {@code null}
+   * @return the base name, the unchanged file name when it has no extension or starts with its
+   *     only dot, or an empty string for a {@code null} or empty path
+   */
+  public static String extractFileName(String path) {
+    if (path == null || path.isEmpty()) {
+      return "";
+    }
+    String fileName = new File(path).getName();
+    int dotIndex = fileName.lastIndexOf('.');
+    return dotIndex <= 0 ? fileName : fileName.substring(0, dotIndex);
+  }
+
+  /** Directory that holds the {@code dataset/} and {@code trans_data/} inputs. */
+  private static String parentDir() {
+    return System.getProperty(PARENT_DIR_PROPERTY, DEFAULT_PARENT_DIR);
+  }
+
+  /** Directory below which result files are written. */
+  private static String outputDir() {
+    return System.getProperty(OUTPUT_DIR_PROPERTY, DEFAULT_OUTPUT_DIR);
+  }
+
+  /** Lists the entries of {@code dir} accepted by {@code filter}, sorted for stable output. */
+  private static File[] listSorted(File dir, FileFilter filter) throws IOException {
+    File[] files = dir.listFiles(filter);
+    if (files == null) {
+      throw new IOException("cannot list directory: " + dir);
+    }
+    Arrays.sort(files);
+    return files;
+  }
+
+  /**
+   * Reads column 0 of a header-less CSV file and scales it to integers by the largest number of
+   * decimals seen.
+   *
+   * <p>NOTE(review): values pass through {@code float} and are truncated, not rounded, so the
+   * integers can be off by one from the decimal text. Kept as is, presumably so results stay
+   * comparable with the other encoding benchmarks that load data the same way - confirm.
+   */
+  private static int[] readScaledDecimalColumn(File csv) throws IOException {
+    List<Float> values = new ArrayList<>();
+    int maxDecimal = 0;
+    try (InputStream inputStream = Files.newInputStream(csv.toPath())) {
+      CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8);
+      while (loader.readRecord()) {
+        String text = loader.getValues()[0];
+        if (text.isEmpty()) {
+          continue;
+        }
+        maxDecimal = Math.max(maxDecimal, getDecimalPrecision(text));
+        values.add(Float.valueOf(text));
+      }
+    }
+    System.out.println(maxDecimal);
+
+    int scale = (int) Math.pow(10, maxDecimal);
+    int[] scaled = new int[values.size()];
+    for (int i = 0; i < scaled.length; i++) {
+      scaled[i] = (int) (values.get(i) * scale);
+    }
+    return scaled;
+  }
+
+  /** Reads the given column of a CSV file with a header row as integers. */
+  private static int[] readIntColumn(File csv, int column) throws IOException {
+    List<Integer> values = new ArrayList<>();
+    try (InputStream inputStream = Files.newInputStream(csv.toPath())) {
+      CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8);
+      loader.readHeaders();
+      while (loader.readRecord()) {
+        values.add(Integer.valueOf(loader.getValues()[column]));
+      }
+    }
+    return values.stream().mapToInt(Integer::intValue).toArray();
+  }
+
+  /**
+   * Times {@link #Encoder} and {@link #Decoder} on {@code values} and checks the round trip.
+   *
+   * <p>The buffer holds the header plus twice the raw size as headroom for poorly compressible
+   * input; NOTE(review): the true worst case depends on {@code SubcolumnTest} - confirm.
+   */
+  private static Measurement measure(int[] values, int blockSize) {
+    long capacity = HEADER_BYTES + 2L * Integer.BYTES * values.length;
+    byte[] encoded = new byte[(int) Math.min(capacity, Integer.MAX_VALUE - 8)];
+
+    int compressedSize = 0;
+    long start = System.nanoTime();
+    for (int repeat = 0; repeat < REPEAT_TIME; repeat++) {
+      compressedSize = Encoder(values, blockSize, encoded);
+    }
+    long encodeNanos = (System.nanoTime() - start) / REPEAT_TIME;
+
+    int[] decoded = null;
+    start = System.nanoTime();
+    for (int repeat = 0; repeat < REPEAT_TIME; repeat++) {
+      decoded = Decoder(encoded);
+    }
+    long decodeNanos = (System.nanoTime() - start) / REPEAT_TIME;
+
+    // Timings of a codec that does not reproduce its input are meaningless, so verify it.
+    assertArrayEquals("decoded values differ from the input", values, decoded);
+    return new Measurement(encodeNanos, decodeNanos, compressedSize);
+  }
+
+  /** Writes the header and {@code records} to the result file below {@code outputDir}. */
+  private static void writeResults(Path outputDir, List<String[]> records) throws IOException {
+    Files.createDirectories(outputDir);
+    String outputPath = outputDir.resolve(RESULT_FILE_NAME).toString();
+    CsvWriter writer = new CsvWriter(outputPath, ',', StandardCharsets.UTF_8);
+    try {
+      writer.setRecordDelimiter('\n');
+      writer.writeRecord(CSV_HEAD);
+      for (String[] record : records) {
+        writer.writeRecord(record);
+      }
+    } finally {
+      writer.close();
+    }
+  }
+
+  /** Benchmarks every {@code *.csv} file in {@code dataset/} and writes one row per file. */
+  @Test
+  public void testSubcolumn() throws IOException {
+    File inputDir = new File(parentDir(), "dataset");
+    Assume.assumeTrue("dataset directory not found: " + inputDir, inputDir.isDirectory());
+
+    int blockSize = 512;
+    List<String[]> records = new ArrayList<>();
+
+    for (File file : listSorted(inputDir, f -> f.getName().endsWith(".csv"))) {
+      String datasetName = extractFileName(file.toString());
+      System.out.println(datasetName);
+
+      int[] values = readScaledDecimalColumn(file);
+      Measurement result = measure(values, blockSize);
+
+      int rawWidth = INTEGER_DATASETS.contains(datasetName) ? Integer.BYTES : Long.BYTES;
+      double compressedSize = result.compressedSize;
+      double ratio = compressedSize / ((double) values.length * rawWidth);
+
+      records.add(
+          new String[] {
+            datasetName,
+            ALGORITHM_NAME,
+            String.valueOf(result.encodeNanos),
+            String.valueOf(result.decodeNanos),
+            String.valueOf(values.length),
+            String.valueOf(compressedSize),
+            String.valueOf(ratio)
+          });
+      System.out.println(ratio);
+    }
+
+    writeResults(Paths.get(outputDir(), "result"), records);
+  }
+
+  /**
+   * Benchmarks column 1 of every file in each sub-directory of {@code trans_data/} and writes one
+   * aggregated row per sub-directory.
+   */
+  @Test
+  public void testTransData() throws IOException {
+    Path inputRoot = Paths.get(parentDir(), "trans_data");
+    Assume.assumeTrue(
+        "trans_data directory not found: " + inputRoot, Files.isDirectory(inputRoot));
+
+    List<Path> datasetDirs;
+    try (Stream<Path> paths = Files.walk(inputRoot)) {
+      datasetDirs =
+          paths
+              .filter(Files::isDirectory)
+              .filter(path -> !path.equals(inputRoot))
+              .sorted()
+              .collect(Collectors.toList());
+    }
+
+    int blockSize = 1024;
+    List<String[]> records = new ArrayList<>();
+
+    for (Path datasetDir : datasetDirs) {
+      System.out.println(datasetDir);
+
+      long totalEncodeTime = 0;
+      long totalDecodeTime = 0;
+      double totalCompressedSize = 0;
+      long totalPoints = 0;
+
+      // Only regular files are read; nested directories are visited as datasets of their own.
+      for (File f : listSorted(datasetDir.toFile(), File::isFile)) {
+        int[] values = readIntColumn(f, 1);
+        Measurement result = measure(values, blockSize);
+
+        totalEncodeTime += result.encodeNanos;
+        totalDecodeTime += result.decodeNanos;
+        totalCompressedSize += result.compressedSize;
+        totalPoints += values.length;
+      }
+
+      double compressionRatio = totalCompressedSize / ((double) totalPoints * Integer.BYTES);
+
+      records.add(
+          new String[] {
+            datasetDir.getFileName().toString(),
+            ALGORITHM_NAME,
+            String.valueOf(totalEncodeTime),
+            String.valueOf(totalDecodeTime),
+            String.valueOf(totalPoints),
+            String.valueOf(totalCompressedSize),
+            String.valueOf(compressionRatio)
+          });
+      System.out.println(compressionRatio);
+    }
+
+    writeResults(Paths.get(outputDir(), "trans_data_result"), records);
+  }
+}
diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnBetaTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnBetaTest.java new file mode 100644 index 0000000..5a4ed89 --- /dev/null +++ b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnBetaTest.java
@@ -0,0 +1,502 @@ +package org.apache.iotdb.tsfile.encoding; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Stream; + +import org.junit.Test; + +import com.csvreader.CsvReader; +import com.csvreader.CsvWriter; + +import static org.junit.Assert.assertEquals; + +public class SubcolumnBetaTest { + + public static int[] getAbsDeltaTsBlock( + int[] ts_block, + int i, + int block_size, + int remaining, + int[] min_delta) { + int[] ts_block_delta = new int[remaining]; + + int value_delta_min = Integer.MAX_VALUE; + int value_delta_max = Integer.MIN_VALUE; + int base = i * block_size; + int end = i * block_size + remaining; + + for (int j = base; j < end; j++) { + int cur = ts_block[j]; + if (cur < value_delta_min) { + value_delta_min = cur; + } + if (cur > value_delta_max) { + value_delta_max = cur; + } + } + + for (int j = base; j < end; j++) { + ts_block_delta[j - base] = ts_block[j] - value_delta_min; + } + + min_delta[0] = value_delta_min; + + return ts_block_delta; + } + + public static int BlockEncoder(int[] data, int block_index, int block_size, int remainder, + int encode_pos, byte[] encoded_result, int[] beta) { + int[] min_delta = new int[3]; + + int[] data_delta = getAbsDeltaTsBlock(data, block_index, block_size, + remainder, min_delta); + + encoded_result[encode_pos] = (byte) (min_delta[0] >> 24); + encoded_result[encode_pos + 1] = (byte) (min_delta[0] >> 16); + encoded_result[encode_pos + 2] = (byte) (min_delta[0] >> 8); + encoded_result[encode_pos + 3] = (byte) min_delta[0]; + encode_pos += 4; + + encode_pos = SubcolumnTest.SubcolumnEncoder(data_delta, encode_pos, + encoded_result, beta, block_size); + + return encode_pos; + } + + public static int BlockDecoder(byte[] encoded_result, int block_index, int block_size, int 
remainder, + int encode_pos, int[] data) { + int[] min_delta = new int[3]; + + min_delta[0] = ((encoded_result[encode_pos] & 0xFF) << 24) | ((encoded_result[encode_pos + 1] & 0xFF) << 16) | + ((encoded_result[encode_pos + 2] & 0xFF) << 8) | (encoded_result[encode_pos + 3] & 0xFF); + encode_pos += 4; + + int[] block_data = new int[remainder]; + + encode_pos = SubcolumnTest.SubcolumnDecoder(encoded_result, encode_pos, + block_data, block_size); + + for (int i = 0; i < remainder; i++) { + data[block_index * block_size + i] = block_data[i] + min_delta[0]; + } + + return encode_pos; + } + + public static int Encoder(int[] data, int block_size, byte[] encoded_result, int beta_value) { + int data_length = data.length; + int encode_pos = 0; + + encoded_result[0] = (byte) (data_length >> 24); + encoded_result[1] = (byte) (data_length >> 16); + encoded_result[2] = (byte) (data_length >> 8); + encoded_result[3] = (byte) data_length; + encode_pos += 4; + + encoded_result[4] = (byte) (block_size >> 24); + encoded_result[5] = (byte) (block_size >> 16); + encoded_result[6] = (byte) (block_size >> 8); + encoded_result[7] = (byte) block_size; + encode_pos += 4; + + int num_blocks = data_length / block_size; + + int remainder = data_length % block_size; + + int[] beta = new int[1]; + beta[0] = beta_value; + + for (int i = 0; i < num_blocks; i++) { + encode_pos = BlockEncoder(data, i, block_size, block_size, encode_pos, encoded_result, beta); + } + + if (remainder <= 3) { + for (int i = 0; i < remainder; i++) { + int value = data[num_blocks * block_size + i]; + encoded_result[encode_pos] = (byte) (value >> 24); + encoded_result[encode_pos + 1] = (byte) (value >> 16); + encoded_result[encode_pos + 2] = (byte) (value >> 8); + encoded_result[encode_pos + 3] = (byte) value; + encode_pos += 4; + } + } else { + encode_pos = BlockEncoder(data, num_blocks, block_size, remainder, encode_pos, + encoded_result, beta); + } + + return encode_pos; + } + + public static int[] Decoder(byte[] 
encoded_result) { + int encode_pos = 0; + + int data_length = ((encoded_result[encode_pos] & 0xFF) << 24) | ((encoded_result[encode_pos + 1] & 0xFF) << 16) + | + ((encoded_result[encode_pos + 2] & 0xFF) << 8) | (encoded_result[encode_pos + 3] & 0xFF); + encode_pos += 4; + + int block_size = ((encoded_result[encode_pos] & 0xFF) << 24) | ((encoded_result[encode_pos + 1] & 0xFF) << 16) | + ((encoded_result[encode_pos + 2] & 0xFF) << 8) | (encoded_result[encode_pos + 3] & 0xFF); + encode_pos += 4; + + int num_blocks = data_length / block_size; + + int[] data = new int[data_length]; + + for (int i = 0; i < num_blocks; i++) { + encode_pos = BlockDecoder(encoded_result, i, block_size, block_size, encode_pos, data); + } + + int remainder = data_length % block_size; + + if (remainder <= 3) { + for (int i = 0; i < remainder; i++) { + data[num_blocks * block_size + i] = ((encoded_result[encode_pos] & 0xFF) << 24) | + ((encoded_result[encode_pos + 1] & 0xFF) << 16) | + ((encoded_result[encode_pos + 2] & 0xFF) << 8) | (encoded_result[encode_pos + 3] & 0xFF); + encode_pos += 4; + } + } else { + encode_pos = BlockDecoder(encoded_result, num_blocks, block_size, remainder, + encode_pos, data); + } + + return data; + } + + public static int getDecimalPrecision(String str) { + // 查找小数点的位置 + int decimalIndex = str.indexOf("."); + + // 如果没有小数点,精度为0 + if (decimalIndex == -1) { + return 0; + } + + // 获取小数点后的部分并返回其长度 + return str.substring(decimalIndex + 1).length(); + } + + public static String extractFileName(String path) { + if (path == null || path.isEmpty()) { + return ""; + } + + File file = new File(path); + String fileName = file.getName(); + + int dotIndex = fileName.lastIndexOf('.'); + + if (dotIndex == -1 || dotIndex == 0) { + return fileName; + } + + return fileName.substring(0, dotIndex); + } + + @Test + public void testSubcolumn() throws IOException { + String parent_dir = "D:/github/xjz17/subcolumn/"; + + String input_parent_dir = parent_dir + "dataset/"; + + String 
output_parent_dir = "D:/encoding-subcolumn/result/compression_vs_beta/"; + // String output_parent_dir = parent_dir + "result/compression_vs_beta/"; + + int[] beta_list = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 }; + + // int block_size = 1024; + int block_size = 512; + + int repeatTime = 100; + + // repeatTime = 1; + + List<String> integerDatasets = new ArrayList<>(); + integerDatasets.add("Wine-Tasting"); + + for (int beta : beta_list) { + + String outputPath = output_parent_dir + "subcolumn_beta_" + beta + ".csv"; + + CsvWriter writer = new CsvWriter(outputPath, ',', StandardCharsets.UTF_8); + writer.setRecordDelimiter('\n'); + + String[] head = { + "Dataset", + "Encoding Algorithm", + "Encoding Time", + "Decoding Time", + "Points", + "Compressed Size", + "Compression Ratio" + }; + writer.writeRecord(head); + + File directory = new File(input_parent_dir); + // File[] csvFiles = directory.listFiles(); + File[] csvFiles = directory.listFiles((dir, name) -> name.endsWith(".csv")); + + for (File file : csvFiles) { + String datasetName = extractFileName(file.toString()); + System.out.println(datasetName); + + InputStream inputStream = Files.newInputStream(file.toPath()); + + CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); + ArrayList<Float> data1 = new ArrayList<>(); + + int max_decimal = 0; + while (loader.readRecord()) { + String f_str = loader.getValues()[0]; + if (f_str.isEmpty()) { + continue; + } + int cur_decimal = getDecimalPrecision(f_str); + if (cur_decimal > max_decimal) { + max_decimal = cur_decimal; + } + data1.add(Float.valueOf(f_str)); + } + inputStream.close(); + int[] data2_arr = new int[data1.size()]; + int max_mul = (int) Math.pow(10, max_decimal); + for (int i = 0; i < data1.size(); i++) { + data2_arr[i] = (int) (data1.get(i) * max_mul); + } + + System.out.println(max_decimal); + byte[] encoded_result = new byte[data2_arr.length * 4]; + + long 
encodeTime = 0; + long decodeTime = 0; + double ratio = 0; + double compressed_size = 0; + + int length = 0; + + long s = System.nanoTime(); + for (int repeat = 0; repeat < repeatTime; repeat++) { + length = Encoder(data2_arr, block_size, encoded_result, beta); + } + + long e = System.nanoTime(); + encodeTime += ((e - s) / repeatTime); + + compressed_size += length; + + double ratioTmp; + + if (integerDatasets.contains(datasetName)) { + ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); + } else { + ratioTmp = compressed_size / (double) (data1.size() * Long.BYTES); + } + + ratio += ratioTmp; + + System.out.println("Decode"); + + s = System.nanoTime(); + + int[] data2_arr_decoded = new int[data2_arr.length]; + + for (int repeat = 0; repeat < repeatTime; repeat++) { + data2_arr_decoded = Decoder(encoded_result); + } + + e = System.nanoTime(); + decodeTime += ((e - s) / repeatTime); + + for (int i = 0; i < data2_arr_decoded.length; i++) { + assertEquals(data2_arr[i], data2_arr_decoded[i]); + } + + String[] record = { + datasetName, + "Sub-columns", + String.valueOf(encodeTime), + String.valueOf(decodeTime), + String.valueOf(data1.size()), + String.valueOf(compressed_size), + String.valueOf(ratio) + }; + writer.writeRecord(record); + System.out.println("beta: " + beta); + System.out.println(ratio); + } + + writer.close(); + } + } + + @Test + public void testTransData() throws IOException { + String parent_dir = "D:/github/xjz17/subcolumn/"; + + String output_parent_dir = "D:/encoding-subcolumn/trans_data_result/compression_vs_beta/"; + // String output_parent_dir = parent_dir + "trans_data_result/compression_vs_beta/"; + + String input_parent_dir = parent_dir + "trans_data/"; + + int[] beta_list = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 }; + + ArrayList<String> input_path_list = new ArrayList<>(); + ArrayList<String> output_path_list = new ArrayList<>(); + ArrayList<String> 
dataset_name = new ArrayList<>(); + ArrayList<Integer> dataset_block_size = new ArrayList<>(); + + try (Stream<Path> paths = Files.walk(Paths.get(input_parent_dir))) { + paths.filter(Files::isDirectory) + .filter(path -> !path.equals(Paths.get(input_parent_dir))) + .forEach(dir -> { + String name = dir.getFileName().toString(); + dataset_name.add(name); + input_path_list.add(dir.toString()); + dataset_block_size.add(1024); + }); + } + + // for (String name : dataset_name) { + // output_path_list.add(output_parent_dir + name + "_ratio.csv"); + // } + + for (int beta : beta_list) { + + String outputPath = output_parent_dir + "subcolumn_trans_data_beta_" + beta + ".csv"; + CsvWriter writer = new CsvWriter(outputPath, ',', StandardCharsets.UTF_8); + writer.setRecordDelimiter('\n'); + + String[] head = { + "Dataset", + "Encoding Algorithm", + "Encoding Time", + "Decoding Time", + "Points", + "Compressed Size", + "Compression Ratio" + }; + writer.writeRecord(head); + + int repeatTime = 100; + + for (int file_i = 0; file_i < input_path_list.size(); file_i++) { + + String inputPath = input_path_list.get(file_i); + System.out.println(inputPath); + + File file = new File(inputPath); + File[] tempList = file.listFiles(); + + // CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); + // writer.setRecordDelimiter('\n'); + + // String[] head = { + // "Input Direction", + // "Encoding Algorithm", + // "Encoding Time", + // "Decoding Time", + // "Points", + // "Compressed Size", + // "Compression Ratio" + // }; + // writer.writeRecord(head); + + long totalEncodeTime = 0; + long totalDecodeTime = 0; + double totalCompressedSize = 0; + int totalPoints = 0; + + for (File f : tempList) { + String datasetName = extractFileName(f.toString()); + InputStream inputStream = Files.newInputStream(f.toPath()); + + CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); + ArrayList<Integer> data1 = new ArrayList<>(); + ArrayList<Integer> data2 = new 
ArrayList<>(); + + loader.readHeaders(); + while (loader.readRecord()) { + // String value = loader.getValues()[index]; + data1.add(Integer.valueOf(loader.getValues()[0])); + data2.add(Integer.valueOf(loader.getValues()[1])); + // data.add(Integer.valueOf(value)); + } + inputStream.close(); + int[] data2_arr = new int[data1.size()]; + for (int i = 0; i < data2.size(); i++) { + data2_arr[i] = data2.get(i); + } + byte[] encoded_result = new byte[data2_arr.length * 4]; + long encodeTime = 0; + long decodeTime = 0; + double ratio = 0; + double compressed_size = 0; + + int length = 0; + + long s = System.nanoTime(); + for (int repeat = 0; repeat < repeatTime; repeat++) { + length = Encoder(data2_arr, dataset_block_size.get(file_i), encoded_result, beta); + } + + long e = System.nanoTime(); + encodeTime += ((e - s) / repeatTime); + compressed_size += length; + double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); + ratio += ratioTmp; + s = System.nanoTime(); + + int[] data2_arr_decoded = new int[data1.size()]; + + for (int repeat = 0; repeat < repeatTime; repeat++) { + data2_arr_decoded = Decoder(encoded_result); + } + + e = System.nanoTime(); + decodeTime += ((e - s) / repeatTime); + + totalEncodeTime += encodeTime; + totalDecodeTime += decodeTime; + totalCompressedSize += compressed_size; + totalPoints += data1.size(); + + for (int i = 0; i < data2_arr_decoded.length; i++) { + assertEquals(data2_arr[i], data2_arr_decoded[i]); + } + + } + + double compressionRatio = totalCompressedSize / (totalPoints * Integer.BYTES); + + String[] record = { + dataset_name.get(file_i), + "Sub-columns", + String.valueOf(totalEncodeTime), + String.valueOf(totalDecodeTime), + String.valueOf(totalPoints), + String.valueOf(totalCompressedSize), + String.valueOf(compressionRatio) + }; + + writer.writeRecord(record); + System.out.println(compressionRatio); + + System.out.println("beta: " + beta); + } + + writer.close(); + } + } + +}
diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnBlockSizeTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnBlockSizeTest.java new file mode 100644 index 0000000..328749f --- /dev/null +++ b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnBlockSizeTest.java
@@ -0,0 +1,321 @@ +package org.apache.iotdb.tsfile.encoding; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Stream; + +import org.junit.Test; + +import com.csvreader.CsvReader; +import com.csvreader.CsvWriter; + +import static org.junit.Assert.assertEquals; + +public class SubcolumnBlockSizeTest { + + public static int getDecimalPrecision(String str) { + // 查找小数点的位置 + int decimalIndex = str.indexOf("."); + + // 如果没有小数点,精度为0 + if (decimalIndex == -1) { + return 0; + } + + // 获取小数点后的部分并返回其长度 + return str.substring(decimalIndex + 1).length(); + } + + public static String extractFileName(String path) { + if (path == null || path.isEmpty()) { + return ""; + } + + File file = new File(path); + String fileName = file.getName(); + + int dotIndex = fileName.lastIndexOf('.'); + + if (dotIndex == -1 || dotIndex == 0) { + return fileName; + } + + return fileName.substring(0, dotIndex); + } + + @Test + public void testSubcolumn() throws IOException { + String parent_dir = "D:/github/xjz17/subcolumn/"; + + String input_parent_dir = parent_dir + "dataset/"; + + String output_parent_dir = "D:/encoding-subcolumn/result/compression_vs_block/"; + // String output_parent_dir = parent_dir + "result/compression_vs_block/"; + + int[] block_size_list = { 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192 }; + + int repeatTime = 100; + + // repeatTime = 1; + + List<String> integerDatasets = new ArrayList<>(); + integerDatasets.add("Wine-Tasting"); + + for (int block_size : block_size_list) { + + String outputPath = output_parent_dir + "subcolumn_block_" + block_size + ".csv"; + + CsvWriter writer = new CsvWriter(outputPath, ',', StandardCharsets.UTF_8); + writer.setRecordDelimiter('\n'); + + String[] head = { + "Dataset", + "Encoding Algorithm", + "Encoding 
Time", + "Decoding Time", + "Points", + "Compressed Size", + "Compression Ratio" + }; + writer.writeRecord(head); + + File directory = new File(input_parent_dir); + // File[] csvFiles = directory.listFiles(); + File[] csvFiles = directory.listFiles((dir, name) -> name.endsWith(".csv")); + + for (File file : csvFiles) { + String datasetName = extractFileName(file.toString()); + System.out.println(datasetName); + + InputStream inputStream = Files.newInputStream(file.toPath()); + + CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); + ArrayList<Float> data1 = new ArrayList<>(); + + int max_decimal = 0; + while (loader.readRecord()) { + String f_str = loader.getValues()[0]; + if (f_str.isEmpty()) { + continue; + } + int cur_decimal = getDecimalPrecision(f_str); + if (cur_decimal > max_decimal) { + max_decimal = cur_decimal; + } + data1.add(Float.valueOf(f_str)); + } + inputStream.close(); + int[] data2_arr = new int[data1.size()]; + int max_mul = (int) Math.pow(10, max_decimal); + for (int i = 0; i < data1.size(); i++) { + data2_arr[i] = (int) (data1.get(i) * max_mul); + } + + System.out.println(max_decimal); + byte[] encoded_result = new byte[data2_arr.length * 4]; + + long encodeTime = 0; + long decodeTime = 0; + double ratio = 0; + double compressed_size = 0; + + int length = 0; + + long s = System.nanoTime(); + for (int repeat = 0; repeat < repeatTime; repeat++) { + length = SubcolumnTest.Encoder(data2_arr, block_size, encoded_result); + } + + long e = System.nanoTime(); + encodeTime += ((e - s) / repeatTime); + compressed_size += length; + + double ratioTmp; + + if (integerDatasets.contains(datasetName)) { + ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); + } else { + ratioTmp = compressed_size / (double) (data1.size() * Long.BYTES); + } + + ratio += ratioTmp; + + System.out.println("Decode"); + + s = System.nanoTime(); + + int[] data2_arr_decoded = new int[data2_arr.length]; + + for (int repeat = 0; repeat < repeatTime; 
repeat++) { + data2_arr_decoded = SubcolumnTest.Decoder(encoded_result); + } + + e = System.nanoTime(); + decodeTime += ((e - s) / repeatTime); + + for (int i = 0; i < data2_arr_decoded.length; i++) { + assertEquals(data2_arr[i], data2_arr_decoded[i]); + } + + String[] record = { + datasetName, + "Sub-columns", + String.valueOf(encodeTime), + String.valueOf(decodeTime), + String.valueOf(data1.size()), + String.valueOf(compressed_size), + String.valueOf(ratio) + }; + writer.writeRecord(record); + System.out.println("Block size: " + block_size); + System.out.println(ratio); + } + + writer.close(); + } + } + + @Test + public void testTransData() throws IOException { + String parent_dir = "D:/github/xjz17/subcolumn/"; + + String output_parent_dir = "D:/encoding-subcolumn/trans_data_result/compression_vs_block/"; + // String output_parent_dir = parent_dir + "trans_data_result/compression_vs_block/"; + + String input_parent_dir = parent_dir + "trans_data/"; + + int[] block_size_list = { 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192 }; + + ArrayList<String> input_path_list = new ArrayList<>(); + ArrayList<String> dataset_name = new ArrayList<>(); + ArrayList<Integer> dataset_block_size = new ArrayList<>(); + + try (Stream<Path> paths = Files.walk(Paths.get(input_parent_dir))) { + paths.filter(Files::isDirectory) + .filter(path -> !path.equals(Paths.get(input_parent_dir))) + .forEach(dir -> { + String name = dir.getFileName().toString(); + dataset_name.add(name); + input_path_list.add(dir.toString()); + dataset_block_size.add(1024); // Default block size, can be changed if needed + }); + } + + for (int block_size : block_size_list) { + + String outputPath = output_parent_dir + "subcolumn_trans_data_block_" + block_size + ".csv"; + CsvWriter writer = new CsvWriter(outputPath, ',', StandardCharsets.UTF_8); + writer.setRecordDelimiter('\n'); + + String[] head = { + "Dataset", + "Encoding Algorithm", + "Encoding Time", + "Decoding Time", + "Points", + "Compressed Size", + 
"Compression Ratio" + }; + writer.writeRecord(head); + + int repeatTime = 100; + + for (int file_i = 0; file_i < input_path_list.size(); file_i++) { + + String inputPath = input_path_list.get(file_i); + System.out.println(inputPath); + + File file = new File(inputPath); + File[] tempList = file.listFiles(); + + long totalEncodeTime = 0; + long totalDecodeTime = 0; + double totalCompressedSize = 0; + int totalPoints = 0; + + for (File f : tempList) { + String datasetName = extractFileName(f.toString()); + InputStream inputStream = Files.newInputStream(f.toPath()); + + CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); + ArrayList<Integer> data1 = new ArrayList<>(); + ArrayList<Integer> data2 = new ArrayList<>(); + + loader.readHeaders(); + while (loader.readRecord()) { + data1.add(Integer.valueOf(loader.getValues()[0])); + data2.add(Integer.valueOf(loader.getValues()[1])); + } + inputStream.close(); + int[] data2_arr = new int[data2.size()]; + for (int i = 0; i < data2.size(); i++) { + data2_arr[i] = data2.get(i); + } + byte[] encoded_result = new byte[data2_arr.length * 4]; + long encodeTime = 0; + long decodeTime = 0; + double ratio = 0; + double compressed_size = 0; + + int length = 0; + + long s = System.nanoTime(); + for (int repeat = 0; repeat < repeatTime; repeat++) { + length = SubcolumnTest.Encoder(data2_arr, block_size, encoded_result); + } + + long e = System.nanoTime(); + encodeTime += ((e - s) / repeatTime); + compressed_size += length; + double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); + ratio += ratioTmp; + s = System.nanoTime(); + + int[] data2_arr_decoded = new int[data1.size()]; + + for (int repeat = 0; repeat < repeatTime; repeat++) { + data2_arr_decoded = SubcolumnTest.Decoder(encoded_result); + } + + e = System.nanoTime(); + decodeTime += ((e - s) / repeatTime); + + totalEncodeTime += encodeTime; + totalDecodeTime += decodeTime; + totalCompressedSize += compressed_size; + totalPoints += 
data1.size(); + + for (int i = 0; i < data2_arr_decoded.length; i++) { + assertEquals(data2_arr[i], data2_arr_decoded[i]); + } + } + + double compressionRatio = totalCompressedSize / (totalPoints * Integer.BYTES); + + String[] record = { + dataset_name.get(file_i), + "Sub-columns", + String.valueOf(totalEncodeTime), + String.valueOf(totalDecodeTime), + String.valueOf(totalPoints), + String.valueOf(totalCompressedSize), + String.valueOf(compressionRatio) + }; + + writer.writeRecord(record); + System.out.println(compressionRatio); + + System.out.println("Block size: " + block_size); + } + + writer.close(); + } + } +}
diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnDivTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnDivTest.java new file mode 100644 index 0000000..ff1bfd3 --- /dev/null +++ b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnDivTest.java
@@ -0,0 +1,400 @@ +package org.apache.iotdb.tsfile.encoding; + +import com.csvreader.CsvReader; +import com.csvreader.CsvWriter; +import org.junit.Test; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.util.ArrayList; + +public class SubcolumnDivTest { + + public static void int2Bytes(int integer, int encode_pos, byte[] cur_byte) { + cur_byte[encode_pos] = (byte) (integer >> 24); + cur_byte[encode_pos + 1] = (byte) (integer >> 16); + cur_byte[encode_pos + 2] = (byte) (integer >> 8); + cur_byte[encode_pos + 3] = (byte) (integer); + } + + public static int getBitWith(int num) { + if (num == 0) + return 1; + else + return 32 - Integer.numberOfLeadingZeros(num); + } + + public static int EncodeBits(int num, + int bit_width, + int encode_pos, + byte[] cur_byte, + int[] bit_index_list) { + // 找到要插入的位的索引 + int bit_index = bit_index_list[0];// cur_byte[encode_pos + 1]; + + // 计算数值的起始位位置 + int remaining_bits = bit_width; + + while (remaining_bits > 0) { + // 计算在当前字节中可以使用的位数 + int available_bits = bit_index; + int bits_to_write = Math.min(available_bits, remaining_bits); + + // 更新 bit_index + bit_index = available_bits - bits_to_write; + + // 计算要写入的位的掩码和数值 + int mask = (1 << bits_to_write) - 1; + int bits = (num >> (remaining_bits - bits_to_write)) & mask; + + // 写入到当前位置 + cur_byte[encode_pos] &= (byte) ~(mask << bit_index); // 清除对应位置的位 + cur_byte[encode_pos] |= (byte) (bits << bit_index); + + // 更新位宽和数值 + remaining_bits -= bits_to_write; + if (bit_index == 0) { + bit_index = 8; + encode_pos++; + } + } + bit_index_list[0] = bit_index; + // cur_byte[encode_pos + 1] = (byte) bit_index; + return encode_pos; + } + + public static int[] getAbsDeltaTsBlock( + int[] ts_block, + int i, + int block_size, + int remaining, + int[] min_delta) { + int[] ts_block_delta = new int[remaining - 1]; + + int value_delta_min = Integer.MAX_VALUE; + int value_delta_max = 
Integer.MIN_VALUE; + int base = i * block_size + 1; + int end = i * block_size + remaining; + + int tmp_j_1 = ts_block[base - 1]; + min_delta[0] = tmp_j_1; + int j = base; + int tmp_j; + + while (j < end) { + tmp_j = ts_block[j]; + int epsilon_v = tmp_j - tmp_j_1; + ts_block_delta[j - base] = epsilon_v; + if (epsilon_v < value_delta_min) { + value_delta_min = epsilon_v; + } + if (epsilon_v > value_delta_max) { + value_delta_max = epsilon_v; + } + tmp_j_1 = tmp_j; + j++; + } + j = 0; + end = remaining - 1; + while (j < end) { + ts_block_delta[j] = ts_block_delta[j] - value_delta_min; + j++; + } + + min_delta[1] = value_delta_min; + min_delta[2] = (value_delta_max - value_delta_min); + + return ts_block_delta; + } + + public static int getDecimalPrecision(String str) { + // 查找小数点的位置 + int decimalIndex = str.indexOf("."); + + // 如果没有小数点,精度为0 + if (decimalIndex == -1) { + return 0; + } + + // 获取小数点后的部分并返回其长度 + return str.substring(decimalIndex + 1).length(); + } + + public static String extractFileName(String path) { + // 找到最后一个斜杠的位置,从而提取文件名 + int lastSlashIndex = path.lastIndexOf('/'); + + // 提取文件名(从最后一个斜杠之后开始) + String fileNameWithExtension = path.substring(lastSlashIndex + 1); + + // 去掉文件扩展名(.csv) + int dotIndex = fileNameWithExtension.lastIndexOf('.'); + if (dotIndex != -1) { + return fileNameWithExtension.substring(0, dotIndex); + } + + // 如果没有扩展名,直接返回文件名 + return fileNameWithExtension; + } + + @Test + public void BOSOptimalTest() throws IOException { + String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/"; + + String output_parent_dir = parent_dir + "subcolumn/compression_ratio"; + String input_parent_dir = parent_dir + "elf/src/test/resources/ElfData_Short"; + ArrayList<String> input_path_list = new ArrayList<>(); + ArrayList<String> output_path_list = new ArrayList<>(); + ArrayList<String> dataset_name = new ArrayList<>(); + ArrayList<Integer> dataset_block_size = new ArrayList<>(); + // dataset_name.add("CS-Sensors"); + // 
dataset_name.add("Metro-Traffic"); + // dataset_name.add("USGS-Earthquakes"); + // dataset_name.add("YZ-Electricity"); + // dataset_name.add("GW-Magnetic"); + // dataset_name.add("TY-Fuel"); + // dataset_name.add("Cyber-Vehicle"); + // dataset_name.add("Vehicle-Charge"); + // dataset_name.add("Nifty-Stocks"); + // dataset_name.add("TH-Climate"); + // dataset_name.add("TY-Transport"); + // dataset_name.add("EPM-Education"); + input_path_list.add(input_parent_dir); + dataset_block_size.add(1024); + output_path_list.add(output_parent_dir + "/float_ratio.csv"); // 0 + // for (String value : dataset_name) { + // input_path_list.add(input_parent_dir + value); + // dataset_block_size.add(1024); + // } + + // output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 + //// dataset_block_size.add(1024); + // output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 + //// dataset_block_size.add(2048); + // output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 + //// dataset_block_size.add(2048); + // output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 + //// dataset_block_size.add(2048); + // output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 + //// dataset_block_size.add(1024); + // output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 + //// dataset_block_size.add(2048); + // output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 + //// dataset_block_size.add(2048); + // output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 + //// dataset_block_size.add(2048); + // output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 + //// dataset_block_size.add(1024); + // output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 + //// dataset_block_size.add(2048); + // output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 + //// dataset_block_size.add(2048); + // 
output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 + //// dataset_block_size.add(1024); + + int repeatTime2 = 100; + // for (int file_i = 1; file_i < 2; file_i++) { + + for (int file_i = 0; file_i < input_path_list.size(); file_i++) { + + String inputPath = input_path_list.get(file_i); + System.out.println(inputPath); + String Output = output_path_list.get(file_i); + + File file = new File(inputPath); + File[] tempList = file.listFiles(); + + CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); + + String[] head = { + "Dataset", + "Encoding Algorithm", + "Encoding Time", + "Decoding Time", + "Div", + "Points", + "Compressed Size", + "Compression Ratio" + }; + writer.writeRecord(head); // write header to output file + + assert tempList != null; + + for (File f : tempList) { + // f=tempList[2]; + + // System.out.println(f); + String datasetName = extractFileName(f.toString()); + System.out.println(datasetName); + InputStream inputStream = Files.newInputStream(f.toPath()); + + CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); + ArrayList<Float> data1 = new ArrayList<>(); + // ArrayList<Integer> data2 = new ArrayList<>(); + + loader.readHeaders(); + int max_decimal = 0; + while (loader.readRecord()) { + String f_str = loader.getValues()[0]; + int cur_decimal = getDecimalPrecision(f_str); + if (cur_decimal > max_decimal) + max_decimal = cur_decimal; + // String value = loader.getValues()[index]; + data1.add(Float.valueOf(f_str)); + // data2.add(Integer.valueOf(loader.getValues()[1])); + // data.add(Integer.valueOf(value)); + } + inputStream.close(); + int[] data2_arr = new int[data1.size()]; + int max_mul = (int) Math.pow(10, max_decimal); + for (int i = 0; i < data1.size(); i++) { + data2_arr[i] = (int) (data1.get(i) * max_mul); + } + + System.out.println(max_decimal); + byte[] encoded_result = new byte[data2_arr.length * 4]; + + for (int div = 2; div < 11; div++) { + System.out.println(div); + long 
encodeTime = 0; + long decodeTime = 0; + double ratio = 0; + double compressed_size = 0; + + int length = 0; + + long s = System.nanoTime(); + for (int repeat = 0; repeat < repeatTime2; repeat++) { + length = SubcolumnEncoder(data2_arr, dataset_block_size.get(file_i), div, encoded_result); + } + + long e = System.nanoTime(); + encodeTime += ((e - s) / repeatTime2); + compressed_size += length; + double ratioTmp = compressed_size / (double) (data1.size() * Long.BYTES); + ratio += ratioTmp; + s = System.nanoTime(); + // for (int repeat = 0; repeat < repeatTime2; repeat++) + // SubcolumnDecoder(encoded_result); + e = System.nanoTime(); + decodeTime += ((e - s) / repeatTime2); + + String[] record = { + datasetName, + "TS_2DIFF+Sucolumn", + String.valueOf(encodeTime), + String.valueOf(decodeTime), + String.valueOf(div), + String.valueOf(data1.size()), + String.valueOf(compressed_size), + String.valueOf(ratio) + }; + writer.writeRecord(record); + System.out.println(ratio); + } + + // break; + } + writer.close(); + } + } + + private void SubcolumnDecoder(byte[] encodedResult) { + + } + + private int SubcolumnEncoder(int[] data, int block_size, int div, byte[] encoded_result) { + block_size++; + + int length_all = data.length; + + int encode_pos = 0; + int2Bytes(length_all, encode_pos, encoded_result); + encode_pos += 4; + + int block_num = length_all / block_size; + int2Bytes(block_size, encode_pos, encoded_result); + encode_pos += 4; + + for (int i = 0; i < block_num; i++) { + // int start_encode_pos = encode_pos; + encode_pos = PerBlockEncoder(data, i, block_size, block_size, div, encode_pos, encoded_result); + // System.out.println(encode_pos-start_encode_pos); + // System.out.println("------------------------------------------"); + } + + int remaining_length = length_all - block_num * block_size; + if (remaining_length <= 3) { + for (int i = remaining_length; i > 0; i--) { + int2Bytes(data[data.length - i], encode_pos, encoded_result); + encode_pos += 4; + } + + } 
else { + + int start = block_num * block_size; + int remaining = length_all - start; + encode_pos = PerBlockEncoder(data, block_num, block_size, remaining, div, encode_pos, encoded_result); + + } + + return encode_pos; + } + + private int PerBlockEncoder(int[] ts_block, int block_i, int block_size, int remaining, int mod, int encodePos, + byte[] encodedResult) { + + int[] min_delta = new int[3]; + int[] ts_block_delta = getAbsDeltaTsBlock(ts_block, block_i, block_size, remaining, min_delta); + + block_size = remaining - 1; + int max_delta_value = min_delta[2]; + int res = max_delta_value % mod; + int div = max_delta_value / mod; + int times_of_max = 1; + int cur_tmp = div; + while (cur_tmp >= mod) { + div = cur_tmp / mod; + res = cur_tmp % mod; + times_of_max++; + cur_tmp = div; + } + int[][] res_array = new int[times_of_max][block_size]; + int[] div_array = new int[block_size]; + + for (int i = 0; i < block_size; i++) { + int value = ts_block_delta[i]; + res = value % mod; + div = value / mod; + res_array[0][i] = res; + cur_tmp = div; + for (int j = 1; j < times_of_max; j++) { + div = cur_tmp / mod; + res = cur_tmp % mod; + res_array[j][i] = res; + cur_tmp = div; + } + } + int[] max_bit_width = new int[times_of_max]; + for (int j = 0; j < times_of_max; j++) { + int max_bit_width_cur = getBitWith(res_array[j][0]); + for (int i = 1; i < block_size; i++) { + if (max_bit_width_cur < getBitWith(res_array[j][i])) { + max_bit_width_cur = getBitWith(res_array[j][i]); + } + } + max_bit_width[j] = max_bit_width_cur; + } + encodePos += (times_of_max); + for (int i = 0; i < times_of_max; i++) { + encodePos += (max_bit_width[i] * (block_size / 8)); + } + encodePos += 8; + + return encodePos; + } + +}
diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnQueryCountTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnQueryCountTest.java new file mode 100644 index 0000000..b4f6b13 --- /dev/null +++ b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnQueryCountTest.java
package org.apache.iotdb.tsfile.encoding;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.junit.Test;

import com.csvreader.CsvReader;
import com.csvreader.CsvWriter;

/**
 * Benchmark for counting the rows equal to a target value directly on sub-column encoded data,
 * without decoding whole blocks.
 *
 * <p>Stream layout as read by {@link #Query}: a 4-byte big-endian value count, a 4-byte big-endian
 * block size, then one encoded block per {@code block_size} values. A trailing group of at most
 * three values is stored as raw 4-byte big-endian integers; a longer trailing group is stored as
 * one more (short) block.
 */
public class SubcolumnQueryCountTest {

  private static final String PARENT_DIR = "D:/github/xjz17/subcolumn/";
  private static final String INPUT_PARENT_DIR = PARENT_DIR + "dataset/";
  private static final int REPEAT_TIME = 200;

  private static final String[] CSV_HEAD = {
    "Dataset",
    "Encoding Algorithm",
    "Encoding Time",
    "Decoding Time",
    "Points",
    "Compressed Size",
    "Compression Ratio"
  };

  /** Datasets whose raw points are counted as 4-byte integers; all others count as 8 bytes. */
  private static final List<String> INTEGER_DATASETS = new ArrayList<>();

  /** Query target per dataset name (file name without extension). */
  private static final Map<String, Integer> QUERY_TARGETS = new HashMap<>();

  static {
    INTEGER_DATASETS.add("Wine-Tasting");

    QUERY_TARGETS.put("Bird-migration", 2600000);
    QUERY_TARGETS.put("Bitcoin-price", 170000000);
    QUERY_TARGETS.put("City-temp", 700);
    QUERY_TARGETS.put("Dewpoint-temp", 9600);
    QUERY_TARGETS.put("IR-bio-temp", -200);
    QUERY_TARGETS.put("PM10-dust", 2000);
    QUERY_TARGETS.put("Stocks-DE", 90000);
    QUERY_TARGETS.put("Stocks-UK", 30000);
    QUERY_TARGETS.put("Stocks-USA", 6000);
    QUERY_TARGETS.put("Wind-Speed", 60);
    QUERY_TARGETS.put("Wine-Tasting", 10);
  }

  /** Encoder under test; may throw so that any encoder signature can be adapted by a lambda. */
  @FunctionalInterface
  private interface BlockEncoder {
    int encode(int[] data, byte[] out) throws IOException;
  }

  /** Reads a 4-byte big-endian integer starting at {@code pos}. */
  private static int readInt(byte[] buf, int pos) {
    return ((buf[pos] & 0xFF) << 24)
        | ((buf[pos + 1] & 0xFF) << 16)
        | ((buf[pos + 2] & 0xFF) << 8)
        | (buf[pos + 3] & 0xFF);
  }

  /** Returns the byte position after skipping {@code bitCount} bits from byte {@code bytePos}, rounded up. */
  private static int advancePastBits(int bytePos, int bitCount) {
    return (bytePos * 8 + bitCount + 7) / 8;
  }

  /**
   * Counts the values equal to {@code target} in an encoded stream. The count is computed only for
   * timing purposes and is discarded.
   *
   * @param encoded_result encoded stream (header followed by blocks)
   * @param target value to search for
   */
  public static void Query(byte[] encoded_result, int target) {
    int encode_pos = 0;

    int data_length = readInt(encoded_result, encode_pos);
    encode_pos += 4;

    int block_size = readInt(encoded_result, encode_pos);
    encode_pos += 4;

    int num_blocks = data_length / block_size;

    // Query result: only slot 0 is ever used (it holds the running count), so a single cell is
    // enough; allocating data_length ints per query would only distort the measured time.
    int[] result = new int[1];
    int[] result_length = new int[1];

    for (int i = 0; i < num_blocks; i++) {
      encode_pos = BlockQueryCount(encoded_result, i, block_size,
          block_size, encode_pos, target,
          result, result_length);
    }

    int remainder = data_length % block_size;

    if (remainder <= 3) {
      // Short tails are stored as raw 4-byte integers.
      for (int i = 0; i < remainder; i++) {
        int value = readInt(encoded_result, encode_pos);
        if (value == target) {
          result[result_length[0]]++;
        }
        encode_pos += 4;
      }
    } else {
      encode_pos = BlockQueryCount(encoded_result, num_blocks, block_size,
          remainder, encode_pos, target,
          result, result_length);
    }
  }

  /**
   * Counts the rows of one encoded block equal to {@code target} and adds that count to
   * {@code result[result_length[0]]}.
   *
   * <p>Block layout as read here: 4-byte base value (subtracted from the target), 1-byte {@code m};
   * if {@code m != 0}: 1-byte {@code beta}, {@code l = ceil(m / beta)} bit widths packed at 8 bits
   * each, {@code l} encoding-type flags packed at 1 bit each, then the sub-columns from index
   * {@code l - 1} down to 0. A sub-column of type 0 is bit-packed; any other type is run-length
   * encoded as a 2-byte run count followed by packed run lengths and packed run values.
   *
   * @param encoded_result encoded stream
   * @param block_index index of this block (unused for counting)
   * @param block_size nominal block size; determines the bit width of run lengths
   * @param remainder number of values actually stored in this block
   * @param encode_pos byte offset of the block start
   * @param target value to search for
   * @param result accumulator array
   * @param result_length single-element array holding the accumulator slot index
   * @return byte offset just past this block
   */
  public static int BlockQueryCount(byte[] encoded_result, int block_index, int block_size, int remainder,
      int encode_pos, int target, int[] result, int[] result_length) {
    int base = readInt(encoded_result, encode_pos);
    encode_pos += 4;

    // Mask so the header byte is read as unsigned.
    int m = encoded_result[encode_pos] & 0xFF;
    encode_pos += 1;

    target -= base;

    if (m == 0) {
      // All values in the block equal the base.
      if (target == 0) {
        result[result_length[0]] += remainder;
      }
      return encode_pos;
    }

    int bw = SubcolumnTest.bitWidth(block_size);

    int beta = encoded_result[encode_pos] & 0xFF;
    encode_pos += 1;

    int l = (m + beta - 1) / beta;

    int[] bitWidthList = new int[l];
    encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, 8, l, bitWidthList);

    int[] encodingType = new int[l];
    encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, 1, l, encodingType);

    // The sub-columns only carry l * beta bits, so a negative shifted target, or one with bits
    // above that range, cannot equal any stored value. In that case the block is skipped and
    // contributes nothing (previously every row of such a block was counted as a match).
    int totalBits = l * beta;
    boolean noMatch = target < 0 || (totalBits < Integer.SIZE && (target >>> totalBits) != 0);

    // Candidate index list: rows whose already-inspected sub-columns all equal the target's.
    int[] candidate_indices = new int[remainder];
    int candidate_length = 0;
    if (!noMatch) {
      for (int i = 0; i < remainder; i++) {
        candidate_indices[i] = i;
      }
      candidate_length = remainder;
    }

    for (int i = l - 1; i >= 0; i--) {
      int width = bitWidthList[i];

      if (encodingType[i] == 0) {
        if (!noMatch) {
          int bit_pos = encode_pos * 8;
          int value = (target >> (i * beta)) & ((1 << beta) - 1);
          int kept = 0;
          for (int j = 0; j < candidate_length; j++) {
            int index = candidate_indices[j];
            int current = SubcolumnTest.bytesToInt(encoded_result, bit_pos + index * width, width);
            if (current == value) {
              candidate_indices[kept] = index;
              kept++;
            }
          }
          candidate_length = kept;
        }
        encode_pos = advancePastBits(encode_pos, remainder * width);
        continue;
      }

      int runs = ((encoded_result[encode_pos] & 0xFF) << 8) | (encoded_result[encode_pos + 1] & 0xFF);
      encode_pos += 2;

      if (noMatch) {
        // Skip the packed run lengths, then the packed run values.
        encode_pos = advancePastBits(encode_pos, bw * runs);
        encode_pos = advancePastBits(encode_pos, width * runs);
        continue;
      }

      int[] run_length = new int[runs];
      int[] rle_values = new int[runs];
      encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, bw, runs, run_length);
      encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, width, runs, rle_values);

      int value = (target >> (i * beta)) & ((1 << beta) - 1);
      int kept = 0;
      int rleIndex = 0;
      int currentPos = 0;

      // Candidates are in ascending order, so the run cursor only ever moves forward.
      for (int j = 0; j < candidate_length; j++) {
        int index_candidate = candidate_indices[j];

        while (rleIndex < runs && currentPos + run_length[rleIndex] <= index_candidate) {
          currentPos += run_length[rleIndex];
          rleIndex++;
        }

        if (rleIndex < runs && rle_values[rleIndex] == value) {
          candidate_indices[kept] = index_candidate;
          kept++;
        }
      }
      candidate_length = kept;
    }

    result[result_length[0]] += candidate_length;

    return encode_pos;
  }

  /**
   * Returns the number of characters after the first {@code '.'} in {@code str}, or 0 when there
   * is no decimal point.
   */
  public static int getDecimalPrecision(String str) {
    int decimalIndex = str.indexOf(".");
    if (decimalIndex == -1) {
      return 0;
    }
    return str.substring(decimalIndex + 1).length();
  }

  /**
   * Returns the last path component without its extension. Returns an empty string for a null or
   * empty path, and the unchanged file name when it has no dot or only a leading dot.
   */
  public static String extractFileName(String path) {
    if (path == null || path.isEmpty()) {
      return "";
    }

    String fileName = new File(path).getName();
    int dotIndex = fileName.lastIndexOf('.');
    if (dotIndex == -1 || dotIndex == 0) {
      return fileName;
    }
    return fileName.substring(0, dotIndex);
  }

  /**
   * Loads the first column of a CSV file and scales every value by ten to the power of the largest
   * number of decimal places seen, truncating to {@code int}. Empty cells are skipped.
   */
  private static int[] loadScaledDataset(File file) throws IOException {
    List<Float> values = new ArrayList<>();
    int max_decimal = 0;

    try (InputStream inputStream = Files.newInputStream(file.toPath())) {
      CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8);
      while (loader.readRecord()) {
        String[] row = loader.getValues();
        if (row.length == 0 || row[0].isEmpty()) {
          continue;
        }
        String f_str = row[0];
        max_decimal = Math.max(max_decimal, getDecimalPrecision(f_str));
        values.add(Float.valueOf(f_str));
      }
    }

    int[] scaled = new int[values.size()];
    int max_mul = (int) Math.pow(10, max_decimal);
    for (int i = 0; i < scaled.length; i++) {
      scaled[i] = (int) (values.get(i) * max_mul);
    }

    System.out.println(max_decimal);
    return scaled;
  }

  /**
   * Encodes and queries every CSV dataset in the input directory and writes one result row per
   * dataset to {@code outputPath}.
   *
   * @param outputPath CSV file to write
   * @param label progress line printed after each dataset
   * @param encoder encoder configuration being measured
   */
  private static void runBenchmark(String outputPath, String label, BlockEncoder encoder)
      throws IOException {
    File[] csvFiles = new File(INPUT_PARENT_DIR).listFiles((dir, name) -> name.endsWith(".csv"));
    if (csvFiles == null) {
      throw new IOException("Dataset directory is missing or unreadable: " + INPUT_PARENT_DIR);
    }

    CsvWriter writer = new CsvWriter(outputPath, ',', StandardCharsets.UTF_8);
    try {
      writer.setRecordDelimiter('\n');
      writer.writeRecord(CSV_HEAD);

      for (File file : csvFiles) {
        String datasetName = extractFileName(file.toString());
        System.out.println(datasetName);

        Integer target = QUERY_TARGETS.get(datasetName);
        if (target == null) {
          // No target configured: skip instead of failing on unboxing after the encode phase.
          System.err.println("No query target configured for dataset, skipped: " + datasetName);
          continue;
        }

        int[] data2_arr = loadScaledDataset(file);
        byte[] encoded_result = new byte[data2_arr.length * 4];

        int length = 0;
        long s = System.nanoTime();
        for (int repeat = 0; repeat < REPEAT_TIME; repeat++) {
          length = encoder.encode(data2_arr, encoded_result);
        }
        long e = System.nanoTime();
        long encodeTime = (e - s) / REPEAT_TIME;

        double compressed_size = length;
        int rawBytesPerPoint = INTEGER_DATASETS.contains(datasetName) ? Integer.BYTES : Long.BYTES;
        double ratio = compressed_size / (double) (data2_arr.length * rawBytesPerPoint);

        System.out.println("Query");

        s = System.nanoTime();
        for (int repeat = 0; repeat < REPEAT_TIME; repeat++) {
          Query(encoded_result, target);
        }
        e = System.nanoTime();
        long decodeTime = (e - s) / REPEAT_TIME;

        String[] record = {
          datasetName,
          "Sub-columns",
          String.valueOf(encodeTime),
          String.valueOf(decodeTime),
          String.valueOf(data2_arr.length),
          String.valueOf(compressed_size),
          String.valueOf(ratio)
        };
        writer.writeRecord(record);

        System.out.println(label);
        System.out.println(ratio);
      }
    } finally {
      // Always flush and release the output file, also when a dataset fails.
      writer.close();
    }
  }

  /** Measures count-query time for a range of block sizes; one output CSV per block size. */
  @Test
  public void testQuery() throws IOException {
    String output_parent_dir = "D:/encoding-subcolumn/result/query_vs_block/";
    // String output_parent_dir = PARENT_DIR + "result/query_vs_block/";

    int[] block_size_list = { 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192 };

    for (int block_size : block_size_list) {
      String outputPath = output_parent_dir + "subcolumn_query_count_block_" + block_size + ".csv";
      runBenchmark(outputPath, "block_size: " + block_size,
          (data, out) -> SubcolumnTest.Encoder(data, block_size, out));
    }
  }

  /** Measures count-query time for beta = 1..31 at a fixed block size of 512; one CSV per beta. */
  @Test
  public void testQueryBeta() throws IOException {
    String output_parent_dir = "D:/encoding-subcolumn/result/query_vs_beta/";
    // String output_parent_dir = PARENT_DIR + "result/query_vs_beta/";

    int block_size = 512;

    for (int beta = 1; beta <= 31; beta++) {
      final int currentBeta = beta;
      String outputPath = output_parent_dir + "subcolumn_query_count_beta_" + beta + ".csv";
      runBenchmark(outputPath, "beta: " + beta,
          (data, out) -> SubcolumnBetaTest.Encoder(data, block_size, out, currentBeta));
    }
  }

}
diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnQueryEqualTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnQueryEqualTest.java new file mode 100644 index 0000000..1272bc2 --- /dev/null +++ b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnQueryEqualTest.java
package org.apache.iotdb.tsfile.encoding;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.junit.Test;

import com.csvreader.CsvReader;
import com.csvreader.CsvWriter;

/**
 * Benchmark for an equality query that returns the row indices equal to a target value, evaluated
 * directly on sub-column encoded data without decoding whole blocks.
 *
 * <p>Stream layout as read by {@link #Query}: a 4-byte big-endian value count, a 4-byte big-endian
 * block size, then one encoded block per {@code block_size} values. A trailing group of at most
 * three values is stored as raw 4-byte big-endian integers; a longer trailing group is stored as
 * one more (short) block.
 */
public class SubcolumnQueryEqualTest {

  private static final String PARENT_DIR = "D:/github/xjz17/subcolumn/";
  private static final String INPUT_PARENT_DIR = PARENT_DIR + "dataset/";
  private static final int REPEAT_TIME = 200;

  private static final String[] CSV_HEAD = {
    "Dataset",
    "Encoding Algorithm",
    "Encoding Time",
    "Decoding Time",
    "Points",
    "Compressed Size",
    "Compression Ratio"
  };

  /** Datasets whose raw points are counted as 4-byte integers; all others count as 8 bytes. */
  private static final List<String> INTEGER_DATASETS = new ArrayList<>();

  /** Query target per dataset name (file name without extension). */
  private static final Map<String, Integer> QUERY_TARGETS = new HashMap<>();

  static {
    INTEGER_DATASETS.add("Wine-Tasting");

    QUERY_TARGETS.put("Bird-migration", 2600000);
    QUERY_TARGETS.put("Bitcoin-price", 170000000);
    QUERY_TARGETS.put("City-temp", 700);
    QUERY_TARGETS.put("Dewpoint-temp", 9600);
    QUERY_TARGETS.put("IR-bio-temp", -200);
    QUERY_TARGETS.put("PM10-dust", 2000);
    QUERY_TARGETS.put("Stocks-DE", 90000);
    QUERY_TARGETS.put("Stocks-UK", 30000);
    QUERY_TARGETS.put("Stocks-USA", 6000);
    QUERY_TARGETS.put("Wind-Speed", 60);
    QUERY_TARGETS.put("Wine-Tasting", 10);
  }

  /** Encoder under test; may throw so that any encoder signature can be adapted by a lambda. */
  @FunctionalInterface
  private interface BlockEncoder {
    int encode(int[] data, byte[] out) throws IOException;
  }

  /** Reads a 4-byte big-endian integer starting at {@code pos}. */
  private static int readInt(byte[] buf, int pos) {
    return ((buf[pos] & 0xFF) << 24)
        | ((buf[pos + 1] & 0xFF) << 16)
        | ((buf[pos + 2] & 0xFF) << 8)
        | (buf[pos + 3] & 0xFF);
  }

  /** Returns the byte position after skipping {@code bitCount} bits from byte {@code bytePos}, rounded up. */
  private static int advancePastBits(int bytePos, int bitCount) {
    return (bytePos * 8 + bitCount + 7) / 8;
  }

  /**
   * Collects the row indices whose value equals {@code target} in an encoded stream. The result is
   * computed only for timing purposes and is discarded.
   *
   * @param encoded_result encoded stream (header followed by blocks)
   * @param target value to search for
   */
  public static void Query(byte[] encoded_result, int target) {
    int encode_pos = 0;

    int data_length = readInt(encoded_result, encode_pos);
    encode_pos += 4;

    int block_size = readInt(encoded_result, encode_pos);
    encode_pos += 4;

    int num_blocks = data_length / block_size;

    // Query result: matching row indices, and the number of slots filled so far.
    int[] result = new int[data_length];
    int[] result_length = new int[1];

    for (int i = 0; i < num_blocks; i++) {
      encode_pos = BlockQueryIndex(encoded_result, i, block_size,
          block_size, encode_pos, target,
          result, result_length);
    }

    int remainder = data_length % block_size;

    if (remainder <= 3) {
      // Short tails are stored as raw 4-byte integers.
      for (int i = 0; i < remainder; i++) {
        int value = readInt(encoded_result, encode_pos);
        if (value == target) {
          // Record the row index, like every block path does (previously the value was stored).
          result[result_length[0]] = block_size * num_blocks + i;
          result_length[0]++;
        }
        encode_pos += 4;
      }
    } else {
      encode_pos = BlockQueryIndex(encoded_result, num_blocks, block_size,
          remainder, encode_pos, target,
          result, result_length);
    }
  }

  /**
   * Appends to {@code result} the global row indices of one encoded block whose value equals
   * {@code target}.
   *
   * <p>Block layout as read here: 4-byte base value (subtracted from the target), 1-byte {@code m};
   * if {@code m != 0}: 1-byte {@code beta}, {@code l = ceil(m / beta)} bit widths packed at 8 bits
   * each, {@code l} encoding-type flags packed at 1 bit each, then the sub-columns from index
   * {@code l - 1} down to 0. A sub-column of type 0 is bit-packed; any other type is run-length
   * encoded as a 2-byte run count followed by packed run lengths and packed run values.
   *
   * @param encoded_result encoded stream
   * @param block_index index of this block; row indices are {@code block_size * block_index + i}
   * @param block_size nominal block size; also determines the bit width of run lengths
   * @param remainder number of values actually stored in this block
   * @param encode_pos byte offset of the block start
   * @param target value to search for
   * @param result output array of matching row indices
   * @param result_length single-element array holding the number of filled slots in {@code result}
   * @return byte offset just past this block
   */
  public static int BlockQueryIndex(byte[] encoded_result, int block_index, int block_size, int remainder,
      int encode_pos, int target, int[] result, int[] result_length) {
    int base = readInt(encoded_result, encode_pos);
    encode_pos += 4;

    // Mask so the header byte is read as unsigned.
    int m = encoded_result[encode_pos] & 0xFF;
    encode_pos += 1;

    target -= base;

    int first_row = block_size * block_index;

    if (m == 0) {
      // All values in the block equal the base.
      if (target == 0) {
        for (int i = 0; i < remainder; i++) {
          result[result_length[0]] = first_row + i;
          result_length[0]++;
        }
      }
      return encode_pos;
    }

    int bw = SubcolumnTest.bitWidth(block_size);

    int beta = encoded_result[encode_pos] & 0xFF;
    encode_pos += 1;

    int l = (m + beta - 1) / beta;

    int[] bitWidthList = new int[l];
    encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, 8, l, bitWidthList);

    int[] encodingType = new int[l];
    encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, 1, l, encodingType);

    // The sub-columns only carry l * beta bits, so a negative shifted target, or one with bits
    // above that range, cannot equal any stored value. In that case the block is skipped and
    // reports nothing (previously every row of such a block was reported as a match).
    int totalBits = l * beta;
    boolean noMatch = target < 0 || (totalBits < Integer.SIZE && (target >>> totalBits) != 0);

    // Candidate index list: rows whose already-inspected sub-columns all equal the target's.
    int[] candidate_indices = new int[remainder];
    int candidate_length = 0;
    if (!noMatch) {
      for (int i = 0; i < remainder; i++) {
        candidate_indices[i] = i;
      }
      candidate_length = remainder;
    }

    for (int i = l - 1; i >= 0; i--) {
      int width = bitWidthList[i];

      if (encodingType[i] == 0) {
        if (!noMatch) {
          int bit_pos = encode_pos * 8;
          int value = (target >> (i * beta)) & ((1 << beta) - 1);
          int kept = 0;
          for (int j = 0; j < candidate_length; j++) {
            int index = candidate_indices[j];
            int current = SubcolumnTest.bytesToInt(encoded_result, bit_pos + index * width, width);
            if (current == value) {
              candidate_indices[kept] = index;
              kept++;
            }
          }
          candidate_length = kept;
        }
        encode_pos = advancePastBits(encode_pos, remainder * width);
        continue;
      }

      int runs = ((encoded_result[encode_pos] & 0xFF) << 8) | (encoded_result[encode_pos + 1] & 0xFF);
      encode_pos += 2;

      if (noMatch) {
        // Skip the packed run lengths, then the packed run values.
        encode_pos = advancePastBits(encode_pos, bw * runs);
        encode_pos = advancePastBits(encode_pos, width * runs);
        continue;
      }

      int[] run_length = new int[runs];
      int[] rle_values = new int[runs];
      encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, bw, runs, run_length);
      encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, width, runs, rle_values);

      int value = (target >> (i * beta)) & ((1 << beta) - 1);
      int kept = 0;
      int rleIndex = 0;
      int currentPos = 0;

      // Candidates are in ascending order, so the run cursor only ever moves forward.
      for (int j = 0; j < candidate_length; j++) {
        int index_candidate = candidate_indices[j];

        while (rleIndex < runs && currentPos + run_length[rleIndex] <= index_candidate) {
          currentPos += run_length[rleIndex];
          rleIndex++;
        }

        if (rleIndex < runs && rle_values[rleIndex] == value) {
          candidate_indices[kept] = index_candidate;
          kept++;
        }
      }
      candidate_length = kept;
    }

    for (int i = 0; i < candidate_length; i++) {
      result[result_length[0]] = first_row + candidate_indices[i];
      result_length[0]++;
    }

    return encode_pos;
  }

  /**
   * Returns the number of characters after the first {@code '.'} in {@code str}, or 0 when there
   * is no decimal point.
   */
  public static int getDecimalPrecision(String str) {
    int decimalIndex = str.indexOf(".");
    if (decimalIndex == -1) {
      return 0;
    }
    return str.substring(decimalIndex + 1).length();
  }

  /**
   * Returns the last path component without its extension. Returns an empty string for a null or
   * empty path, and the unchanged file name when it has no dot or only a leading dot.
   */
  public static String extractFileName(String path) {
    if (path == null || path.isEmpty()) {
      return "";
    }

    String fileName = new File(path).getName();
    int dotIndex = fileName.lastIndexOf('.');
    if (dotIndex == -1 || dotIndex == 0) {
      return fileName;
    }
    return fileName.substring(0, dotIndex);
  }

  /**
   * Loads the first column of a CSV file and scales every value by ten to the power of the largest
   * number of decimal places seen, truncating to {@code int}. Empty cells are skipped.
   */
  private static int[] loadScaledDataset(File file) throws IOException {
    List<Float> values = new ArrayList<>();
    int max_decimal = 0;

    try (InputStream inputStream = Files.newInputStream(file.toPath())) {
      CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8);
      while (loader.readRecord()) {
        String[] row = loader.getValues();
        if (row.length == 0 || row[0].isEmpty()) {
          continue;
        }
        String f_str = row[0];
        max_decimal = Math.max(max_decimal, getDecimalPrecision(f_str));
        values.add(Float.valueOf(f_str));
      }
    }

    int[] scaled = new int[values.size()];
    int max_mul = (int) Math.pow(10, max_decimal);
    for (int i = 0; i < scaled.length; i++) {
      scaled[i] = (int) (values.get(i) * max_mul);
    }

    System.out.println(max_decimal);
    return scaled;
  }

  /**
   * Encodes and queries every CSV dataset in the input directory and writes one result row per
   * dataset to {@code outputPath}.
   *
   * @param outputPath CSV file to write
   * @param label progress line printed after each dataset
   * @param encoder encoder configuration being measured
   */
  private static void runBenchmark(String outputPath, String label, BlockEncoder encoder)
      throws IOException {
    File[] csvFiles = new File(INPUT_PARENT_DIR).listFiles((dir, name) -> name.endsWith(".csv"));
    if (csvFiles == null) {
      throw new IOException("Dataset directory is missing or unreadable: " + INPUT_PARENT_DIR);
    }

    CsvWriter writer = new CsvWriter(outputPath, ',', StandardCharsets.UTF_8);
    try {
      writer.setRecordDelimiter('\n');
      writer.writeRecord(CSV_HEAD);

      for (File file : csvFiles) {
        String datasetName = extractFileName(file.toString());
        System.out.println(datasetName);

        Integer target = QUERY_TARGETS.get(datasetName);
        if (target == null) {
          // No target configured: skip instead of failing on unboxing after the encode phase.
          System.err.println("No query target configured for dataset, skipped: " + datasetName);
          continue;
        }

        int[] data2_arr = loadScaledDataset(file);
        byte[] encoded_result = new byte[data2_arr.length * 4];

        int length = 0;
        long s = System.nanoTime();
        for (int repeat = 0; repeat < REPEAT_TIME; repeat++) {
          length = encoder.encode(data2_arr, encoded_result);
        }
        long e = System.nanoTime();
        long encodeTime = (e - s) / REPEAT_TIME;

        double compressed_size = length;
        int rawBytesPerPoint = INTEGER_DATASETS.contains(datasetName) ? Integer.BYTES : Long.BYTES;
        // The ratio was previously computed into a temporary and dropped, so 0.0 was always
        // written to the "Compression Ratio" column.
        double ratio = compressed_size / (double) (data2_arr.length * rawBytesPerPoint);

        System.out.println("Query");

        s = System.nanoTime();
        for (int repeat = 0; repeat < REPEAT_TIME; repeat++) {
          Query(encoded_result, target);
        }
        e = System.nanoTime();
        long decodeTime = (e - s) / REPEAT_TIME;

        String[] record = {
          datasetName,
          "Sub-columns",
          String.valueOf(encodeTime),
          String.valueOf(decodeTime),
          String.valueOf(data2_arr.length),
          String.valueOf(compressed_size),
          String.valueOf(ratio)
        };
        writer.writeRecord(record);

        System.out.println(label);
        System.out.println(ratio);
      }
    } finally {
      // Always flush and release the output file, also when a dataset fails.
      writer.close();
    }
  }

  /** Measures equality-query time for a range of block sizes; one output CSV per block size. */
  @Test
  public void testQuery() throws IOException {
    String output_parent_dir = "D:/encoding-subcolumn/result/query_vs_block/";
    // String output_parent_dir = PARENT_DIR + "result/query_vs_block/";

    int[] block_size_list = { 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192 };

    for (int block_size : block_size_list) {
      String outputPath = output_parent_dir + "subcolumn_query_equal_block_" + block_size + ".csv";
      runBenchmark(outputPath, "block_size: " + block_size,
          (data, out) -> SubcolumnTest.Encoder(data, block_size, out));
    }
  }

  /** Measures equality-query time for beta = 1..31 at a fixed block size of 512; one CSV per beta. */
  @Test
  public void testQueryBeta() throws IOException {
    String output_parent_dir = "D:/encoding-subcolumn/result/query_vs_beta/";
    // String output_parent_dir = PARENT_DIR + "result/query_vs_beta/";

    int block_size = 512;

    for (int beta = 1; beta <= 31; beta++) {
      final int currentBeta = beta;
      String outputPath = output_parent_dir + "subcolumn_query_equal_beta_" + beta + ".csv";
      runBenchmark(outputPath, "beta: " + beta,
          (data, out) -> SubcolumnBetaTest.Encoder(data, block_size, out, currentBeta));
    }
  }

}
diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnQueryGreaterLessTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnQueryGreaterLessTest.java new file mode 100644 index 0000000..48bb58a --- /dev/null +++ b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnQueryGreaterLessTest.java
package org.apache.iotdb.tsfile.encoding;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

import org.junit.Test;

import com.csvreader.CsvReader;
import com.csvreader.CsvWriter;

/**
 * Benchmark-style test: encodes every CSV dataset with the sub-column encoder and times an
 * open-interval query ({@code lower_bound < value < upper_bound}) evaluated directly on the
 * encoded bytes. One result CSV is written per block size ({@link #testQuery()}) or per beta
 * ({@link #testQueryBeta()}).
 */
public class SubcolumnQueryGreaterLessTest {

    private static final String INPUT_DIR = "D:/github/xjz17/subcolumn/dataset/";
    private static final String BLOCK_OUTPUT_DIR = "D:/encoding-subcolumn/result/query_vs_block/";
    private static final String BETA_OUTPUT_DIR = "D:/encoding-subcolumn/result/query_vs_beta/";

    /** Number of times each encode / query step is repeated; timings are averaged over it. */
    private static final int REPEAT_TIME = 200;

    /** Exclusive lower bound of the query, per dataset name. */
    private static final HashMap<String, Integer> QUERY_GREATER_RANGE = queryGreaterRange();

    /** Exclusive upper bound of the query, per dataset name. */
    private static final HashMap<String, Integer> QUERY_LESS_RANGE = queryLessRange();

    /** Datasets whose raw points are counted as 4 bytes instead of 8 in the compression ratio. */
    private static final List<String> INTEGER_DATASETS = integerDatasets();

    /** Encodes {@code data} into {@code out}; the return value is reported as the compressed size. */
    @FunctionalInterface
    private interface BlockEncoder {
        int encode(int[] data, byte[] out);
    }

    /**
     * Runs the range query over a whole encoded series. The matches are collected into a local
     * array that is discarded: the method exists to be timed.
     *
     * @param encoded_result encoded bytes; starts with a 4-byte data length and a 4-byte block size
     * @param lower_bound exclusive lower bound
     * @param upper_bound exclusive upper bound
     */
    public static void Query(byte[] encoded_result, int lower_bound, int upper_bound) {
        int encode_pos = 0;

        int data_length = SubcolumnTest.bytes2Integer(encoded_result, encode_pos, 4);
        encode_pos += 4;

        int block_size = SubcolumnTest.bytes2Integer(encoded_result, encode_pos, 4);
        encode_pos += 4;

        int num_blocks = data_length / block_size;

        // Row indices of the matching values.
        int[] result = new int[data_length];
        int[] result_length = new int[1];

        for (int i = 0; i < num_blocks; i++) {
            encode_pos = BlockQueryIndex(encoded_result, i, block_size,
                    block_size, encode_pos, lower_bound, upper_bound,
                    result, result_length);
        }

        int remainder = data_length % block_size;

        if (remainder <= 3) {
            // A tail of at most three values is read as raw 4-byte integers.
            int tail_start = num_blocks * block_size;
            for (int i = 0; i < remainder; i++) {
                int value = SubcolumnTest.bytes2Integer(encoded_result, encode_pos, 4);
                if (value > lower_bound && value < upper_bound) {
                    // Record the row index, like every block branch does (the original stored
                    // the value itself here).
                    result[result_length[0]] = tail_start + i;
                    result_length[0]++;
                }
                encode_pos += 4;
            }
        } else {
            BlockQueryIndex(encoded_result, num_blocks, block_size,
                    remainder, encode_pos, lower_bound, upper_bound,
                    result, result_length);
        }
    }

    /**
     * Evaluates the range query on one encoded block and appends the indices of matching rows.
     *
     * <p>NOTE(review): a candidate that ties with only ONE bound at a higher sub-column is still
     * compared against the digits of BOTH bounds at lower sub-columns, so some in-range rows are
     * dropped. Example with beta = 4, bounds (0x12, 0x34): 0x1F ties with the lower bound on the
     * high digit and is then rejected because 0xF is not below 4. Bits of a bound above the
     * sub-column range are also masked away. Left unchanged here because fixing it alters what the
     * benchmark measures - confirm the intended semantics.
     *
     * @param encoded_result encoded bytes
     * @param block_index index of the block, used to turn in-block positions into row indices
     * @param block_size nominal block size
     * @param remainder number of rows actually stored in this block
     * @param encode_pos byte offset at which the block starts
     * @param lower_bound exclusive lower bound
     * @param upper_bound exclusive upper bound
     * @param result output array of matching row indices
     * @param result_length single-element counter of used slots in {@code result}
     * @return byte offset just past the block
     */
    public static int BlockQueryIndex(byte[] encoded_result, int block_index, int block_size, int remainder,
            int encode_pos, int lower_bound, int upper_bound, int[] result, int[] result_length) {
        // Block header: a 4-byte base value that both bounds are shifted by, then a 1-byte m.
        int block_base = SubcolumnTest.bytes2Integer(encoded_result, encode_pos, 4);
        encode_pos += 4;

        int m = SubcolumnTest.bytes2Integer(encoded_result, encode_pos, 1);
        encode_pos += 1;

        lower_bound -= block_base;
        upper_bound -= block_base;

        int base_index = block_size * block_index;

        if (m == 0) {
            // Nothing else is stored for the block: either every row matches or none does.
            if (lower_bound < 0 && upper_bound > 0) {
                for (int i = 0; i < remainder; i++) {
                    result[result_length[0]] = base_index + i;
                    result_length[0]++;
                }
            }
            return encode_pos;
        }

        // Candidates: rows whose digits so far tie with a bound. Initially every row.
        int[] candidate_indices = new int[remainder];
        for (int i = 0; i < remainder; i++) {
            candidate_indices[i] = i;
        }
        int candidate_length = remainder;

        int bw = SubcolumnTest.bitWidth(block_size);

        int beta = SubcolumnTest.bytes2Integer(encoded_result, encode_pos, 1);
        encode_pos += 1;

        // Number of sub-columns of beta bits each.
        int l = (m + beta - 1) / beta;

        int[] bitWidthList = new int[l];
        encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, 8, l, bitWidthList);

        int[] encodingType = new int[l];
        encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, 1, l, encodingType);

        int digit_mask = (1 << beta) - 1;

        // Sub-columns are visited from the highest index down.
        for (int i = l - 1; i >= 0; i--) {
            int width = bitWidthList[i];

            if (encodingType[i] == 0) {
                // Bit-packed sub-column: 'remainder' values of 'width' bits, padded to a byte.
                if (upper_bound <= 0) {
                    // No row can match; only advance past the sub-column.
                    encode_pos *= 8;
                    encode_pos += width * remainder;
                    encode_pos = (encode_pos + 7) / 8;
                    continue;
                }

                int lower_value = (lower_bound >> (i * beta)) & digit_mask;
                int upper_value = (upper_bound >> (i * beta)) & digit_mask;

                // From here on encode_pos is a bit offset.
                encode_pos *= 8;

                int new_length = 0;
                for (int j = 0; j < candidate_length; j++) {
                    int index = candidate_indices[j];
                    int digit = SubcolumnTest.bytesToInt(encoded_result, encode_pos + index * width, width);
                    if (digit > lower_value && digit < upper_value) {
                        result[result_length[0]] = base_index + index;
                        result_length[0]++;
                    } else if (digit == lower_value || digit == upper_value) {
                        candidate_indices[new_length] = index;
                        new_length++;
                    }
                }
                candidate_length = new_length;

                encode_pos += remainder * width;
                encode_pos = (encode_pos + 7) / 8;
            } else {
                // Run-length sub-column: 2-byte run count, then run lengths, then run values.
                int run_count = ((encoded_result[encode_pos] & 0xFF) << 8) | (encoded_result[encode_pos + 1] & 0xFF);
                encode_pos += 2;

                if (upper_bound <= 0) {
                    // No row can match; only advance past both packed arrays.
                    encode_pos *= 8;
                    encode_pos += bw * run_count;
                    encode_pos = (encode_pos + 7) / 8;

                    encode_pos *= 8;
                    encode_pos += width * run_count;
                    encode_pos = (encode_pos + 7) / 8;
                    continue;
                }

                int[] run_length = new int[run_count];
                int[] rle_values = new int[run_count];

                encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, bw, run_count, run_length);
                encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, width, run_count,
                        rle_values);

                int lower_value = (lower_bound >> (i * beta)) & digit_mask;
                int upper_value = (upper_bound >> (i * beta)) & digit_mask;

                int new_length = 0;
                int rleIndex = 0;
                int currentPos = 0;

                for (int j = 0; j < candidate_length; j++) {
                    int index_candidate = candidate_indices[j];

                    // Candidates are ascending, so the run cursor only ever moves forward.
                    while (rleIndex < run_count && currentPos + run_length[rleIndex] <= index_candidate) {
                        currentPos += run_length[rleIndex];
                        rleIndex++;
                    }

                    if (rleIndex < run_count) {
                        int digit = rle_values[rleIndex];
                        if (digit > lower_value && digit < upper_value) {
                            result[result_length[0]] = base_index + index_candidate;
                            result_length[0]++;
                        } else if (digit == lower_value || digit == upper_value) {
                            candidate_indices[new_length] = index_candidate;
                            new_length++;
                        }
                    }
                }
                candidate_length = new_length;
            }
        }

        return encode_pos;
    }

    /**
     * Returns the number of characters after the first {@code '.'} in {@code str}, or 0 when
     * there is no decimal point.
     */
    public static int getDecimalPrecision(String str) {
        int decimalIndex = str.indexOf(".");
        if (decimalIndex == -1) {
            return 0;
        }
        return str.length() - decimalIndex - 1;
    }

    /**
     * Returns the last path component without its extension. A null or empty path yields "";
     * a name with no dot, or only a leading dot, is returned unchanged.
     */
    public static String extractFileName(String path) {
        if (path == null || path.isEmpty()) {
            return "";
        }

        String fileName = new File(path).getName();
        int dotIndex = fileName.lastIndexOf('.');
        if (dotIndex == -1 || dotIndex == 0) {
            return fileName;
        }
        return fileName.substring(0, dotIndex);
    }

    /** Benchmarks the query for a range of block sizes using {@code SubcolumnTest.Encoder}. */
    @Test
    public void testQuery() throws IOException {
        int[] block_size_list = { 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192 };

        for (int block_size : block_size_list) {
            String outputPath = BLOCK_OUTPUT_DIR + "subcolumn_query_greater_less_block_" + block_size + ".csv";
            runBenchmark(outputPath, "block_size: " + block_size,
                    (data, out) -> SubcolumnTest.Encoder(data, block_size, out));
        }
    }

    /** Benchmarks the query for beta = 1..31 at block size 512 using {@code SubcolumnBetaTest.Encoder}. */
    @Test
    public void testQueryBeta() throws IOException {
        int block_size = 512;

        for (int b = 1; b <= 31; b++) {
            final int beta = b;
            String outputPath = BETA_OUTPUT_DIR + "subcolumn_query_greater_less_beta_" + beta + ".csv";
            runBenchmark(outputPath, "beta: " + beta,
                    (data, out) -> SubcolumnBetaTest.Encoder(data, block_size, out, beta));
        }
    }

    /**
     * Encodes and queries every dataset in {@link #INPUT_DIR}, writing one CSV row per dataset.
     *
     * @param outputPath result CSV to create
     * @param progressLine line printed after each dataset (names the parameter under test)
     * @param encoder encoder under test
     * @throws IOException if the dataset directory cannot be listed or any file I/O fails
     */
    private static void runBenchmark(String outputPath, String progressLine, BlockEncoder encoder)
            throws IOException {
        // listFiles returns null (not an empty array) when the directory is missing or unreadable.
        File[] csvFiles = new File(INPUT_DIR).listFiles((dir, name) -> name.endsWith(".csv"));
        if (csvFiles == null) {
            throw new IOException("Cannot list dataset directory: " + INPUT_DIR);
        }

        CsvWriter writer = new CsvWriter(outputPath, ',', StandardCharsets.UTF_8);
        try {
            writer.setRecordDelimiter('\n');
            writer.writeRecord(new String[] {
                "Dataset",
                "Encoding Algorithm",
                "Encoding Time",
                "Decoding Time",
                "Points",
                "Compressed Size",
                "Compression Ratio"
            });

            for (File file : csvFiles) {
                String datasetName = extractFileName(file.toString());
                System.out.println(datasetName);

                Integer lower = QUERY_GREATER_RANGE.get(datasetName);
                Integer upper = QUERY_LESS_RANGE.get(datasetName);
                if (lower == null || upper == null) {
                    // Unboxing a missing entry would abort the whole run with an NPE.
                    System.out.println("No query range configured for " + datasetName + ", skipping");
                    continue;
                }
                int lower_bound = lower;
                int upper_bound = upper;

                int[] data2_arr = loadScaledValues(file);
                byte[] encoded_result = new byte[data2_arr.length * 4];

                int length = 0;
                long s = System.nanoTime();
                for (int repeat = 0; repeat < REPEAT_TIME; repeat++) {
                    length = encoder.encode(data2_arr, encoded_result);
                }
                long e = System.nanoTime();
                long encodeTime = (e - s) / REPEAT_TIME;
                double compressed_size = length;

                // The original computed this value but never stored it, so the CSV always held 0.0.
                int rawBytesPerPoint = INTEGER_DATASETS.contains(datasetName) ? Integer.BYTES : Long.BYTES;
                double ratio = compressed_size / ((double) data2_arr.length * rawBytesPerPoint);

                System.out.println("Query");

                s = System.nanoTime();
                for (int repeat = 0; repeat < REPEAT_TIME; repeat++) {
                    Query(encoded_result, lower_bound, upper_bound);
                }
                e = System.nanoTime();
                long decodeTime = (e - s) / REPEAT_TIME;

                writer.writeRecord(new String[] {
                    datasetName,
                    "Sub-columns",
                    String.valueOf(encodeTime),
                    String.valueOf(decodeTime),
                    String.valueOf(data2_arr.length),
                    String.valueOf(compressed_size),
                    String.valueOf(ratio)
                });

                System.out.println(progressLine);
                System.out.println(ratio);
            }
        } finally {
            // Close (and flush) the writer even when a dataset fails.
            writer.close();
        }
    }

    /**
     * Reads the first column of a CSV file as floats, skipping empty cells, and scales every value
     * by 10^(largest number of decimals seen) before truncating to int. Prints that decimal count.
     */
    private static int[] loadScaledValues(File file) throws IOException {
        List<Float> values = new ArrayList<>();
        int max_decimal = 0;

        try (InputStream inputStream = Files.newInputStream(file.toPath())) {
            CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8);
            while (loader.readRecord()) {
                String f_str = loader.getValues()[0];
                if (f_str.isEmpty()) {
                    continue;
                }
                max_decimal = Math.max(max_decimal, getDecimalPrecision(f_str));
                values.add(Float.valueOf(f_str));
            }
        }

        int max_mul = (int) Math.pow(10, max_decimal);
        int[] scaled = new int[values.size()];
        for (int i = 0; i < scaled.length; i++) {
            scaled[i] = (int) (values.get(i) * max_mul);
        }

        System.out.println(max_decimal);
        return scaled;
    }

    private static HashMap<String, Integer> queryGreaterRange() {
        HashMap<String, Integer> range = new HashMap<>();
        range.put("Bird-migration", 2500000);
        range.put("Bitcoin-price", 160000000);
        range.put("City-temp", 480);
        range.put("Dewpoint-temp", 9500);
        range.put("IR-bio-temp", -300);
        range.put("PM10-dust", 1000);
        range.put("Stocks-DE", 40000);
        range.put("Stocks-UK", 20000);
        range.put("Stocks-USA", 5000);
        range.put("Wind-Speed", 50);
        range.put("Wine-Tasting", 0);
        return range;
    }

    private static HashMap<String, Integer> queryLessRange() {
        HashMap<String, Integer> range = new HashMap<>();
        range.put("Bird-migration", 2600000);
        range.put("Bitcoin-price", 170000000);
        range.put("City-temp", 700);
        range.put("Dewpoint-temp", 9600);
        range.put("IR-bio-temp", -200);
        range.put("PM10-dust", 2000);
        range.put("Stocks-DE", 90000);
        range.put("Stocks-UK", 30000);
        range.put("Stocks-USA", 6000);
        range.put("Wind-Speed", 60);
        range.put("Wine-Tasting", 10);
        return range;
    }

    private static List<String> integerDatasets() {
        List<String> names = new ArrayList<>();
        names.add("Wine-Tasting");
        return names;
    }

}
diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnQueryGreaterTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnQueryGreaterTest.java new file mode 100644 index 0000000..0fbbecf --- /dev/null +++ b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnQueryGreaterTest.java
package org.apache.iotdb.tsfile.encoding;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

import org.junit.Test;

import com.csvreader.CsvReader;
import com.csvreader.CsvWriter;

/**
 * Benchmark-style test: encodes every CSV dataset with the sub-column encoder and times a
 * strictly-greater-than query ({@code value > lower_bound}) evaluated directly on the encoded
 * bytes. One result CSV is written per block size ({@link #testQuery()}) or per beta
 * ({@link #testQueryBeta()}).
 */
public class SubcolumnQueryGreaterTest {

    private static final String INPUT_DIR = "D:/github/xjz17/subcolumn/dataset/";
    private static final String BLOCK_OUTPUT_DIR = "D:/encoding-subcolumn/result/query_vs_block/";
    private static final String BETA_OUTPUT_DIR = "D:/encoding-subcolumn/result/query_vs_beta/";

    /** Datasets whose raw points are counted as 4 bytes instead of 8 in the compression ratio. */
    private static final List<String> INTEGER_DATASETS = integerDatasets();

    /** Encodes {@code data} into {@code out}; the return value is reported as the compressed size. */
    @FunctionalInterface
    private interface BlockEncoder {
        int encode(int[] data, byte[] out);
    }

    /**
     * Runs the greater-than query over a whole encoded series. The matches are collected into a
     * local array that is discarded: the method exists to be timed.
     *
     * @param encoded_result encoded bytes; starts with a 4-byte data length and a 4-byte block size
     * @param lower_bound exclusive lower bound
     */
    public static void Query(byte[] encoded_result, int lower_bound) {
        int encode_pos = 0;

        int data_length = readInt(encoded_result, encode_pos);
        encode_pos += 4;

        int block_size = readInt(encoded_result, encode_pos);
        encode_pos += 4;

        int num_blocks = data_length / block_size;

        // Row indices of the matching values.
        int[] result = new int[data_length];
        int[] result_length = new int[1];

        for (int i = 0; i < num_blocks; i++) {
            encode_pos = BlockQueryIndex(encoded_result, i, block_size,
                    block_size, encode_pos, lower_bound,
                    result, result_length);
        }

        int remainder = data_length % block_size;

        if (remainder <= 3) {
            // A tail of at most three values is read as raw 4-byte integers.
            int tail_start = num_blocks * block_size;
            for (int i = 0; i < remainder; i++) {
                int value = readInt(encoded_result, encode_pos);
                if (value > lower_bound) {
                    // Record the row index, like every block branch does (the original stored
                    // the value itself here).
                    result[result_length[0]] = tail_start + i;
                    result_length[0]++;
                }
                encode_pos += 4;
            }
        } else {
            BlockQueryIndex(encoded_result, num_blocks, block_size,
                    remainder, encode_pos, lower_bound,
                    result, result_length);
        }
    }

    /**
     * Evaluates {@code value > lower_bound} on one encoded block and appends the indices of
     * matching rows.
     *
     * <p>The "every row matches" shortcut applies only when the shifted bound is negative. The
     * original used {@code <= 0}, which also reported rows equal to the block base when the
     * shifted bound was exactly 0, although those rows are not strictly greater (and the
     * {@code m == 0} branch already tested {@code < 0}).
     *
     * <p>NOTE(review): bits of the bound above the sub-column range are masked away, so a bound
     * larger than anything the block can hold is compared by its low digits only - confirm
     * whether such blocks should match nothing.
     *
     * @param encoded_result encoded bytes
     * @param block_index index of the block, used to turn in-block positions into row indices
     * @param block_size nominal block size
     * @param remainder number of rows actually stored in this block
     * @param encode_pos byte offset at which the block starts
     * @param lower_bound exclusive lower bound
     * @param result output array of matching row indices
     * @param result_length single-element counter of used slots in {@code result}
     * @return byte offset just past the block
     */
    public static int BlockQueryIndex(byte[] encoded_result, int block_index, int block_size, int remainder,
            int encode_pos, int lower_bound, int[] result, int[] result_length) {
        // Block header: a 4-byte base value that the bound is shifted by, then a 1-byte m.
        int block_base = readInt(encoded_result, encode_pos);
        encode_pos += 4;

        int m = encoded_result[encode_pos];
        encode_pos += 1;

        lower_bound -= block_base;

        int base_index = block_size * block_index;

        if (m == 0) {
            // Nothing else is stored for the block: either every row matches or none does.
            if (lower_bound < 0) {
                for (int i = 0; i < remainder; i++) {
                    result[result_length[0]] = base_index + i;
                    result_length[0]++;
                }
            }
            return encode_pos;
        }

        // With a negative shifted bound every row qualifies; the loop below then only advances
        // encode_pos past the sub-columns.
        boolean all_match = lower_bound < 0;

        // Candidates: rows whose digits so far equal the bound's digits. Initially every row.
        int[] candidate_indices = new int[remainder];
        for (int i = 0; i < remainder; i++) {
            candidate_indices[i] = i;
        }
        int candidate_length = remainder;

        int bw = SubcolumnTest.bitWidth(block_size);

        int beta = encoded_result[encode_pos];
        encode_pos += 1;

        // Number of sub-columns of beta bits each.
        int l = (m + beta - 1) / beta;

        int[] bitWidthList = new int[l];
        encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, 8, l, bitWidthList);

        int[] encodingType = new int[l];
        encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, 1, l, encodingType);

        int digit_mask = (1 << beta) - 1;

        // Sub-columns are visited from the highest index down.
        for (int i = l - 1; i >= 0; i--) {
            int width = bitWidthList[i];

            if (encodingType[i] == 0) {
                // Bit-packed sub-column: 'remainder' values of 'width' bits, padded to a byte.
                if (all_match) {
                    encode_pos *= 8;
                    encode_pos += width * remainder;
                    encode_pos = (encode_pos + 7) / 8;
                    continue;
                }

                int value = (lower_bound >> (i * beta)) & digit_mask;

                // From here on encode_pos is a bit offset.
                encode_pos *= 8;

                int new_length = 0;
                for (int j = 0; j < candidate_length; j++) {
                    int index = candidate_indices[j];
                    int digit = SubcolumnTest.bytesToInt(encoded_result, encode_pos + index * width, width);
                    if (digit > value) {
                        result[result_length[0]] = base_index + index;
                        result_length[0]++;
                    } else if (digit == value) {
                        candidate_indices[new_length] = index;
                        new_length++;
                    }
                }
                candidate_length = new_length;

                encode_pos += remainder * width;
                encode_pos = (encode_pos + 7) / 8;
            } else {
                // Run-length sub-column: 2-byte run count, then run lengths, then run values.
                int run_count = ((encoded_result[encode_pos] & 0xFF) << 8) | (encoded_result[encode_pos + 1] & 0xFF);
                encode_pos += 2;

                if (all_match) {
                    encode_pos *= 8;
                    encode_pos += bw * run_count;
                    encode_pos = (encode_pos + 7) / 8;

                    encode_pos *= 8;
                    encode_pos += width * run_count;
                    encode_pos = (encode_pos + 7) / 8;
                    continue;
                }

                int[] run_length = new int[run_count];
                int[] rle_values = new int[run_count];

                encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, bw, run_count, run_length);
                encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, width, run_count,
                        rle_values);

                int value = (lower_bound >> (i * beta)) & digit_mask;

                int new_length = 0;
                int rleIndex = 0;
                int currentPos = 0;

                for (int j = 0; j < candidate_length; j++) {
                    int index_candidate = candidate_indices[j];

                    // Candidates are ascending, so the run cursor only ever moves forward.
                    while (rleIndex < run_count && currentPos + run_length[rleIndex] <= index_candidate) {
                        currentPos += run_length[rleIndex];
                        rleIndex++;
                    }

                    if (rleIndex < run_count) {
                        int digit = rle_values[rleIndex];
                        if (digit > value) {
                            result[result_length[0]] = base_index + index_candidate;
                            result_length[0]++;
                        } else if (digit == value) {
                            candidate_indices[new_length] = index_candidate;
                            new_length++;
                        }
                    }
                }
                candidate_length = new_length;
            }
        }

        if (all_match) {
            for (int i = 0; i < remainder; i++) {
                result[result_length[0]] = base_index + i;
                result_length[0]++;
            }
        }

        return encode_pos;
    }

    /**
     * Returns the number of characters after the first {@code '.'} in {@code str}, or 0 when
     * there is no decimal point.
     */
    public static int getDecimalPrecision(String str) {
        int decimalIndex = str.indexOf(".");
        if (decimalIndex == -1) {
            return 0;
        }
        return str.length() - decimalIndex - 1;
    }

    /**
     * Returns the last path component without its extension. A null or empty path yields "";
     * a name with no dot, or only a leading dot, is returned unchanged.
     */
    public static String extractFileName(String path) {
        if (path == null || path.isEmpty()) {
            return "";
        }

        String fileName = new File(path).getName();
        int dotIndex = fileName.lastIndexOf('.');
        if (dotIndex == -1 || dotIndex == 0) {
            return fileName;
        }
        return fileName.substring(0, dotIndex);
    }

    /** Benchmarks the query for a range of block sizes using {@code SubcolumnTest.Encoder}. */
    @Test
    public void testQuery() throws IOException {
        int[] block_size_list = { 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192 };
        HashMap<String, Integer> queryRange = queryRange(10);
        int repeatTime = 100;

        for (int block_size : block_size_list) {
            String outputPath = BLOCK_OUTPUT_DIR + "subcolumn_query_greater_block_" + block_size + ".csv";
            runBenchmark(outputPath, "block_size: " + block_size, repeatTime, queryRange,
                    (data, out) -> SubcolumnTest.Encoder(data, block_size, out));
        }
    }

    /** Benchmarks the query for beta = 1..31 at block size 512 using {@code SubcolumnBetaTest.Encoder}. */
    @Test
    public void testQueryBeta() throws IOException {
        int block_size = 512;
        HashMap<String, Integer> queryRange = queryRange(0);
        int repeatTime = 200;

        for (int b = 1; b <= 31; b++) {
            final int beta = b;
            String outputPath = BETA_OUTPUT_DIR + "subcolumn_query_greater_beta_" + beta + ".csv";
            runBenchmark(outputPath, "beta: " + beta, repeatTime, queryRange,
                    (data, out) -> SubcolumnBetaTest.Encoder(data, block_size, out, beta));
        }
    }

    /**
     * Encodes and queries every dataset in {@link #INPUT_DIR}, writing one CSV row per dataset.
     *
     * @param outputPath result CSV to create
     * @param progressLine line printed after each dataset (names the parameter under test)
     * @param repeatTime number of repetitions each timing is averaged over
     * @param queryRange exclusive lower bound per dataset name
     * @param encoder encoder under test
     * @throws IOException if the dataset directory cannot be listed or any file I/O fails
     */
    private static void runBenchmark(String outputPath, String progressLine, int repeatTime,
            HashMap<String, Integer> queryRange, BlockEncoder encoder) throws IOException {
        // listFiles returns null (not an empty array) when the directory is missing or unreadable.
        File[] csvFiles = new File(INPUT_DIR).listFiles((dir, name) -> name.endsWith(".csv"));
        if (csvFiles == null) {
            throw new IOException("Cannot list dataset directory: " + INPUT_DIR);
        }

        CsvWriter writer = new CsvWriter(outputPath, ',', StandardCharsets.UTF_8);
        try {
            writer.setRecordDelimiter('\n');
            writer.writeRecord(new String[] {
                "Dataset",
                "Encoding Algorithm",
                "Encoding Time",
                "Decoding Time",
                "Points",
                "Compressed Size",
                "Compression Ratio"
            });

            for (File file : csvFiles) {
                String datasetName = extractFileName(file.toString());
                System.out.println(datasetName);

                Integer bound = queryRange.get(datasetName);
                if (bound == null) {
                    // Unboxing a missing entry would abort the whole run with an NPE.
                    System.out.println("No query range configured for " + datasetName + ", skipping");
                    continue;
                }
                int lower_bound = bound;

                int[] data2_arr = loadScaledValues(file);
                byte[] encoded_result = new byte[data2_arr.length * 4];

                int length = 0;
                long s = System.nanoTime();
                for (int repeat = 0; repeat < repeatTime; repeat++) {
                    length = encoder.encode(data2_arr, encoded_result);
                }
                long e = System.nanoTime();
                long encodeTime = (e - s) / repeatTime;
                double compressed_size = length;

                // The original computed this value but never stored it, so the CSV always held 0.0.
                int rawBytesPerPoint = INTEGER_DATASETS.contains(datasetName) ? Integer.BYTES : Long.BYTES;
                double ratio = compressed_size / ((double) data2_arr.length * rawBytesPerPoint);

                System.out.println("Query");

                s = System.nanoTime();
                for (int repeat = 0; repeat < repeatTime; repeat++) {
                    Query(encoded_result, lower_bound);
                }
                e = System.nanoTime();
                long decodeTime = (e - s) / repeatTime;

                writer.writeRecord(new String[] {
                    datasetName,
                    "Sub-columns",
                    String.valueOf(encodeTime),
                    String.valueOf(decodeTime),
                    String.valueOf(data2_arr.length),
                    String.valueOf(compressed_size),
                    String.valueOf(ratio)
                });

                System.out.println(progressLine);
                System.out.println(ratio);
            }
        } finally {
            // Close (and flush) the writer even when a dataset fails.
            writer.close();
        }
    }

    /**
     * Reads the first column of a CSV file as floats, skipping empty cells, and scales every value
     * by 10^(largest number of decimals seen) before truncating to int. Prints that decimal count.
     */
    private static int[] loadScaledValues(File file) throws IOException {
        List<Float> values = new ArrayList<>();
        int max_decimal = 0;

        try (InputStream inputStream = Files.newInputStream(file.toPath())) {
            CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8);
            while (loader.readRecord()) {
                String f_str = loader.getValues()[0];
                if (f_str.isEmpty()) {
                    continue;
                }
                max_decimal = Math.max(max_decimal, getDecimalPrecision(f_str));
                values.add(Float.valueOf(f_str));
            }
        }

        int max_mul = (int) Math.pow(10, max_decimal);
        int[] scaled = new int[values.size()];
        for (int i = 0; i < scaled.length; i++) {
            scaled[i] = (int) (values.get(i) * max_mul);
        }

        System.out.println(max_decimal);
        return scaled;
    }

    /** Reads a big-endian 4-byte integer starting at {@code pos}. */
    private static int readInt(byte[] bytes, int pos) {
        return ((bytes[pos] & 0xFF) << 24) | ((bytes[pos + 1] & 0xFF) << 16)
                | ((bytes[pos + 2] & 0xFF) << 8) | (bytes[pos + 3] & 0xFF);
    }

    /**
     * Builds the per-dataset lower bounds. The two tests differ only in the "Wine-Tasting" entry,
     * which is therefore a parameter.
     */
    private static HashMap<String, Integer> queryRange(int wineTastingBound) {
        HashMap<String, Integer> range = new HashMap<>();
        range.put("Bird-migration", 2500000);
        range.put("Bitcoin-price", 160000000);
        range.put("City-temp", 480);
        range.put("Dewpoint-temp", 9500);
        range.put("IR-bio-temp", -300);
        range.put("PM10-dust", 1000);
        range.put("Stocks-DE", 40000);
        range.put("Stocks-UK", 20000);
        range.put("Stocks-USA", 5000);
        range.put("Wind-Speed", 50);
        range.put("Wine-Tasting", wineTastingBound);
        return range;
    }

    private static List<String> integerDatasets() {
        List<String> names = new ArrayList<>();
        names.add("Wine-Tasting");
        return names;
    }

}
diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnQueryLessPartsTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnQueryLessPartsTest.java new file mode 100644 index 0000000..8744b5e --- /dev/null +++ b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnQueryLessPartsTest.java
package org.apache.iotdb.tsfile.encoding;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.junit.Test;

import com.csvreader.CsvReader;
import com.csvreader.CsvWriter;

/**
 * Benchmarks a two-column "less than" query evaluated directly on sub-column encoded data.
 *
 * <p>The first column is scanned completely ({@link #Query}); the row indices that satisfy its
 * predicate are then used to probe only the matching rows of the second column
 * ({@link #QueryWithIndices}).
 *
 * <p>Layout of an encoded column as read by this class: a 4-byte big-endian value count, a 4-byte
 * big-endian block size, then one encoded block per {@code block_size} values. A trailing partial
 * block of at most three values is stored as raw 4-byte big-endian integers; a longer one is
 * stored as a regular (shorter) block.
 *
 * <p>The dataset and result locations default to the original developer paths and can be
 * overridden with the system properties {@code subcolumn.dataset.dir} and
 * {@code subcolumn.result.dir}.
 */
public class SubcolumnQueryLessPartsTest {

    /** Directory that is scanned for {@code *.csv} datasets. */
    private static final String DATASET_DIR = System.getProperty("subcolumn.dataset.dir",
            "D:/github/xjz17/subcolumn/dataset/");

    /** Root directory below which the per-experiment result folders are written. */
    private static final String RESULT_ROOT = System.getProperty("subcolumn.result.dir",
            "D:/encoding-subcolumn/result/");

    /** Number of repetitions over which each timing is averaged. */
    private static final int REPEAT_TIME = 200;

    private static final int[] BLOCK_SIZES = { 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192 };

    private static final String[] CSV_HEADER = {
            "Dataset",
            "Encoding Algorithm",
            "Encoding Time",
            "Decoding Time",
            "Points",
            "Compressed Size",
            "Compression Ratio"
    };

    /** Datasets whose uncompressed size is counted as 4 bytes per value; all others use 8. */
    private static final List<String> INTEGER_DATASETS = new ArrayList<>();

    /** Exclusive upper bound used by the query, keyed by dataset name (already scaled to int). */
    private static final Map<String, Integer> QUERY_UPPER_BOUNDS = new HashMap<>();

    static {
        INTEGER_DATASETS.add("Wine-Tasting");

        QUERY_UPPER_BOUNDS.put("Bird-migration", 2600000);
        QUERY_UPPER_BOUNDS.put("Bitcoin-price", 170000000);
        QUERY_UPPER_BOUNDS.put("City-temp", 700);
        QUERY_UPPER_BOUNDS.put("Dewpoint-temp", 9600);
        QUERY_UPPER_BOUNDS.put("IR-bio-temp", -200);
        QUERY_UPPER_BOUNDS.put("PM10-dust", 2000);
        QUERY_UPPER_BOUNDS.put("Stocks-DE", 90000);
        QUERY_UPPER_BOUNDS.put("Stocks-UK", 30000);
        QUERY_UPPER_BOUNDS.put("Stocks-USA", 6000);
        QUERY_UPPER_BOUNDS.put("Wind-Speed", 60);
        QUERY_UPPER_BOUNDS.put("Wine-Tasting", 10);
    }

    /** Encodes one column into {@code out} and returns the number of bytes written. */
    @FunctionalInterface
    private interface ColumnEncoder {
        int encode(int[] data, byte[] out) throws IOException;
    }

    /** Reads a 4-byte big-endian integer starting at {@code pos}. */
    private static int readInt(byte[] buf, int pos) {
        return ((buf[pos] & 0xFF) << 24) | ((buf[pos + 1] & 0xFF) << 16)
                | ((buf[pos + 2] & 0xFF) << 8) | (buf[pos + 3] & 0xFF);
    }

    /**
     * Evaluates {@code col1 < upper_bound1 AND col2 < upper_bound2} on two encoded columns.
     *
     * <p>The result is computed but not returned; the method exists to be timed.
     *
     * @param encoded_result1 encoded first column
     * @param encoded_result2 encoded second column (same row numbering as the first)
     * @param upper_bound1 exclusive upper bound for the first column
     * @param upper_bound2 exclusive upper bound for the second column
     */
    public static void QueryTwoColumns(byte[] encoded_result1, byte[] encoded_result2, int upper_bound1,
            int upper_bound2) {
        // At most one index per stored value can match, so the value count from the header is
        // the exact capacity needed (independent of how large the backing buffer happens to be).
        int[] first_column_results = new int[readInt(encoded_result1, 0)];
        int[] first_result_length = new int[1];

        Query(encoded_result1, upper_bound1, first_column_results, first_result_length);

        int[] final_results = new int[first_result_length[0]];
        int[] final_result_length = new int[1];

        QueryWithIndices(encoded_result2, upper_bound2, first_column_results, first_result_length[0],
                final_results, final_result_length);
    }

    /**
     * Finds, among the given candidate rows only, those whose value is below {@code upper_bound}.
     *
     * @param encoded_result encoded column
     * @param upper_bound exclusive upper bound
     * @param candidate_indices global row indices to test (any order); not modified
     * @param candidate_length number of valid entries in {@code candidate_indices}
     * @param result receives the matching global row indices
     * @param result_length single-element array; reset to 0 and then set to the number of matches
     */
    public static void QueryWithIndices(byte[] encoded_result, int upper_bound,
            int[] candidate_indices, int candidate_length,
            int[] result, int[] result_length) {
        int encode_pos = 0;

        int data_length = readInt(encoded_result, encode_pos);
        encode_pos += 4;

        int block_size = readInt(encoded_result, encode_pos);
        encode_pos += 4;

        int num_blocks = data_length / block_size;

        result_length[0] = 0;

        // Bucket the candidates by block: first count, then allocate, then fill with the
        // block-local index. Slot num_blocks holds candidates of the trailing partial block.
        int[] blockIndicesCount = new int[num_blocks + 1];
        for (int i = 0; i < candidate_length; i++) {
            blockIndicesCount[candidate_indices[i] / block_size]++;
        }

        int[][] blockIndices = new int[num_blocks + 1][];
        for (int i = 0; i <= num_blocks; i++) {
            blockIndices[i] = new int[blockIndicesCount[i]];
        }

        int[] fillPos = new int[num_blocks + 1];
        for (int i = 0; i < candidate_length; i++) {
            int index = candidate_indices[i];
            int blockIndex = index / block_size;
            blockIndices[blockIndex][fillPos[blockIndex]++] = index % block_size;
        }

        // Full blocks: skip those without candidates, probe the others.
        for (int i = 0; i < num_blocks; i++) {
            if (blockIndicesCount[i] == 0) {
                encode_pos = SkipBlock(encoded_result, i, block_size, block_size, encode_pos);
            } else {
                encode_pos = BlockQueryWithIndices(encoded_result, i, block_size,
                        block_size, encode_pos, upper_bound,
                        blockIndices[i], blockIndicesCount[i], result, result_length);
            }
        }

        int remainder = data_length % block_size;
        int tailCount = blockIndicesCount[num_blocks];
        if (remainder == 0 || tailCount == 0) {
            // Nothing left to test; the read position is not needed any more.
            return;
        }

        if (remainder <= 3) {
            // Short tails are stored as raw 4-byte integers.
            for (int j = 0; j < tailCount; j++) {
                int idx = blockIndices[num_blocks][j];
                int offset = num_blocks * block_size + idx;
                if (offset < data_length && readInt(encoded_result, encode_pos + idx * 4) < upper_bound) {
                    result[result_length[0]] = offset;
                    result_length[0]++;
                }
            }
        } else {
            BlockQueryWithIndices(encoded_result, num_blocks, block_size,
                    remainder, encode_pos, upper_bound,
                    blockIndices[num_blocks], tailCount, result, result_length);
        }
    }

    /**
     * Tests the given block-local candidates of one block against {@code upper_bound}.
     *
     * @param encoded_result encoded column
     * @param block_index index of the block (used to turn local indices into global ones)
     * @param block_size nominal block size of the column
     * @param remainder number of values actually stored in this block
     * @param encode_pos byte offset of the block within {@code encoded_result}
     * @param upper_bound exclusive upper bound (in original value space)
     * @param candidate_indices block-local indices to test (any order); not modified
     * @param candidate_length number of valid entries in {@code candidate_indices}
     * @param result receives matching global indices, appended at {@code result_length[0]}
     * @param result_length single-element array holding the current number of results
     * @return byte offset just past this block
     */
    public static int BlockQueryWithIndices(byte[] encoded_result, int block_index, int block_size, int remainder,
            int encode_pos, int upper_bound, int[] candidate_indices, int candidate_length,
            int[] result, int[] result_length) {
        // Work on a copy so the caller's candidate list stays intact. The candidates may be in
        // any order, hence ascending == false.
        int[] working = Arrays.copyOf(candidate_indices, candidate_length);
        return filterBlock(encoded_result, block_index, block_size, remainder, encode_pos, upper_bound,
                working, candidate_length, false, result, result_length);
    }

    /**
     * Shared block filter behind {@link #BlockQueryWithIndices} and {@link #BlockQueryIndex}.
     *
     * <p>Sub-columns are visited from the most significant one downwards. For each sub-column the
     * candidate's digit is compared with the corresponding digit of the (rebased) bound: a
     * smaller digit is a definite match, an equal digit keeps the row as a candidate for the next
     * sub-column, a larger digit eliminates the row.
     *
     * @param candidates block-local indices; overwritten in place while filtering
     * @param ascending {@code true} if {@code candidates} is sorted ascending, which allows the
     *        run-length lookup to advance monotonically instead of restarting per candidate
     * @return byte offset just past this block
     */
    private static int filterBlock(byte[] encoded_result, int block_index, int block_size, int remainder,
            int encode_pos, int upper_bound, int[] candidates, int candidate_length, boolean ascending,
            int[] result, int[] result_length) {
        // Block header: base value (min_delta in the encoder's terms — presumably the block
        // minimum) followed by m, read here as a one-byte bit count.
        int base_value = readInt(encoded_result, encode_pos);
        encode_pos += 4;

        int m = encoded_result[encode_pos];
        encode_pos += 1;

        // Rebase the bound so it can be compared with the stored offsets.
        upper_bound -= base_value;

        int global_base = block_size * block_index;

        if (m == 0) {
            // No payload: every stored offset is 0, so all candidates match iff the bound is positive.
            if (upper_bound > 0) {
                for (int j = 0; j < candidate_length; j++) {
                    result[result_length[0]] = global_base + candidates[j];
                    result_length[0]++;
                }
            }
            return encode_pos;
        }

        int bw = SubcolumnTest.bitWidth(block_size);

        int beta = encoded_result[encode_pos];
        encode_pos += 1;

        // Number of sub-columns of beta bits each needed to hold m bits.
        int l = (m + beta - 1) / beta;

        int[] bitWidthList = new int[l];
        encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, 8, l, bitWidthList);

        int[] encodingType = new int[l];
        encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, 1, l, encodingType);

        // The digit comparison below only looks at the low l*beta bits of the bound. If the bound
        // has bits above that range it exceeds every value the block can hold, so all candidates
        // match; without this check those high bits would be silently dropped.
        int totalBits = l * beta;
        boolean boundAboveRange = upper_bound > 0 && totalBits < 31 && (upper_bound >> totalBits) != 0;
        if (boundAboveRange) {
            for (int j = 0; j < candidate_length; j++) {
                result[result_length[0]] = global_base + candidates[j];
                result_length[0]++;
            }
        }

        // When nothing (more) can match we still have to walk the payload to find the block end.
        boolean skipOnly = upper_bound <= 0 || boundAboveRange;

        for (int i = l - 1; i >= 0; i--) {
            int width = bitWidthList[i];

            if (encodingType[i] == 0) {
                // Fixed-width bit-packed sub-column: `remainder` entries of `width` bits.
                int bitPos = encode_pos * 8;

                if (!skipOnly) {
                    int digit = (upper_bound >> (i * beta)) & ((1 << beta) - 1);
                    int kept = 0;
                    for (int j = 0; j < candidate_length; j++) {
                        int index = candidates[j];
                        int stored = SubcolumnTest.bytesToInt(encoded_result, bitPos + index * width, width);
                        if (stored < digit) {
                            result[result_length[0]] = global_base + index;
                            result_length[0]++;
                        } else if (stored == digit) {
                            candidates[kept] = index;
                            kept++;
                        }
                    }
                    candidate_length = kept;
                }

                encode_pos = (bitPos + remainder * width + 7) / 8;
            } else {
                // Run-length encoded sub-column: 2-byte run count, packed run lengths, packed values.
                int runs = ((encoded_result[encode_pos] & 0xFF) << 8) | (encoded_result[encode_pos + 1] & 0xFF);
                encode_pos += 2;

                if (skipOnly) {
                    encode_pos = (encode_pos * 8 + bw * runs + 7) / 8;
                    encode_pos = (encode_pos * 8 + width * runs + 7) / 8;
                    continue;
                }

                int[] run_length = new int[runs];
                int[] rle_values = new int[runs];

                encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, bw, runs, run_length);
                encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, width, runs, rle_values);

                int digit = (upper_bound >> (i * beta)) & ((1 << beta) - 1);
                int kept = 0;
                int rleIndex = 0;
                int currentPos = 0;

                for (int j = 0; j < candidate_length; j++) {
                    int index = candidates[j];

                    if (!ascending) {
                        // Unsorted candidates may lie before the current run: restart the scan.
                        rleIndex = 0;
                        currentPos = 0;
                    }

                    // Advance to the run that covers this index.
                    while (rleIndex < runs && currentPos + run_length[rleIndex] <= index) {
                        currentPos += run_length[rleIndex];
                        rleIndex++;
                    }

                    if (rleIndex < runs) {
                        if (rle_values[rleIndex] < digit) {
                            result[result_length[0]] = global_base + index;
                            result_length[0]++;
                        } else if (rle_values[rleIndex] == digit) {
                            candidates[kept] = index;
                            kept++;
                        }
                    }
                }

                candidate_length = kept;
            }
        }

        return encode_pos;
    }

    /**
     * Advances past one encoded block without evaluating it.
     *
     * @param encoded_result encoded column
     * @param block_index unused; kept for signature symmetry with the query methods
     * @param block_size nominal block size of the column
     * @param remainder number of values actually stored in this block
     * @param encode_pos byte offset of the block
     * @return byte offset just past this block
     */
    private static int SkipBlock(byte[] encoded_result, int block_index, int block_size, int remainder,
            int encode_pos) {
        // Skip the 4-byte base value.
        encode_pos += 4;

        int m = encoded_result[encode_pos];
        encode_pos += 1;

        if (m == 0) {
            return encode_pos;
        }

        int bw = SubcolumnTest.bitWidth(block_size);

        int beta = encoded_result[encode_pos];
        encode_pos += 1;

        int l = (m + beta - 1) / beta;

        int[] bitWidthList = new int[l];
        encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, 8, l, bitWidthList);

        int[] encodingType = new int[l];
        encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, 1, l, encodingType);

        for (int i = l - 1; i >= 0; i--) {
            if (encodingType[i] == 0) {
                encode_pos = (encode_pos * 8 + bitWidthList[i] * remainder + 7) / 8;
            } else {
                int runs = ((encoded_result[encode_pos] & 0xFF) << 8) | (encoded_result[encode_pos + 1] & 0xFF);
                encode_pos += 2;

                encode_pos = (encode_pos * 8 + bw * runs + 7) / 8;
                encode_pos = (encode_pos * 8 + bitWidthList[i] * runs + 7) / 8;
            }
        }

        return encode_pos;
    }

    /**
     * Finds all rows of an encoded column whose value is below {@code upper_bound}.
     *
     * @param encoded_result encoded column
     * @param upper_bound exclusive upper bound
     * @param result receives the matching global row indices (not necessarily sorted)
     * @param result_length single-element array; reset to 0 and then set to the number of matches
     */
    public static void Query(byte[] encoded_result, int upper_bound, int[] result, int[] result_length) {
        int encode_pos = 0;

        int data_length = readInt(encoded_result, encode_pos);
        encode_pos += 4;

        int block_size = readInt(encoded_result, encode_pos);
        encode_pos += 4;

        int num_blocks = data_length / block_size;

        result_length[0] = 0;

        for (int i = 0; i < num_blocks; i++) {
            encode_pos = BlockQueryIndex(encoded_result, i, block_size,
                    block_size, encode_pos, upper_bound,
                    result, result_length);
        }

        int remainder = data_length % block_size;

        if (remainder <= 3) {
            // Short tails are stored as raw 4-byte integers. Record the row index (not the value):
            // every other path emits indices and QueryTwoColumns consumes them as such.
            int tail_start = num_blocks * block_size;
            for (int i = 0; i < remainder; i++) {
                if (readInt(encoded_result, encode_pos) < upper_bound) {
                    result[result_length[0]] = tail_start + i;
                    result_length[0]++;
                }
                encode_pos += 4;
            }
        } else {
            BlockQueryIndex(encoded_result, num_blocks, block_size,
                    remainder, encode_pos, upper_bound,
                    result, result_length);
        }
    }

    /**
     * Tests every value of one block against {@code upper_bound}.
     *
     * @param encoded_result encoded column
     * @param block_index index of the block (used to turn local indices into global ones)
     * @param block_size nominal block size of the column
     * @param remainder number of values actually stored in this block
     * @param encode_pos byte offset of the block within {@code encoded_result}
     * @param upper_bound exclusive upper bound (in original value space)
     * @param result receives matching global indices, appended at {@code result_length[0]}
     * @param result_length single-element array holding the current number of results
     * @return byte offset just past this block
     */
    public static int BlockQueryIndex(byte[] encoded_result, int block_index, int block_size, int remainder,
            int encode_pos, int upper_bound, int[] result, int[] result_length) {
        // Start with every row of the block as a candidate; in-order filtering keeps the list
        // ascending, which lets the run-length lookup advance monotonically.
        int[] candidate_indices = new int[remainder];
        for (int i = 0; i < remainder; i++) {
            candidate_indices[i] = i;
        }
        return filterBlock(encoded_result, block_index, block_size, remainder, encode_pos, upper_bound,
                candidate_indices, remainder, true, result, result_length);
    }

    /**
     * Returns the number of characters after the first {@code '.'} in {@code str}, or 0 if there
     * is no decimal point.
     */
    public static int getDecimalPrecision(String str) {
        int decimalIndex = str.indexOf(".");
        if (decimalIndex == -1) {
            return 0;
        }
        return str.length() - decimalIndex - 1;
    }

    /**
     * Returns the last path component of {@code path} without its extension.
     *
     * <p>Names without a dot, or whose only dot is the first character, are returned unchanged;
     * {@code null} or empty input yields the empty string.
     */
    public static String extractFileName(String path) {
        if (path == null || path.isEmpty()) {
            return "";
        }

        String fileName = new File(path).getName();
        int dotIndex = fileName.lastIndexOf('.');

        if (dotIndex <= 0) {
            return fileName;
        }

        return fileName.substring(0, dotIndex);
    }

    /**
     * Reads the first CSV column of {@code file} and scales every value by 10^d, where d is the
     * largest number of decimal places seen, so that the values can be stored as integers.
     * Prints d, as the original benchmark did.
     */
    private static int[] loadScaledValues(File file) throws IOException {
        List<Float> values = new ArrayList<>();
        int max_decimal = 0;

        try (InputStream inputStream = Files.newInputStream(file.toPath())) {
            CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8);
            while (loader.readRecord()) {
                String f_str = loader.getValues()[0];
                if (f_str.isEmpty()) {
                    continue;
                }
                max_decimal = Math.max(max_decimal, getDecimalPrecision(f_str));
                values.add(Float.valueOf(f_str));
            }
        }

        int max_mul = (int) Math.pow(10, max_decimal);
        int[] scaled = new int[values.size()];
        for (int i = 0; i < scaled.length; i++) {
            // Deliberately float arithmetic, matching the precision of the original benchmark.
            scaled[i] = (int) (values.get(i) * max_mul);
        }

        System.out.println(max_decimal);
        return scaled;
    }

    /**
     * Runs the encode + two-column query benchmark over every dataset and writes one CSV report.
     *
     * <p>Each dataset is split into two halves that act as the two columns.
     *
     * @param resultSubDir folder below the result root
     * @param resultFileName name of the CSV report
     * @param progressLabel line printed after each dataset (identifies the parameter under test)
     * @param encoder encoder configuration under test
     * @throws IOException if the dataset directory is missing or any file cannot be read/written
     */
    private static void benchmarkDatasets(String resultSubDir, String resultFileName, String progressLabel,
            ColumnEncoder encoder) throws IOException {
        File[] csvFiles = new File(DATASET_DIR).listFiles((dir, name) -> name.endsWith(".csv"));
        if (csvFiles == null) {
            // listFiles returns null (not an empty array) for a missing or unreadable directory.
            throw new IOException("Dataset directory does not exist or cannot be read: " + DATASET_DIR);
        }

        File outputFile = new File(new File(RESULT_ROOT, resultSubDir), resultFileName);
        Files.createDirectories(outputFile.getParentFile().toPath());

        CsvWriter writer = new CsvWriter(outputFile.getPath(), ',', StandardCharsets.UTF_8);
        try {
            writer.setRecordDelimiter('\n');
            writer.writeRecord(CSV_HEADER);

            for (File file : csvFiles) {
                String datasetName = extractFileName(file.toString());
                System.out.println(datasetName);

                Integer bound = QUERY_UPPER_BOUNDS.get(datasetName);
                if (bound == null) {
                    System.out.println("No query bound configured for " + datasetName + "; skipping");
                    continue;
                }

                int[] scaled = loadScaledValues(file);
                int points = scaled.length;
                int halfSize = points / 2;

                // First half is column 1, second half is column 2 (an odd last value is dropped).
                int[] col1_data = Arrays.copyOfRange(scaled, 0, halfSize);
                int[] col2_data = Arrays.copyOfRange(scaled, halfSize, 2 * halfSize);

                byte[] encoded_result1 = new byte[col1_data.length * 4];
                byte[] encoded_result2 = new byte[col2_data.length * 4];

                int length1 = 0;
                long s = System.nanoTime();
                for (int repeat = 0; repeat < REPEAT_TIME; repeat++) {
                    length1 = encoder.encode(col1_data, encoded_result1);
                }
                long encodeTime = (System.nanoTime() - s) / REPEAT_TIME;

                int length2 = 0;
                s = System.nanoTime();
                for (int repeat = 0; repeat < REPEAT_TIME; repeat++) {
                    length2 = encoder.encode(col2_data, encoded_result2);
                }
                encodeTime += (System.nanoTime() - s) / REPEAT_TIME;

                double compressed_size = length1 + length2;
                int bytesPerValue = INTEGER_DATASETS.contains(datasetName) ? Integer.BYTES : Long.BYTES;
                double ratio = compressed_size / (double) (points * bytesPerValue);

                System.out.println("Query");

                s = System.nanoTime();
                for (int repeat = 0; repeat < REPEAT_TIME; repeat++) {
                    QueryTwoColumns(encoded_result1, encoded_result2, bound, bound);
                }
                long decodeTime = (System.nanoTime() - s) / REPEAT_TIME;

                String[] record = {
                        datasetName,
                        "Sub-columns",
                        String.valueOf(encodeTime),
                        String.valueOf(decodeTime),
                        String.valueOf(points),
                        String.valueOf(compressed_size),
                        String.valueOf(ratio)
                };
                writer.writeRecord(record);

                System.out.println(progressLabel);
                System.out.println(ratio);
            }
        } finally {
            writer.close();
        }
    }

    /** Benchmarks the two-column query for a range of block sizes with the default encoder. */
    @Test
    public void testQuery() throws IOException {
        for (int block_size : BLOCK_SIZES) {
            benchmarkDatasets("query_vs_block",
                    "subcolumn_query_less_parts_block_" + block_size + ".csv",
                    "block_size: " + block_size,
                    (data, out) -> SubcolumnTest.Encoder(data, block_size, out));
        }
    }

    /** Benchmarks the two-column query for sub-column widths (beta) 1..31 at block size 512. */
    @Test
    public void testQueryBeta() throws IOException {
        int block_size = 512;

        for (int beta = 1; beta <= 31; beta++) {
            int current_beta = beta;
            benchmarkDatasets("query_vs_beta",
                    "subcolumn_query_less_parts_beta_" + beta + ".csv",
                    "beta: " + beta,
                    (data, out) -> SubcolumnBetaTest.Encoder(data, block_size, out, current_beta));
        }
    }
}
diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnQueryLessTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnQueryLessTest.java new file mode 100644 index 0000000..caeef29 --- /dev/null +++ b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnQueryLessTest.java
@@ -0,0 +1,514 @@ +package org.apache.iotdb.tsfile.encoding; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; + +import org.junit.Test; + +import com.csvreader.CsvReader; +import com.csvreader.CsvWriter; + +public class SubcolumnQueryLessTest { + + public static void Query(byte[] encoded_result, int upper_bound) { + + int encode_pos = 0; + + int data_length = ((encoded_result[encode_pos] & 0xFF) << 24) | ((encoded_result[encode_pos + 1] & 0xFF) << 16) + | + ((encoded_result[encode_pos + 2] & 0xFF) << 8) | (encoded_result[encode_pos + 3] & 0xFF); + encode_pos += 4; + + int block_size = ((encoded_result[encode_pos] & 0xFF) << 24) | ((encoded_result[encode_pos + 1] & 0xFF) << 16) | + ((encoded_result[encode_pos + 2] & 0xFF) << 8) | (encoded_result[encode_pos + 3] & 0xFF); + encode_pos += 4; + + int num_blocks = data_length / block_size; + + // 查询结果 + int[] result = new int[data_length]; + int[] result_length = new int[1]; + + for (int i = 0; i < num_blocks; i++) { + encode_pos = BlockQueryIndex(encoded_result, i, block_size, + block_size, encode_pos, upper_bound, + result, result_length); + } + + int remainder = data_length % block_size; + + if (remainder <= 3) { + for (int i = 0; i < remainder; i++) { + int value = ((encoded_result[encode_pos] & 0xFF) << 24) | + ((encoded_result[encode_pos + 1] & 0xFF) << 16) | + ((encoded_result[encode_pos + 2] & 0xFF) << 8) | (encoded_result[encode_pos + 3] & 0xFF); + if (value < upper_bound) { + result[result_length[0]] = value; + result_length[0]++; + } + encode_pos += 4; + } + } else { + encode_pos = BlockQueryIndex(encoded_result, num_blocks, block_size, + remainder, encode_pos, upper_bound, + result, result_length); + } + + } + + public static int BlockQueryIndex(byte[] encoded_result, int block_index, int block_size, int remainder, + int 
encode_pos, int upper_bound, int[] result, int[] result_length) { + int[] min_delta = new int[3]; + + min_delta[0] = ((encoded_result[encode_pos] & 0xFF) << 24) | ((encoded_result[encode_pos + 1] & 0xFF) << 16) | + ((encoded_result[encode_pos + 2] & 0xFF) << 8) | (encoded_result[encode_pos + 3] & 0xFF); + encode_pos += 4; + + // int[] block_data = new int[remainder]; + + int m = encoded_result[encode_pos]; + encode_pos += 1; + + upper_bound -= min_delta[0]; + + // 候选索引列表,当前分列值和 upper_bound 相应值相等的索引 + int[] candidate_indices = new int[remainder]; + int candidate_length = 0; + for (int i = 0; i < remainder; i++) { + candidate_indices[i] = i; + candidate_length++; + } + + if (m == 0) { + if (upper_bound > 0) { + for (int i = 0; i < remainder; i++) { + result[result_length[0]] = block_size * block_index + i; + result_length[0]++; + } + } + return encode_pos; + } + + int bw = SubcolumnTest.bitWidth(block_size); + + int beta = encoded_result[encode_pos]; + encode_pos += 1; + + int l = (m + beta - 1) / beta; + + int[] bitWidthList = new int[l]; + + encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, 8, l, bitWidthList); + + int[][] subcolumnList = new int[l][remainder]; + + int[] encodingType = new int[l]; + + encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, 1, l, encodingType); + + for (int i = l - 1; i >= 0; i--) { + int type = encodingType[i]; + if (type == 0) { + + if (upper_bound <= 0) { + encode_pos *= 8; + encode_pos += bitWidthList[i] * remainder; + encode_pos = (encode_pos + 7) / 8; + continue; + } + + encode_pos *= 8; + + int new_length = 0; + for (int j = 0; j < candidate_length; j++) { + int index = candidate_indices[j]; + + subcolumnList[i][index] = SubcolumnTest.bytesToInt(encoded_result, + encode_pos + index * bitWidthList[i], bitWidthList[i]); + int value = (upper_bound >> (i * beta)) & ((1 << beta) - 1); + if (subcolumnList[i][index] < value) { + result[result_length[0]] = block_size * block_index + index; + 
result_length[0]++; + } else if (subcolumnList[i][index] == value) { + candidate_indices[new_length] = index; + new_length++; + } + } + + candidate_length = new_length; + + encode_pos += remainder * bitWidthList[i]; + encode_pos = (encode_pos + 7) / 8; + + } else { + + int index = ((encoded_result[encode_pos] & 0xFF) << 8) | (encoded_result[encode_pos + 1] & 0xFF); + + encode_pos += 2; + + if (upper_bound <= 0) { + encode_pos *= 8; + encode_pos += bw * index; + encode_pos = (encode_pos + 7) / 8; + + encode_pos *= 8; + encode_pos += bitWidthList[i] * index; + encode_pos = (encode_pos + 7) / 8; + continue; + } + + int[] run_length = new int[index]; + int[] rle_values = new int[index]; + + encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, bw, index, run_length); + encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, bitWidthList[i], index, + rle_values); + + int new_length = 0; + int rleIndex = 0; + int currentPos = 0; + int value = (upper_bound >> (i * beta)) & ((1 << beta) - 1); + + for (int j = 0; j < candidate_length; j++) { + int index_candidate = candidate_indices[j]; + + while (rleIndex < index && currentPos + run_length[rleIndex] <= index_candidate) { + currentPos += run_length[rleIndex]; + rleIndex++; + } + + if (rleIndex < index) { + if (rle_values[rleIndex] < value) { + result[result_length[0]] = block_size * block_index + index_candidate; + result_length[0]++; + } else if (rle_values[rleIndex] == value) { + candidate_indices[new_length] = index_candidate; + new_length++; + } + } + } + + candidate_length = new_length; + + } + } + + return encode_pos; + } + + public static int getDecimalPrecision(String str) { + // 查找小数点的位置 + int decimalIndex = str.indexOf("."); + + // 如果没有小数点,精度为0 + if (decimalIndex == -1) { + return 0; + } + + // 获取小数点后的部分并返回其长度 + return str.substring(decimalIndex + 1).length(); + } + + public static String extractFileName(String path) { + if (path == null || path.isEmpty()) { + return ""; + } + + 
File file = new File(path); + String fileName = file.getName(); + + int dotIndex = fileName.lastIndexOf('.'); + + if (dotIndex == -1 || dotIndex == 0) { + return fileName; + } + + return fileName.substring(0, dotIndex); + } + + @Test + public void testQuery() throws IOException { + String parent_dir = "D:/github/xjz17/subcolumn/"; + + String input_parent_dir = parent_dir + "dataset/"; + + String output_parent_dir = "D:/encoding-subcolumn/result/query_vs_block/"; + // String output_parent_dir = parent_dir + "result/query_vs_block/"; + + int[] block_size_list = { 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192 }; + + HashMap<String, Integer> queryRange = new HashMap<>(); + + queryRange.put("Bird-migration", 2600000); + queryRange.put("Bitcoin-price", 170000000); + queryRange.put("City-temp", 700); + queryRange.put("Dewpoint-temp", 9600); + queryRange.put("IR-bio-temp", -200); + queryRange.put("PM10-dust", 2000); + queryRange.put("Stocks-DE", 90000); + queryRange.put("Stocks-UK", 30000); + queryRange.put("Stocks-USA", 6000); + queryRange.put("Wind-Speed", 60); + queryRange.put("Wine-Tasting", 10); + + int repeatTime = 200; + + // repeatTime = 1; + + List<String> integerDatasets = new ArrayList<>(); + integerDatasets.add("Wine-Tasting"); + + for (int block_size : block_size_list) { + String outputPath = output_parent_dir + "subcolumn_query_less_block_" + block_size + ".csv"; + + CsvWriter writer = new CsvWriter(outputPath, ',', StandardCharsets.UTF_8); + writer.setRecordDelimiter('\n'); + + String[] head = { + "Dataset", + "Encoding Algorithm", + "Encoding Time", + "Decoding Time", + "Points", + "Compressed Size", + "Compression Ratio" + }; + writer.writeRecord(head); + + File directory = new File(input_parent_dir); + // File[] csvFiles = directory.listFiles(); + File[] csvFiles = directory.listFiles((dir, name) -> name.endsWith(".csv")); + + for (File file : csvFiles) { + String datasetName = extractFileName(file.toString()); + System.out.println(datasetName); + + 
InputStream inputStream = Files.newInputStream(file.toPath()); + + CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); + ArrayList<Float> data1 = new ArrayList<>(); + + int max_decimal = 0; + while (loader.readRecord()) { + String f_str = loader.getValues()[0]; + if (f_str.isEmpty()) { + continue; + } + int cur_decimal = getDecimalPrecision(f_str); + if (cur_decimal > max_decimal) + max_decimal = cur_decimal; + data1.add(Float.valueOf(f_str)); + } + inputStream.close(); + int[] data2_arr = new int[data1.size()]; + int max_mul = (int) Math.pow(10, max_decimal); + for (int i = 0; i < data1.size(); i++) { + data2_arr[i] = (int) (data1.get(i) * max_mul); + } + + System.out.println(max_decimal); + byte[] encoded_result = new byte[data2_arr.length * 4]; + + long encodeTime = 0; + long decodeTime = 0; + double ratio = 0; + double compressed_size = 0; + + int length = 0; + + long s = System.nanoTime(); + for (int repeat = 0; repeat < repeatTime; repeat++) { + length = SubcolumnTest.Encoder(data2_arr, block_size, encoded_result); + } + + long e = System.nanoTime(); + encodeTime += ((e - s) / repeatTime); + compressed_size += length; + + double ratioTmp; + + if (integerDatasets.contains(datasetName)) { + ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); + } else { + ratioTmp = compressed_size / (double) (data1.size() * Long.BYTES); + } + + System.out.println("Query"); + + s = System.nanoTime(); + + for (int repeat = 0; repeat < repeatTime; repeat++) { + Query(encoded_result, queryRange.get(datasetName)); + } + + e = System.nanoTime(); + decodeTime += ((e - s) / repeatTime); + + String[] record = { + datasetName, + "Sub-columns", + String.valueOf(encodeTime), + String.valueOf(decodeTime), + String.valueOf(data1.size()), + String.valueOf(compressed_size), + String.valueOf(ratio) + }; + writer.writeRecord(record); + + System.out.println("block_size: " + block_size); + + System.out.println(ratio); + } + + writer.close(); + } + } + + @Test + 
public void testQueryBeta() throws IOException { + String parent_dir = "D:/github/xjz17/subcolumn/"; + + String input_parent_dir = parent_dir + "dataset/"; + + String output_parent_dir = "D:/encoding-subcolumn/result/query_vs_beta/"; + // String output_parent_dir = parent_dir + "result/query_vs_beta/"; + + int[] beta_list = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 }; + + int block_size = 512; + + HashMap<String, Integer> queryRange = new HashMap<>(); + + queryRange.put("Bird-migration", 2600000); + queryRange.put("Bitcoin-price", 170000000); + queryRange.put("City-temp", 700); + queryRange.put("Dewpoint-temp", 9600); + queryRange.put("IR-bio-temp", -200); + queryRange.put("PM10-dust", 2000); + queryRange.put("Stocks-DE", 90000); + queryRange.put("Stocks-UK", 30000); + queryRange.put("Stocks-USA", 6000); + queryRange.put("Wind-Speed", 60); + queryRange.put("Wine-Tasting", 10); + + int repeatTime = 200; + + // repeatTime = 1; + + List<String> integerDatasets = new ArrayList<>(); + integerDatasets.add("Wine-Tasting"); + + for (int beta : beta_list) { + String outputPath = output_parent_dir + "subcolumn_query_less_beta_" + beta + ".csv"; + + CsvWriter writer = new CsvWriter(outputPath, ',', StandardCharsets.UTF_8); + writer.setRecordDelimiter('\n'); + + String[] head = { + "Dataset", + "Encoding Algorithm", + "Encoding Time", + "Decoding Time", + "Points", + "Compressed Size", + "Compression Ratio" + }; + writer.writeRecord(head); + + File directory = new File(input_parent_dir); + // File[] csvFiles = directory.listFiles(); + File[] csvFiles = directory.listFiles((dir, name) -> name.endsWith(".csv")); + + for (File file : csvFiles) { + String datasetName = extractFileName(file.toString()); + System.out.println(datasetName); + + InputStream inputStream = Files.newInputStream(file.toPath()); + + CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); + ArrayList<Float> data1 = new 
ArrayList<>(); + + int max_decimal = 0; + while (loader.readRecord()) { + String f_str = loader.getValues()[0]; + if (f_str.isEmpty()) { + continue; + } + int cur_decimal = getDecimalPrecision(f_str); + if (cur_decimal > max_decimal) + max_decimal = cur_decimal; + data1.add(Float.valueOf(f_str)); + } + inputStream.close(); + int[] data2_arr = new int[data1.size()]; + int max_mul = (int) Math.pow(10, max_decimal); + for (int i = 0; i < data1.size(); i++) { + data2_arr[i] = (int) (data1.get(i) * max_mul); + } + + System.out.println(max_decimal); + byte[] encoded_result = new byte[data2_arr.length * 4]; + + long encodeTime = 0; + long decodeTime = 0; + double ratio = 0; + double compressed_size = 0; + + int length = 0; + + long s = System.nanoTime(); + for (int repeat = 0; repeat < repeatTime; repeat++) { + length = SubcolumnBetaTest.Encoder(data2_arr, block_size, encoded_result, beta); + } + + long e = System.nanoTime(); + encodeTime += ((e - s) / repeatTime); + compressed_size += length; + + double ratioTmp; + + if (integerDatasets.contains(datasetName)) { + ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); + } else { + ratioTmp = compressed_size / (double) (data1.size() * Long.BYTES); + } + + System.out.println("Query"); + + s = System.nanoTime(); + + for (int repeat = 0; repeat < repeatTime; repeat++) { + SubcolumnQueryLessTest.Query(encoded_result, queryRange.get(datasetName)); + } + + e = System.nanoTime(); + decodeTime += ((e - s) / repeatTime); + + String[] record = { + datasetName, + "Sub-columns", + String.valueOf(encodeTime), + String.valueOf(decodeTime), + String.valueOf(data1.size()), + String.valueOf(compressed_size), + String.valueOf(ratio) + }; + writer.writeRecord(record); + + System.out.println("beta: " + beta); + + System.out.println(ratio); + } + + writer.close(); + } + } + +}
diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnQueryMaxTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnQueryMaxTest.java new file mode 100644 index 0000000..483045b --- /dev/null +++ b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnQueryMaxTest.java
@@ -0,0 +1,511 @@ +package org.apache.iotdb.tsfile.encoding; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.List; + +import org.junit.Test; + +import com.csvreader.CsvReader; +import com.csvreader.CsvWriter; + +public class SubcolumnQueryMaxTest { + + public static void Query(byte[] encoded_result) { + + int encode_pos = 0; + + int data_length = ((encoded_result[encode_pos] & 0xFF) << 24) | ((encoded_result[encode_pos + 1] & 0xFF) << 16) + | + ((encoded_result[encode_pos + 2] & 0xFF) << 8) | (encoded_result[encode_pos + 3] & 0xFF); + encode_pos += 4; + + int block_size = ((encoded_result[encode_pos] & 0xFF) << 24) | ((encoded_result[encode_pos + 1] & 0xFF) << 16) | + ((encoded_result[encode_pos + 2] & 0xFF) << 8) | (encoded_result[encode_pos + 3] & 0xFF); + encode_pos += 4; + + int num_blocks = data_length / block_size; + + // 查询结果 + int[] result = new int[data_length]; + int[] result_length = new int[1]; + + for (int i = 0; i < num_blocks; i++) { + encode_pos = BlockQueryMax(encoded_result, i, block_size, block_size, encode_pos, result, + result_length); + } + + int remainder = data_length % block_size; + + if (remainder <= 3) { + for (int i = 0; i < remainder; i++) { + int value = ((encoded_result[encode_pos] & 0xFF) << 24) | + ((encoded_result[encode_pos + 1] & 0xFF) << 16) | + ((encoded_result[encode_pos + 2] & 0xFF) << 8) | (encoded_result[encode_pos + 3] & 0xFF); + result[result_length[0]] = value; + result_length[0]++; + encode_pos += 4; + } + } else { + encode_pos = BlockQueryMax(encoded_result, num_blocks, block_size, remainder, encode_pos, + result, result_length); + } + + // for (int i = 0; i < result_length[0]; i++) { + // System.out.print(result[i] + " "); + // } + // System.out.println(); + + } + + public static int BlockQueryMax(byte[] encoded_result, int block_index, int block_size, int remainder, 
+ int encode_pos, int[] result, int[] result_length) { + int[] min_delta = new int[3]; + + min_delta[0] = ((encoded_result[encode_pos] & 0xFF) << 24) | ((encoded_result[encode_pos + 1] & 0xFF) << 16) | + ((encoded_result[encode_pos + 2] & 0xFF) << 8) | (encoded_result[encode_pos + 3] & 0xFF); + encode_pos += 4; + + int m = encoded_result[encode_pos]; + encode_pos += 1; + + // System.out.println("m: " + m); + + int[] candidate_indices = new int[remainder]; + int candidate_length = 0; + for (int i = 0; i < remainder; i++) { + candidate_indices[i] = i; + candidate_length++; + } + + if (m == 0) { + result[result_length[0]] = min_delta[0]; + result_length[0]++; + return encode_pos; + } + + int bw = SubcolumnTest.bitWidth(block_size); + + int beta = encoded_result[encode_pos]; + encode_pos += 1; + + // System.out.println("beta: " + beta); + + int l = (m + beta - 1) / beta; + + int[] bitWidthList = new int[l]; + + encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, 8, l, bitWidthList); + + // int[][] subcolumnList = new int[l][remainder]; + + int[] encodingType = new int[l]; + + encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, 1, l, encodingType); + + for (int i = l - 1; i >= 0; i--) { + int type = encodingType[i]; + if (type == 0) { + if (candidate_length == 1) { + encode_pos *= 8; + encode_pos += bitWidthList[i] * remainder; + encode_pos = (encode_pos + 7) / 8; + continue; + } + + int maxPart = 0; + + int new_length = 0; + for (int j = 0; j < candidate_length; j++) { + int index = candidate_indices[j]; + int value = SubcolumnTest.bytesToInt(encoded_result, + encode_pos * 8 + index * bitWidthList[i], bitWidthList[i]); + + if (value > maxPart) { + maxPart = value; + + new_length = 0; + candidate_indices[new_length] = index; + new_length++; + } else if (value == maxPart) { + candidate_indices[new_length] = index; + new_length++; + } + } + + encode_pos *= 8; + encode_pos += bitWidthList[i] * remainder; + encode_pos = (encode_pos + 
7) / 8; + + // for (int j = 0; j < candidate_length; j++) { + // int index = candidate_indices[j]; + // if (subcolumnList[i][index] == maxPart) { + // candidate_indices[new_length] = index; + // new_length++; + // } + // } + + candidate_length = new_length; + + } else { + int index = ((encoded_result[encode_pos] & 0xFF) << 8) | (encoded_result[encode_pos + 1] & 0xFF); + encode_pos += 2; + + if (candidate_length == 1) { + encode_pos *= 8; + encode_pos += bw * index; + encode_pos = (encode_pos + 7) / 8; + + encode_pos *= 8; + encode_pos += bitWidthList[i] * index; + encode_pos = (encode_pos + 7) / 8; + + continue; + } + + int[] run_length = new int[index]; + int[] rle_values = new int[index]; + + encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, bw, index, run_length); + encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, bitWidthList[i], index, + rle_values); + + int maxPart = 0; + + int new_length = 0; + int rleIndex = 0; + int currentPos = 0; + + for (int j = 0; j < candidate_length; j++) { + int index_candidate = candidate_indices[j]; + + while (rleIndex < index && currentPos + run_length[rleIndex] <= index_candidate) { + currentPos += run_length[rleIndex]; + rleIndex++; + } + + if (rleIndex < index) { + if (rle_values[rleIndex] > maxPart) { + maxPart = rle_values[rleIndex]; + + new_length = 0; + } else if (rle_values[rleIndex] == maxPart) { + candidate_indices[new_length] = index_candidate; + new_length++; + } + } + } + + // for (int j = 0; j < candidate_length; j++) { + // int index_candidate = candidate_indices[j]; + + // while (rleIndex < index && currentPos + run_length[rleIndex] <= + // index_candidate) { + // currentPos += run_length[rleIndex]; + // rleIndex++; + // } + + // if (rleIndex < index) { + // if (rle_values[rleIndex] == maxPart) { + // candidate_indices[new_length] = index_candidate; + // new_length++; + // } + // } + // } + + candidate_length = new_length; + } + } + + result[result_length[0]] = 
candidate_indices[0]; + result_length[0]++; + + return encode_pos; + } + + public static int getDecimalPrecision(String str) { + // 查找小数点的位置 + int decimalIndex = str.indexOf("."); + + // 如果没有小数点,精度为0 + if (decimalIndex == -1) { + return 0; + } + + // 获取小数点后的部分并返回其长度 + return str.substring(decimalIndex + 1).length(); + } + + public static String extractFileName(String path) { + if (path == null || path.isEmpty()) { + return ""; + } + + File file = new File(path); + String fileName = file.getName(); + + int dotIndex = fileName.lastIndexOf('.'); + + if (dotIndex == -1 || dotIndex == 0) { + return fileName; + } + + return fileName.substring(0, dotIndex); + } + + @Test + public void testQuery() throws IOException { + String parent_dir = "D:/github/xjz17/subcolumn/"; + + String input_parent_dir = parent_dir + "dataset/"; + + String output_parent_dir = "D:/encoding-subcolumn/result/query_vs_block/"; + // String output_parent_dir = parent_dir + "result/query_vs_block/"; + + int[] block_size_list = { 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192 }; + + int repeatTime = 200; + + // repeatTime = 1; + + List<String> integerDatasets = new ArrayList<>(); + integerDatasets.add("Wine-Tasting"); + + for (int block_size : block_size_list) { + String outputPath = output_parent_dir + "subcolumn_query_max_block_" + block_size + ".csv"; + + CsvWriter writer = new CsvWriter(outputPath, ',', StandardCharsets.UTF_8); + writer.setRecordDelimiter('\n'); + + String[] head = { + "Dataset", + "Encoding Algorithm", + "Encoding Time", + "Decoding Time", + "Points", + "Compressed Size", + "Compression Ratio" + }; + writer.writeRecord(head); + + File directory = new File(input_parent_dir); + // File[] csvFiles = directory.listFiles(); + File[] csvFiles = directory.listFiles((dir, name) -> name.endsWith(".csv")); + + for (File file : csvFiles) { + String datasetName = extractFileName(file.toString()); + System.out.println(datasetName); + + InputStream inputStream = 
Files.newInputStream(file.toPath()); + + CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); + ArrayList<Float> data1 = new ArrayList<>(); + + int max_decimal = 0; + while (loader.readRecord()) { + String f_str = loader.getValues()[0]; + if (f_str.isEmpty()) { + continue; + } + int cur_decimal = getDecimalPrecision(f_str); + if (cur_decimal > max_decimal) + max_decimal = cur_decimal; + data1.add(Float.valueOf(f_str)); + } + inputStream.close(); + int[] data2_arr = new int[data1.size()]; + int max_mul = (int) Math.pow(10, max_decimal); + for (int i = 0; i < data1.size(); i++) { + data2_arr[i] = (int) (data1.get(i) * max_mul); + } + + System.out.println(max_decimal); + byte[] encoded_result = new byte[data2_arr.length * 4]; + + long encodeTime = 0; + long decodeTime = 0; + double ratio = 0; + double compressed_size = 0; + + int length = 0; + + long s = System.nanoTime(); + for (int repeat = 0; repeat < repeatTime; repeat++) { + length = SubcolumnTest.Encoder(data2_arr, block_size, encoded_result); + } + + long e = System.nanoTime(); + encodeTime += ((e - s) / repeatTime); + compressed_size += length; + + double ratioTmp; + + if (integerDatasets.contains(datasetName)) { + ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); + } else { + ratioTmp = compressed_size / (double) (data1.size() * Long.BYTES); + } + + System.out.println("Query"); + + s = System.nanoTime(); + + for (int repeat = 0; repeat < repeatTime; repeat++) { + Query(encoded_result); + } + + e = System.nanoTime(); + decodeTime += ((e - s) / repeatTime); + + String[] record = { + datasetName, + "Sub-columns", + String.valueOf(encodeTime), + String.valueOf(decodeTime), + String.valueOf(data1.size()), + String.valueOf(compressed_size), + String.valueOf(ratio) + }; + writer.writeRecord(record); + + System.out.println("block_size: " + block_size); + + System.out.println(ratio); + } + + writer.close(); + } + } + + @Test + public void testQueryBeta() throws IOException { + 
String parent_dir = "D:/github/xjz17/subcolumn/"; + + String input_parent_dir = parent_dir + "dataset/"; + + String output_parent_dir = "D:/encoding-subcolumn/result/query_vs_beta/"; + // String output_parent_dir = parent_dir + "result/query_vs_beta/"; + + int[] beta_list = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 }; + + int block_size = 512; + + int repeatTime = 200; + + // repeatTime = 1; + + List<String> integerDatasets = new ArrayList<>(); + integerDatasets.add("Wine-Tasting"); + + for (int beta : beta_list) { + String outputPath = output_parent_dir + "subcolumn_query_max_beta_" + beta + ".csv"; + + CsvWriter writer = new CsvWriter(outputPath, ',', StandardCharsets.UTF_8); + writer.setRecordDelimiter('\n'); + + String[] head = { + "Dataset", + "Encoding Algorithm", + "Encoding Time", + "Decoding Time", + "Points", + "Compressed Size", + "Compression Ratio" + }; + writer.writeRecord(head); + + File directory = new File(input_parent_dir); + // File[] csvFiles = directory.listFiles(); + File[] csvFiles = directory.listFiles((dir, name) -> name.endsWith(".csv")); + + for (File file : csvFiles) { + String datasetName = extractFileName(file.toString()); + System.out.println(datasetName); + + InputStream inputStream = Files.newInputStream(file.toPath()); + + CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); + ArrayList<Float> data1 = new ArrayList<>(); + + int max_decimal = 0; + while (loader.readRecord()) { + String f_str = loader.getValues()[0]; + if (f_str.isEmpty()) { + continue; + } + int cur_decimal = getDecimalPrecision(f_str); + if (cur_decimal > max_decimal) + max_decimal = cur_decimal; + data1.add(Float.valueOf(f_str)); + } + inputStream.close(); + int[] data2_arr = new int[data1.size()]; + int max_mul = (int) Math.pow(10, max_decimal); + for (int i = 0; i < data1.size(); i++) { + data2_arr[i] = (int) (data1.get(i) * max_mul); + } + + 
System.out.println(max_decimal); + byte[] encoded_result = new byte[data2_arr.length * 4]; + + long encodeTime = 0; + long decodeTime = 0; + double ratio = 0; + double compressed_size = 0; + + int length = 0; + + long s = System.nanoTime(); + for (int repeat = 0; repeat < repeatTime; repeat++) { + length = SubcolumnBetaTest.Encoder(data2_arr, block_size, encoded_result, beta); + } + + long e = System.nanoTime(); + encodeTime += ((e - s) / repeatTime); + compressed_size += length; + + double ratioTmp; + + if (integerDatasets.contains(datasetName)) { + ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); + } else { + ratioTmp = compressed_size / (double) (data1.size() * Long.BYTES); + } + + System.out.println("Query"); + + s = System.nanoTime(); + + for (int repeat = 0; repeat < repeatTime; repeat++) { + SubcolumnQueryMaxTest.Query(encoded_result); + } + + e = System.nanoTime(); + decodeTime += ((e - s) / repeatTime); + + String[] record = { + datasetName, + "Sub-columns", + String.valueOf(encodeTime), + String.valueOf(decodeTime), + String.valueOf(data1.size()), + String.valueOf(compressed_size), + String.valueOf(ratio) + }; + writer.writeRecord(record); + + System.out.println("beta: " + beta); + + System.out.println(ratio); + } + + writer.close(); + } + } + +}
diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnQuerySum2Test.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnQuerySum2Test.java new file mode 100644 index 0000000..f840e90 --- /dev/null +++ b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnQuerySum2Test.java
@@ -0,0 +1,504 @@ +package org.apache.iotdb.tsfile.encoding; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; + +import org.junit.Test; + +import com.csvreader.CsvReader; +import com.csvreader.CsvWriter; + +public class SubcolumnQuerySum2Test { + + public static void Query(byte[] encoded_result, int target) { + + int encode_pos = 0; + + int data_length = ((encoded_result[encode_pos] & 0xFF) << 24) | ((encoded_result[encode_pos + 1] & 0xFF) << 16) + | + ((encoded_result[encode_pos + 2] & 0xFF) << 8) | (encoded_result[encode_pos + 3] & 0xFF); + encode_pos += 4; + + int block_size = ((encoded_result[encode_pos] & 0xFF) << 24) | ((encoded_result[encode_pos + 1] & 0xFF) << 16) | + ((encoded_result[encode_pos + 2] & 0xFF) << 8) | (encoded_result[encode_pos + 3] & 0xFF); + encode_pos += 4; + + int num_blocks = data_length / block_size; + + // 查询结果 + int[] result = new int[data_length]; + int[] result_length = new int[1]; + + for (int i = 0; i < num_blocks; i++) { + encode_pos = BlockQuerySum(encoded_result, i, block_size, block_size, encode_pos, target, result, + result_length); + } + + int remainder = data_length % block_size; + + if (remainder <= 3) { + for (int i = 0; i < remainder; i++) { + int value = ((encoded_result[encode_pos] & 0xFF) << 24) | + ((encoded_result[encode_pos + 1] & 0xFF) << 16) | + ((encoded_result[encode_pos + 2] & 0xFF) << 8) | (encoded_result[encode_pos + 3] & 0xFF); + encode_pos += 4; + result[result_length[0]] = value; + result_length[0]++; + } + } else { + encode_pos = BlockQuerySum(encoded_result, num_blocks, block_size, remainder, encode_pos, target, + result, result_length); + } + + // for (int i = 0; i < result_length[0]; i++) { + // System.out.print(result[i] + " "); + // } + // System.out.println(); + + } + + public static int BlockQuerySum(byte[] 
encoded_result, int block_index, int block_size, int remainder, + int encode_pos, int target, int[] result, int[] result_length) { + int[] min_delta = new int[3]; + + min_delta[0] = ((encoded_result[encode_pos] & 0xFF) << 24) | ((encoded_result[encode_pos + 1] & 0xFF) << 16) | + ((encoded_result[encode_pos + 2] & 0xFF) << 8) | (encoded_result[encode_pos + 3] & 0xFF); + encode_pos += 4; + + int m = encoded_result[encode_pos]; + encode_pos += 1; + + target -= min_delta[0]; + + int[] candidate_indices = new int[remainder]; + int candidate_length = 0; + for (int i = 0; i < remainder; i++) { + candidate_indices[i] = i; + candidate_length++; + } + + if (m == 0) { + if (target == 0) { + result[result_length[0]] = min_delta[0] * remainder; + result_length[0]++; + } + return encode_pos; + } + + int bw = SubcolumnTest.bitWidth(block_size); + + int beta = encoded_result[encode_pos]; + encode_pos += 1; + + int l = (m + beta - 1) / beta; + + int[] bitWidthList = new int[l]; + + encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, 8, l, bitWidthList); + + int[][] subcolumnList = new int[l][remainder]; + + int[] encodingType = new int[l]; + + encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, 1, l, encodingType); + + for (int i = l - 1; i >= 0; i--) { + int type = encodingType[i]; + if (type == 0) { + + if (target < 0) { + encode_pos *= 8; + encode_pos += bitWidthList[i] * remainder; + encode_pos = (encode_pos + 7) / 8; + continue; + } + + encode_pos *= 8; + + int new_length = 0; + for (int j = 0; j < candidate_length; j++) { + int index = candidate_indices[j]; + + subcolumnList[i][index] = SubcolumnTest.bytesToInt(encoded_result, + encode_pos + index * bitWidthList[i], bitWidthList[i]); + int value = (target >> (i * beta)) & ((1 << beta) - 1); + + if (subcolumnList[i][index] == value) { + candidate_indices[new_length] = index; + new_length++; + } + } + + candidate_length = new_length; + + encode_pos += remainder * bitWidthList[i]; + 
encode_pos = (encode_pos + 7) / 8; + + } else { + int index = ((encoded_result[encode_pos] & 0xFF) << 8) | (encoded_result[encode_pos + 1] & 0xFF); + + encode_pos += 2; + + if (target < 0) { + encode_pos *= 8; + encode_pos += bw * index; + encode_pos = (encode_pos + 7) / 8; + + encode_pos *= 8; + encode_pos += bitWidthList[i] * index; + encode_pos = (encode_pos + 7) / 8; + continue; + } + + int[] run_length = new int[index]; + int[] rle_values = new int[index]; + + encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, bw, index, run_length); + encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, bitWidthList[i], index, + rle_values); + + int new_length = 0; + int rleIndex = 0; + int currentPos = 0; + int value = (target >> (i * beta)) & ((1 << beta) - 1); + + for (int j = 0; j < candidate_length; j++) { + int index_candidate = candidate_indices[j]; + + while (rleIndex < index && currentPos + run_length[rleIndex] <= index_candidate) { + currentPos += run_length[rleIndex]; + rleIndex++; + } + + if (rleIndex < index) { + if (rle_values[rleIndex] == value) { + candidate_indices[new_length] = index_candidate; + new_length++; + } + } + } + + candidate_length = new_length; + } + } + + result[result_length[0]] = (min_delta[0] + target) * candidate_length; + result_length[0]++; + + return encode_pos; + } + + public static int getDecimalPrecision(String str) { + // 查找小数点的位置 + int decimalIndex = str.indexOf("."); + + // 如果没有小数点,精度为0 + if (decimalIndex == -1) { + return 0; + } + + // 获取小数点后的部分并返回其长度 + return str.substring(decimalIndex + 1).length(); + } + + public static String extractFileName(String path) { + if (path == null || path.isEmpty()) { + return ""; + } + + File file = new File(path); + String fileName = file.getName(); + + int dotIndex = fileName.lastIndexOf('.'); + + if (dotIndex == -1 || dotIndex == 0) { + return fileName; + } + + return fileName.substring(0, dotIndex); + } + + @Test + public void testQuery() throws 
IOException { + String parent_dir = "D:/github/xjz17/subcolumn/"; + + String input_parent_dir = parent_dir + "dataset/"; + + String output_parent_dir = "D:/encoding-subcolumn/result/query_vs_block/"; + // String output_parent_dir = parent_dir + "result/query_vs_block/"; + + int[] block_size_list = { 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192 }; + + HashMap<String, Integer> queryRange = new HashMap<>(); + queryRange.put("Bird-migration", 2600000); + queryRange.put("Bitcoin-price", 170000000); + queryRange.put("City-temp", 700); + queryRange.put("Dewpoint-temp", 9600); + queryRange.put("IR-bio-temp", -200); + queryRange.put("PM10-dust", 2000); + queryRange.put("Stocks-DE", 90000); + queryRange.put("Stocks-UK", 30000); + queryRange.put("Stocks-USA", 6000); + queryRange.put("Wind-Speed", 60); + queryRange.put("Wine-Tasting", 10); + + int repeatTime = 200; + + // repeatTime = 1; + + List<String> integerDatasets = new ArrayList<>(); + integerDatasets.add("Wine-Tasting"); + + for (int block_size : block_size_list) { + String outputPath = output_parent_dir + "subcolumn_query_sum2_block_" + block_size + ".csv"; + + CsvWriter writer = new CsvWriter(outputPath, ',', StandardCharsets.UTF_8); + writer.setRecordDelimiter('\n'); + + String[] head = { + "Dataset", + "Encoding Algorithm", + "Encoding Time", + "Decoding Time", + "Points", + "Compressed Size", + "Compression Ratio" + }; + writer.writeRecord(head); + + File directory = new File(input_parent_dir); + // File[] csvFiles = directory.listFiles(); + File[] csvFiles = directory.listFiles((dir, name) -> name.endsWith(".csv")); + + for (File file : csvFiles) { + String datasetName = extractFileName(file.toString()); + System.out.println(datasetName); + + InputStream inputStream = Files.newInputStream(file.toPath()); + + CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); + ArrayList<Float> data1 = new ArrayList<>(); + + int max_decimal = 0; + while (loader.readRecord()) { + String f_str = 
loader.getValues()[0]; + if (f_str.isEmpty()) { + continue; + } + int cur_decimal = getDecimalPrecision(f_str); + if (cur_decimal > max_decimal) + max_decimal = cur_decimal; + data1.add(Float.valueOf(f_str)); + } + inputStream.close(); + int[] data2_arr = new int[data1.size()]; + int max_mul = (int) Math.pow(10, max_decimal); + for (int i = 0; i < data1.size(); i++) { + data2_arr[i] = (int) (data1.get(i) * max_mul); + } + + System.out.println(max_decimal); + byte[] encoded_result = new byte[data2_arr.length * 4]; + + long encodeTime = 0; + long decodeTime = 0; + double ratio = 0; + double compressed_size = 0; + + int length = 0; + + long s = System.nanoTime(); + for (int repeat = 0; repeat < repeatTime; repeat++) { + length = SubcolumnTest.Encoder(data2_arr, block_size, encoded_result); + } + + long e = System.nanoTime(); + encodeTime += ((e - s) / repeatTime); + compressed_size += length; + + double ratioTmp; + + if (integerDatasets.contains(datasetName)) { + ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); + } else { + ratioTmp = compressed_size / (double) (data1.size() * Long.BYTES); + } + + System.out.println("Query"); + + s = System.nanoTime(); + + for (int repeat = 0; repeat < repeatTime; repeat++) { + Query(encoded_result, queryRange.get(datasetName)); + } + + e = System.nanoTime(); + decodeTime += ((e - s) / repeatTime); + + String[] record = { + datasetName, + "Sub-columns", + String.valueOf(encodeTime), + String.valueOf(decodeTime), + String.valueOf(data1.size()), + String.valueOf(compressed_size), + String.valueOf(ratio) + }; + writer.writeRecord(record); + + System.out.println("block_size: " + block_size); + + System.out.println(ratio); + } + + writer.close(); + } + } + + @Test + public void testQueryBeta() throws IOException { + String parent_dir = "D:/github/xjz17/subcolumn/"; + + String input_parent_dir = parent_dir + "dataset/"; + + String output_parent_dir = "D:/encoding-subcolumn/result/query_vs_beta/"; + // String 
output_parent_dir = parent_dir + "result/query_vs_beta/"; + + int[] beta_list = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 }; + + int block_size = 512; + + HashMap<String, Integer> queryRange = new HashMap<>(); + + queryRange.put("Bird-migration", 2600000); + queryRange.put("Bitcoin-price", 170000000); + queryRange.put("City-temp", 700); + queryRange.put("Dewpoint-temp", 9600); + queryRange.put("IR-bio-temp", -200); + queryRange.put("PM10-dust", 2000); + queryRange.put("Stocks-DE", 90000); + queryRange.put("Stocks-UK", 30000); + queryRange.put("Stocks-USA", 6000); + queryRange.put("Wind-Speed", 60); + queryRange.put("Wine-Tasting", 10); + + int repeatTime = 200; + + // repeatTime = 1; + + List<String> integerDatasets = new ArrayList<>(); + integerDatasets.add("Wine-Tasting"); + + for (int beta : beta_list) { + String outputPath = output_parent_dir + "subcolumn_query_sum2_beta_" + beta + ".csv"; + + CsvWriter writer = new CsvWriter(outputPath, ',', StandardCharsets.UTF_8); + writer.setRecordDelimiter('\n'); + + String[] head = { + "Dataset", + "Encoding Algorithm", + "Encoding Time", + "Decoding Time", + "Points", + "Compressed Size", + "Compression Ratio" + }; + writer.writeRecord(head); + + File directory = new File(input_parent_dir); + // File[] csvFiles = directory.listFiles(); + File[] csvFiles = directory.listFiles((dir, name) -> name.endsWith(".csv")); + + for (File file : csvFiles) { + String datasetName = extractFileName(file.toString()); + System.out.println(datasetName); + + InputStream inputStream = Files.newInputStream(file.toPath()); + + CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); + ArrayList<Float> data1 = new ArrayList<>(); + + int max_decimal = 0; + while (loader.readRecord()) { + String f_str = loader.getValues()[0]; + if (f_str.isEmpty()) { + continue; + } + int cur_decimal = getDecimalPrecision(f_str); + if (cur_decimal > max_decimal) + 
max_decimal = cur_decimal; + data1.add(Float.valueOf(f_str)); + } + inputStream.close(); + int[] data2_arr = new int[data1.size()]; + int max_mul = (int) Math.pow(10, max_decimal); + for (int i = 0; i < data1.size(); i++) { + data2_arr[i] = (int) (data1.get(i) * max_mul); + } + + System.out.println(max_decimal); + byte[] encoded_result = new byte[data2_arr.length * 4]; + + long encodeTime = 0; + long decodeTime = 0; + double ratio = 0; + double compressed_size = 0; + + int length = 0; + + long s = System.nanoTime(); + for (int repeat = 0; repeat < repeatTime; repeat++) { + length = SubcolumnBetaTest.Encoder(data2_arr, block_size, encoded_result, beta); + } + + long e = System.nanoTime(); + encodeTime += ((e - s) / repeatTime); + compressed_size += length; + + double ratioTmp; + + if (integerDatasets.contains(datasetName)) { + ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); + } else { + ratioTmp = compressed_size / (double) (data1.size() * Long.BYTES); + } + + System.out.println("Query"); + + s = System.nanoTime(); + + for (int repeat = 0; repeat < repeatTime; repeat++) { + SubcolumnQuerySum2Test.Query(encoded_result, queryRange.get(datasetName)); + } + + e = System.nanoTime(); + decodeTime += ((e - s) / repeatTime); + + String[] record = { + datasetName, + "Sub-columns", + String.valueOf(encodeTime), + String.valueOf(decodeTime), + String.valueOf(data1.size()), + String.valueOf(compressed_size), + String.valueOf(ratio) + }; + writer.writeRecord(record); + + System.out.println("beta: " + beta); + + System.out.println(ratio); + } + + writer.close(); + } + } +}
diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnQuerySumTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnQuerySumTest.java new file mode 100644 index 0000000..e7efa97 --- /dev/null +++ b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnQuerySumTest.java
@@ -0,0 +1,418 @@ +package org.apache.iotdb.tsfile.encoding; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.List; + +import org.junit.Test; + +import com.csvreader.CsvReader; +import com.csvreader.CsvWriter; + +public class SubcolumnQuerySumTest { + + public static void Query(byte[] encoded_result) { + + int encode_pos = 0; + + int data_length = ((encoded_result[encode_pos] & 0xFF) << 24) | ((encoded_result[encode_pos + 1] & 0xFF) << 16) + | + ((encoded_result[encode_pos + 2] & 0xFF) << 8) | (encoded_result[encode_pos + 3] & 0xFF); + encode_pos += 4; + + int block_size = ((encoded_result[encode_pos] & 0xFF) << 24) | ((encoded_result[encode_pos + 1] & 0xFF) << 16) | + ((encoded_result[encode_pos + 2] & 0xFF) << 8) | (encoded_result[encode_pos + 3] & 0xFF); + encode_pos += 4; + + int num_blocks = data_length / block_size; + + // 查询结果 + int[] result = new int[data_length]; + int[] result_length = new int[1]; + + for (int i = 0; i < num_blocks; i++) { + encode_pos = BlockQuerySum(encoded_result, i, block_size, block_size, encode_pos, result, + result_length); + } + + int remainder = data_length % block_size; + + if (remainder <= 3) { + for (int i = 0; i < remainder; i++) { + int value = ((encoded_result[encode_pos] & 0xFF) << 24) | + ((encoded_result[encode_pos + 1] & 0xFF) << 16) | + ((encoded_result[encode_pos + 2] & 0xFF) << 8) | (encoded_result[encode_pos + 3] & 0xFF); + encode_pos += 4; + result[result_length[0]] = value; + result_length[0]++; + } + } else { + encode_pos = BlockQuerySum(encoded_result, num_blocks, block_size, remainder, encode_pos, + result, result_length); + } + + // for (int i = 0; i < result_length[0]; i++) { + // System.out.print(result[i] + " "); + // } + // System.out.println(); + + } + + public static int BlockQuerySum(byte[] encoded_result, int block_index, int block_size, int remainder, 
+ int encode_pos, int[] result, int[] result_length) { + int[] min_delta = new int[3]; + + min_delta[0] = ((encoded_result[encode_pos] & 0xFF) << 24) | ((encoded_result[encode_pos + 1] & 0xFF) << 16) | + ((encoded_result[encode_pos + 2] & 0xFF) << 8) | (encoded_result[encode_pos + 3] & 0xFF); + encode_pos += 4; + + int m = encoded_result[encode_pos]; + encode_pos += 1; + + if (m == 0) { + result[result_length[0]] = min_delta[0]; + result_length[0]++; + return encode_pos; + } + + int bw = SubcolumnTest.bitWidth(block_size); + + int beta = encoded_result[encode_pos]; + encode_pos += 1; + + int l = (m + beta - 1) / beta; + + int[] bitWidthList = new int[l]; + + encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, 8, l, bitWidthList); + + int[] encodingType = new int[l]; + + encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, 1, l, encodingType); + + for (int i = l - 1; i >= 0; i--) { + int type = encodingType[i]; + if (type == 0) { + + encode_pos *= 8; + + for (int j = 0; j < remainder; j++) { + int value = SubcolumnTest.bytesToInt(encoded_result, encode_pos + j * bitWidthList[i], + bitWidthList[i]); + result[result_length[0]] += value << (i * beta); + } + + encode_pos += remainder * bitWidthList[i]; + encode_pos = (encode_pos + 7) / 8; + + } else { + int index = ((encoded_result[encode_pos] & 0xFF) << 8) | (encoded_result[encode_pos + 1] & 0xFF); + + encode_pos += 2; + + int[] run_length = new int[index]; + int[] rle_values = new int[index]; + + encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, bw, index, run_length); + encode_pos = SubcolumnTest.decodeBitPacking(encoded_result, encode_pos, bitWidthList[i], index, + rle_values); + + for (int j = 0; j < index; j++) { + int runCount = j == 0 ? 
run_length[j] : run_length[j] - run_length[j - 1]; + result[result_length[0]] += (rle_values[j] << (i * beta)) * runCount; + // result[result_length[0]] += (rle_values[j] << (i * beta)) * run_length[j]; + } + } + } + + result_length[0]++; + + return encode_pos; + } + + public static int getDecimalPrecision(String str) { + // 查找小数点的位置 + int decimalIndex = str.indexOf("."); + + // 如果没有小数点,精度为0 + if (decimalIndex == -1) { + return 0; + } + + // 获取小数点后的部分并返回其长度 + return str.substring(decimalIndex + 1).length(); + } + + public static String extractFileName(String path) { + if (path == null || path.isEmpty()) { + return ""; + } + + File file = new File(path); + String fileName = file.getName(); + + int dotIndex = fileName.lastIndexOf('.'); + + if (dotIndex == -1 || dotIndex == 0) { + return fileName; + } + + return fileName.substring(0, dotIndex); + } + + @Test + public void testQuery() throws IOException { + String parent_dir = "D:/github/xjz17/subcolumn/"; + + String input_parent_dir = parent_dir + "dataset/"; + + String output_parent_dir = "D:/encoding-subcolumn/result/query_vs_block/"; + // String output_parent_dir = parent_dir + "result/query_vs_block/"; + + int[] block_size_list = { 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192 }; + + int repeatTime = 200; + + // repeatTime = 1; + + List<String> integerDatasets = new ArrayList<>(); + integerDatasets.add("Wine-Tasting"); + + for (int block_size : block_size_list) { + String outputPath = output_parent_dir + "subcolumn_query_sum_block_" + block_size + ".csv"; + + CsvWriter writer = new CsvWriter(outputPath, ',', StandardCharsets.UTF_8); + writer.setRecordDelimiter('\n'); + + String[] head = { + "Dataset", + "Encoding Algorithm", + "Encoding Time", + "Decoding Time", + "Points", + "Compressed Size", + "Compression Ratio" + }; + writer.writeRecord(head); + + File directory = new File(input_parent_dir); + // File[] csvFiles = directory.listFiles(); + File[] csvFiles = directory.listFiles((dir, name) -> 
name.endsWith(".csv")); + + for (File file : csvFiles) { + String datasetName = extractFileName(file.toString()); + System.out.println(datasetName); + + InputStream inputStream = Files.newInputStream(file.toPath()); + + CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); + ArrayList<Float> data1 = new ArrayList<>(); + + int max_decimal = 0; + while (loader.readRecord()) { + String f_str = loader.getValues()[0]; + if (f_str.isEmpty()) { + continue; + } + int cur_decimal = getDecimalPrecision(f_str); + if (cur_decimal > max_decimal) + max_decimal = cur_decimal; + data1.add(Float.valueOf(f_str)); + } + inputStream.close(); + int[] data2_arr = new int[data1.size()]; + int max_mul = (int) Math.pow(10, max_decimal); + for (int i = 0; i < data1.size(); i++) { + data2_arr[i] = (int) (data1.get(i) * max_mul); + } + + System.out.println(max_decimal); + byte[] encoded_result = new byte[data2_arr.length * 4]; + + long encodeTime = 0; + long decodeTime = 0; + double ratio = 0; + double compressed_size = 0; + + int length = 0; + + long s = System.nanoTime(); + for (int repeat = 0; repeat < repeatTime; repeat++) { + length = SubcolumnTest.Encoder(data2_arr, block_size, encoded_result); + } + + long e = System.nanoTime(); + encodeTime += ((e - s) / repeatTime); + compressed_size += length; + + double ratioTmp; + + if (integerDatasets.contains(datasetName)) { + ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); + } else { + ratioTmp = compressed_size / (double) (data1.size() * Long.BYTES); + } + + System.out.println("Query"); + + s = System.nanoTime(); + + for (int repeat = 0; repeat < repeatTime; repeat++) { + Query(encoded_result); + } + + e = System.nanoTime(); + decodeTime += ((e - s) / repeatTime); + + String[] record = { + datasetName, + "Sub-columns", + String.valueOf(encodeTime), + String.valueOf(decodeTime), + String.valueOf(data1.size()), + String.valueOf(compressed_size), + String.valueOf(ratio) + }; + writer.writeRecord(record); + + 
System.out.println("block_size: " + block_size); + + System.out.println(ratio); + } + + writer.close(); + } + } + + @Test + public void testQueryBeta() throws IOException { + String parent_dir = "D:/github/xjz17/subcolumn/"; + + String input_parent_dir = parent_dir + "dataset/"; + + String output_parent_dir = "D:/encoding-subcolumn/result/query_vs_beta/"; + // String output_parent_dir = parent_dir + "result/query_vs_beta/"; + + int[] beta_list = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 }; + + int block_size = 512; + + int repeatTime = 200; + + // repeatTime = 1; + + List<String> integerDatasets = new ArrayList<>(); + integerDatasets.add("Wine-Tasting"); + + for (int beta : beta_list) { + String outputPath = output_parent_dir + "subcolumn_query_sum_beta_" + beta + ".csv"; + + CsvWriter writer = new CsvWriter(outputPath, ',', StandardCharsets.UTF_8); + writer.setRecordDelimiter('\n'); + + String[] head = { + "Dataset", + "Encoding Algorithm", + "Encoding Time", + "Decoding Time", + "Points", + "Compressed Size", + "Compression Ratio" + }; + writer.writeRecord(head); + + File directory = new File(input_parent_dir); + // File[] csvFiles = directory.listFiles(); + File[] csvFiles = directory.listFiles((dir, name) -> name.endsWith(".csv")); + + for (File file : csvFiles) { + String datasetName = extractFileName(file.toString()); + System.out.println(datasetName); + + InputStream inputStream = Files.newInputStream(file.toPath()); + + CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); + ArrayList<Float> data1 = new ArrayList<>(); + + int max_decimal = 0; + while (loader.readRecord()) { + String f_str = loader.getValues()[0]; + if (f_str.isEmpty()) { + continue; + } + int cur_decimal = getDecimalPrecision(f_str); + if (cur_decimal > max_decimal) + max_decimal = cur_decimal; + data1.add(Float.valueOf(f_str)); + } + inputStream.close(); + int[] data2_arr = new int[data1.size()]; + 
int max_mul = (int) Math.pow(10, max_decimal); + for (int i = 0; i < data1.size(); i++) { + data2_arr[i] = (int) (data1.get(i) * max_mul); + } + + System.out.println(max_decimal); + byte[] encoded_result = new byte[data2_arr.length * 4]; + + long encodeTime = 0; + long decodeTime = 0; + double ratio = 0; + double compressed_size = 0; + + int length = 0; + + long s = System.nanoTime(); + for (int repeat = 0; repeat < repeatTime; repeat++) { + length = SubcolumnBetaTest.Encoder(data2_arr, block_size, encoded_result, beta); + } + + long e = System.nanoTime(); + encodeTime += ((e - s) / repeatTime); + compressed_size += length; + + double ratioTmp; + + if (integerDatasets.contains(datasetName)) { + ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); + } else { + ratioTmp = compressed_size / (double) (data1.size() * Long.BYTES); + } + + System.out.println("Query"); + + s = System.nanoTime(); + + for (int repeat = 0; repeat < repeatTime; repeat++) { + SubcolumnQuerySumTest.Query(encoded_result); + } + + e = System.nanoTime(); + decodeTime += ((e - s) / repeatTime); + + String[] record = { + datasetName, + "Sub-columns", + String.valueOf(encodeTime), + String.valueOf(decodeTime), + String.valueOf(data1.size()), + String.valueOf(compressed_size), + String.valueOf(ratio) + }; + writer.writeRecord(record); + + System.out.println("beta: " + beta); + + System.out.println(ratio); + } + + writer.close(); + } + } + +}
diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnTest.java new file mode 100644 index 0000000..668b904 --- /dev/null +++ b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/SubcolumnTest.java
@@ -0,0 +1,987 @@ +package org.apache.iotdb.tsfile.encoding; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Stream; + +import org.junit.Test; + +import com.csvreader.CsvReader; +import com.csvreader.CsvWriter; + +import static org.junit.Assert.assertEquals; + +public class SubcolumnTest { + + public static int bitWidth(int value) { + return 32 - Integer.numberOfLeadingZeros(value); + } + + public static void intToBytes(int srcNum, byte[] result, int pos, int width) { + int cnt = pos & 0x07; + int index = pos >> 3; + while (width > 0) { + int m = width + cnt >= 8 ? 8 - cnt : width; + width -= m; + int mask = 1 << (8 - cnt); + cnt += m; + byte y = (byte) (srcNum >>> width); + y = (byte) (y << (8 - cnt)); + mask = ~(mask - (1 << (8 - cnt))); + result[index] = (byte) (result[index] & mask | y); + srcNum = srcNum & ~(-1 << width); + if (cnt == 8) { + index++; + cnt = 0; + } + } + } + + public static int bytesToInt(byte[] result, int pos, int width) { + int ret = 0; + int cnt = pos & 0x07; + int index = pos >> 3; + while (width > 0) { + int m = width + cnt >= 8 ? 
8 - cnt : width; + width -= m; + ret = ret << m; + byte y = (byte) (result[index] & (0xff >> cnt)); + y = (byte) ((y & 0xff) >>> (8 - cnt - m)); + ret = ret | (y & 0xff); + cnt += m; + if (cnt == 8) { + cnt = 0; + index++; + } + } + return ret; + } + + public static void boolToBytes(boolean value, byte[] result, int pos) { + int byteIndex = pos >> 3; + int bitOffset = pos & 0x07; + + if (value) { + result[byteIndex] |= (1 << (7 - bitOffset)); + } else { + result[byteIndex] &= ~(1 << (7 - bitOffset)); + } + } + + public static boolean bytesToBool(byte[] result, int pos) { + int byteIndex = pos >> 3; + int bitOffset = pos & 0x07; + + return (result[byteIndex] & (1 << (7 - bitOffset))) != 0; + } + + public static void pack8Values(int[] values, int offset, int width, int encode_pos, + byte[] encoded_result) { + int bufIdx = 0; + int valueIdx = offset; + // remaining bits for the current unfinished Integer + int leftBit = 0; + + while (valueIdx < 8 + offset) { + // buffer is used for saving 32 bits as a part of result + int buffer = 0; + // remaining size of bits in the 'buffer' + int leftSize = 32; + + // encode the left bits of current Integer to 'buffer' + if (leftBit > 0) { + buffer |= (values[valueIdx] << (32 - leftBit)); + leftSize -= leftBit; + leftBit = 0; + valueIdx++; + } + + while (leftSize >= width && valueIdx < 8 + offset) { + // encode one Integer to the 'buffer' + buffer |= (values[valueIdx] << (leftSize - width)); + leftSize -= width; + valueIdx++; + } + // If the remaining space of the buffer can not save the bits for one Integer, + if (leftSize > 0 && valueIdx < 8 + offset) { + // put the first 'leftSize' bits of the Integer into remaining space of the + // buffer + buffer |= (values[valueIdx] >>> (width - leftSize)); + leftBit = width - leftSize; + } + + // put the buffer into the final result + for (int j = 0; j < 4; j++) { + encoded_result[encode_pos] = (byte) ((buffer >>> ((3 - j) * 8)) & 0xFF); + encode_pos++; + bufIdx++; + if (bufIdx >= width) { 
+ return; + } + } + } + + } + + public static void unpack8Values(byte[] encoded, int offset, int width, int[] result_list, int result_offset) { + int byteIdx = offset; + long buffer = 0; + // total bits which have read from 'buf' to 'buffer'. i.e., + // number of available bits to be decoded. + int totalBits = 0; + int valueIdx = 0; + + while (valueIdx < 8) { + // If current available bits are not enough to decode one Integer, + // then add next byte from buf to 'buffer' until totalBits >= width + while (totalBits < width) { + buffer = (buffer << 8) | (encoded[byteIdx] & 0xFF); + byteIdx++; + totalBits += 8; + } + + // If current available bits are enough to decode one Integer, + // then decode one Integer one by one until left bits in 'buffer' is + // not enough to decode one Integer. + while (totalBits >= width && valueIdx < 8) { + // result_list.add((int) (buffer >>> (totalBits - width))); + result_list[result_offset + valueIdx] = (int) (buffer >>> (totalBits - width)); + valueIdx++; + totalBits -= width; + buffer = buffer & ((1L << totalBits) - 1); + } + } + } + + public static int bitPacking(int[] numbers, int bit_width, int encode_pos, + byte[] encoded_result, int num_values) { + int block_num = num_values / 8; + int remainder = num_values % 8; + + for (int i = 0; i < block_num; i++) { + pack8Values(numbers, i * 8, bit_width, encode_pos, encoded_result); + encode_pos += bit_width; + } + + encode_pos *= 8; + + for (int i = 0; i < remainder; i++) { + intToBytes(numbers[block_num * 8 + i], encoded_result, encode_pos, bit_width); + encode_pos += bit_width; + } + + return (encode_pos + 7) / 8; + } + + public static int decodeBitPacking( + byte[] encoded, int decode_pos, int bit_width, int num_values, int[] result_list) { + // ArrayList<Integer> result_list = new ArrayList<>(); + // int[] result_list = new int[num_values]; + int block_num = num_values / 8; + int remainder = num_values % 8; + + for (int i = 0; i < block_num; i++) { // bitpacking + 
unpack8Values(encoded, decode_pos, bit_width, result_list, i * 8); + decode_pos += bit_width; + } + + decode_pos *= 8; + + for (int i = 0; i < remainder; i++) { + result_list[block_num * 8 + i] = bytesToInt(encoded, decode_pos, bit_width); + decode_pos += bit_width; + } + + return (decode_pos + 7) / 8; + } + + public static void int2Bytes(int integer, int encode_pos, byte[] cur_byte) { + cur_byte[encode_pos] = (byte) (integer >> 24); + cur_byte[encode_pos + 1] = (byte) (integer >> 16); + cur_byte[encode_pos + 2] = (byte) (integer >> 8); + cur_byte[encode_pos + 3] = (byte) (integer); + } + + public static void intByte2Bytes(int integer, int encode_pos, byte[] cur_byte) { + cur_byte[encode_pos] = (byte) (integer); + } + + public static void long2intBytes(long integer, int encode_pos, byte[] cur_byte) { + cur_byte[encode_pos] = (byte) (integer >> 24); + cur_byte[encode_pos + 1] = (byte) (integer >> 16); + cur_byte[encode_pos + 2] = (byte) (integer >> 8); + cur_byte[encode_pos + 3] = (byte) (integer); + } + + public static int bytes2Integer(byte[] encoded, int start, int num) { + int value = 0; + + for (int i = 0; i < num; i++) { + value <<= 8; + int b = encoded[i + start] & 0xFF; + value |= b; + } + return value; + } + + public static long bytesLong2Integer(byte[] encoded, int decode_pos) { + long value = 0; + for (int i = 0; i < 4; i++) { + value <<= 8; + int b = encoded[i + decode_pos] & 0xFF; + value |= b; + } + return value; + } + + public static int Subcolumn(int[] x, int x_length, int m, int block_size) { + + int betaBest = 1; + + int cMin = Integer.MAX_VALUE; + + // int[] beta_list = {1, 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31}; + // int[] beta_list = { 1, 2, 3, 5, 7, 11 }; + // int[] beta_list = { 1, 2, 3, 4 }; + int[] beta_list = { 2, 3, 4 }; + + int bw = bitWidth(block_size); + + int[] bitWidthListList = new int[m]; + + for (int beta : beta_list) { + if (beta > m) { + break; + } + // System.out.println("beta: " + beta); + + int l = (m + beta - 1) / beta; + + 
// System.out.println("l: " + l); + + int[][] subcolumnList = new int[l][x_length]; + + int cost = 0; + + for (int i = 0; i < l; i++) { + int maxValuePart = 0; + for (int j = 0; j < x_length; j++) { + subcolumnList[i][j] = (x[j] >> (i * beta)) & ((1 << beta) - 1); + if (subcolumnList[i][j] > maxValuePart) { + maxValuePart = subcolumnList[i][j]; + } + } + bitWidthListList[i] = bitWidth(maxValuePart); + } + + for (int i = 0; i < l; i++) { + int bpCost = bitWidthListList[i] * x_length; + int rleCost = 0; + + // int count = 1; + int currentNumber = subcolumnList[i][0]; + + int index = 0; + + boolean bpBest = false; + + for (int j = 1; j < x_length; j++) { + if (subcolumnList[i][j] != currentNumber) { + index++; + currentNumber = subcolumnList[i][j]; + } + + if (bw * index + bitWidthListList[i] * index >= bpCost) { + bpBest = true; + break; + } + } + + if (bpBest) { + cost += bpCost; + continue; + } + + index++; + + // System.out.println("index: " + index); + + rleCost = bw * index + bitWidthListList[i] * index; + + // System.out.println("bpCost: " + bpCost + " rleCost: " + rleCost); + + if (bpCost <= rleCost) { + cost += bpCost; + } else { + cost += rleCost; + } + } + + // System.out.println("cost: " + cost); + + if (cost < cMin) { + cMin = cost; + betaBest = beta; + } + } + + return betaBest; + } + + public static int SubcolumnEncoder(int[] list, int encode_pos, byte[] encoded_result, int[] beta, int block_size) { + int list_length = list.length; + int maxValue = 0; + for (int i = 0; i < list_length; i++) { + if (list[i] > maxValue) { + maxValue = list[i]; + } + } + + int m = bitWidth(maxValue); + + intByte2Bytes(m, encode_pos, encoded_result); + encode_pos += 1; + + if (m == 0) { + return encode_pos; + } + + // int[] bitWidthList = new int[m]; + + // int[][] subcolumnList = new int[m][list_length]; + + int l; + + // int betaBest = beta[0]; + // byte betaBest = (byte) beta[0]; + + l = (m + beta[0] - 1) / beta[0]; + + int[] bitWidthList = new int[l]; + + int[][] 
subcolumnList = new int[l][list_length]; + + intByte2Bytes(beta[0], encode_pos, encoded_result); + encode_pos += 1; + + int bw = bitWidth(block_size); + int mask = (1 << beta[0]) - 1; + + for (int i = 0; i < l; i++) { + int maxValuePart = 0; + int shiftAmount = i * beta[0]; + for (int j = 0; j < list_length; j++) { + subcolumnList[i][j] = (list[j] >> shiftAmount) & mask; + if (subcolumnList[i][j] > maxValuePart) { + maxValuePart = subcolumnList[i][j]; + } + } + bitWidthList[i] = bitWidth(maxValuePart); + } + + encode_pos = bitPacking(bitWidthList, 8, encode_pos, encoded_result, l); + + int[] encodingType = new int[l]; + + // encoded_result 预留大小为 (l + 7) / 8 的大小,存储每个分列的类型 + int preTypePos = encode_pos; + encode_pos += (l + 7) / 8; + + for (int i = l - 1; i >= 0; i--) { + // 对于每个分列,计算使用 bit packing 还是 rle + int bpCost = bitWidthList[i] * list_length; + int rleCost = 0; + + int previous = subcolumnList[i][0]; + int index = 0; + + for (int j = 1; j < list_length; j++) { + int currentNumber = subcolumnList[i][j]; + if (currentNumber != previous) { + index++; + previous = currentNumber; + } + + if (bw * index + bitWidthList[i] * index >= bpCost) { + break; + } + } + + index++; + + rleCost = bw * index + bitWidthList[i] * index; + + if (bpCost <= rleCost) { + encodingType[i] = 0; + + encode_pos = bitPacking(subcolumnList[i], bitWidthList[i], encode_pos, encoded_result, list_length); + + } else { + encodingType[i] = 1; + + encoded_result[encode_pos] = (byte) (index >> 8); + encode_pos += 1; + encoded_result[encode_pos] = (byte) (index & 0xFF); + encode_pos += 1; + + index = 0; + int[] run_length = new int[list_length]; + int[] rle_values = new int[list_length]; + previous = subcolumnList[i][0]; + + for (int j = 1; j < list_length; j++) { + int currentNumber = subcolumnList[i][j]; + if (currentNumber != previous) { + run_length[index] = j; + rle_values[index] = previous; + index++; + previous = currentNumber; + } + } + + run_length[index] = list_length; + rle_values[index] 
= previous; + index++; + + encode_pos = bitPacking(run_length, bw, encode_pos, encoded_result, index); + + encode_pos = bitPacking(rle_values, bitWidthList[i], encode_pos, encoded_result, index); + + } + + } + + preTypePos = bitPacking(encodingType, 1, preTypePos, encoded_result, l); + + return encode_pos; + } + + public static int SubcolumnDecoder(byte[] encoded_result, int encode_pos, int[] list, int block_size) { + int list_length = list.length; + + // int m = encoded_result[encode_pos]; + int m = bytes2Integer(encoded_result, encode_pos, 1); + encode_pos += 1; + + if (m == 0) { + return encode_pos; + } + + int bw = bitWidth(block_size); + + int beta = bytes2Integer(encoded_result, encode_pos, 1); + encode_pos += 1; + + int l = (m + beta - 1) / beta; + + int[] bitWidthList = new int[l]; + + encode_pos = decodeBitPacking(encoded_result, encode_pos, 8, l, bitWidthList); + + int[][] subcolumnList = new int[l][list_length]; + + int[] encodingType = new int[l]; + + encode_pos = decodeBitPacking(encoded_result, encode_pos, 1, l, encodingType); + + for (int i = l - 1; i >= 0; i--) { + int type = encodingType[i]; + int bitWidth = bitWidthList[i]; + if (type == 0) { + encode_pos = decodeBitPacking(encoded_result, encode_pos, bitWidth, list_length, + subcolumnList[i]); + } else { + int index = ((encoded_result[encode_pos] & 0xFF) << 8) | (encoded_result[encode_pos + 1] & 0xFF); + + encode_pos += 2; + + int[] run_length = new int[index]; + int[] rle_values = new int[index]; + + encode_pos = decodeBitPacking(encoded_result, encode_pos, bw, index, run_length); + encode_pos = decodeBitPacking(encoded_result, encode_pos, bitWidth, index, rle_values); + + int currentIndex = 0; + for (int j = 0; j < index; j++) { + int endPos = run_length[j]; + int value = rle_values[j]; + while (currentIndex < endPos) { + subcolumnList[i][currentIndex] = value; + currentIndex++; + } + } + } + } + + for (int i = 0; i < l; i++) { + int shiftAmount = i * beta; + for (int j = 0; j < list_length; 
j++) { + list[j] |= subcolumnList[i][j] << shiftAmount; + } + } + + return encode_pos; + } + + public static int[] getAbsDeltaTsBlock( + int[] ts_block, + int i, + int block_size, + int remaining, + int[] min_delta) { + int[] ts_block_delta = new int[remaining]; + + int value_delta_min = Integer.MAX_VALUE; + int value_delta_max = Integer.MIN_VALUE; + int base = i * block_size; + int end = i * block_size + remaining; + + for (int j = base; j < end; j++) { + int cur = ts_block[j]; + if (cur < value_delta_min) { + value_delta_min = cur; + } + if (cur > value_delta_max) { + value_delta_max = cur; + } + } + + for (int j = base; j < end; j++) { + ts_block_delta[j - base] = ts_block[j] - value_delta_min; + } + + min_delta[0] = value_delta_min; + + return ts_block_delta; + } + + public static int BlockEncoder(int[] data, int block_index, int block_size, int remainder, + int encode_pos, byte[] encoded_result, int[] beta) { + int[] min_delta = new int[3]; + + int[] data_delta = getAbsDeltaTsBlock(data, block_index, block_size, + remainder, min_delta); + + int2Bytes(min_delta[0], encode_pos, encoded_result); + encode_pos += 4; + + if (block_index == 0) { + int maxValue = 0; + for (int j = 0; j < remainder; j++) { + if (data_delta[j] > maxValue) { + maxValue = data_delta[j]; + } + } + int m = bitWidth(maxValue); + + beta[0] = Subcolumn(data_delta, remainder, m, block_size); + } + + encode_pos = SubcolumnEncoder(data_delta, encode_pos, + encoded_result, beta, block_size); + + return encode_pos; + } + + public static int BlockDecoder(byte[] encoded_result, int block_index, int block_size, int remainder, + int encode_pos, int[] data) { + int[] min_delta = new int[3]; + + min_delta[0] = bytes2Integer(encoded_result, encode_pos, 4); + encode_pos += 4; + + int[] block_data = new int[remainder]; + + encode_pos = SubcolumnDecoder(encoded_result, encode_pos, + block_data, block_size); + + for (int i = 0; i < remainder; i++) { + data[block_index * block_size + i] = block_data[i] + 
min_delta[0]; + } + + return encode_pos; + } + /** Encodes {@code data} into {@code encoded_result} and returns the position just past the last byte written. Layout as written here: data.length, then block_size (each via int2Bytes, position advanced by 4), then one BlockEncoder call per full block of block_size values, then the tail: at most 3 leftover values are stored one by one with int2Bytes (position advanced by 4 each), a longer tail goes through BlockEncoder with remainder as its length. NOTE(review): block_size == 0 makes the integer division throw ArithmeticException, and the capacity of encoded_result is not checked here. */ + public static int Encoder(int[] data, int block_size, byte[] encoded_result) { + int data_length = data.length; + int encode_pos = 0; + + int2Bytes(data_length, encode_pos, encoded_result); + encode_pos += 4; + + int2Bytes(block_size, encode_pos, encoded_result); + encode_pos += 4; + + int num_blocks = data_length / block_size; + + int remainder = data_length % block_size; + /* single-element array handed to every BlockEncoder call, starting at 2; its meaning is defined inside BlockEncoder, which is not shown here */ + int[] beta = new int[1]; + beta[0] = 2; + + for (int i = 0; i < num_blocks; i++) { + encode_pos = BlockEncoder(data, i, block_size, block_size, encode_pos, encoded_result, beta); + } + + if (remainder <= 3) { + for (int i = 0; i < remainder; i++) { + int value = data[num_blocks * block_size + i]; + int2Bytes(value, encode_pos, encoded_result); + encode_pos += 4; + } + } else { + encode_pos = BlockEncoder(data, num_blocks, block_size, remainder, encode_pos, + encoded_result, beta); + } + + // System.out.println("beta: " + beta[0]); + + return encode_pos; + } + /** Mirror of Encoder: reads the data length at position 0 and the block size at position 4 (bytes2Integer with width 4), decodes every full block with BlockDecoder, then restores the tail, either at most 3 values read one by one with bytes2Integer or a longer tail through BlockDecoder. Returns a newly allocated array of data_length ints. NOTE(review): a stored block size of 0 makes the division throw ArithmeticException. */ + public static int[] Decoder(byte[] encoded_result) { + int encode_pos = 0; + + int data_length = bytes2Integer(encoded_result, encode_pos, 4); + encode_pos += 4; + + int block_size = bytes2Integer(encoded_result, encode_pos, 4); + encode_pos += 4; + + int num_blocks = data_length / block_size; + + int[] data = new int[data_length]; + + for (int i = 0; i < num_blocks; i++) { + encode_pos = BlockDecoder(encoded_result, i, block_size, block_size, encode_pos, data); + } + + int remainder = data_length % block_size; + + if (remainder <= 3) { + for (int i = 0; i < remainder; i++) { + data[num_blocks * block_size + i] = bytes2Integer(encoded_result, encode_pos, 4); + encode_pos += 4; + } + } else { + encode_pos = BlockDecoder(encoded_result, num_blocks, block_size, remainder, + encode_pos, data); + } + + return data; + } + /** Returns the number of characters that follow the first '.' in {@code str}, or 0 when {@code str} contains no '.'. */ + public static int getDecimalPrecision(String str) { + // Find the position of the decimal point + int decimalIndex = str.indexOf("."); + + // No decimal point means a precision of 0 + if (decimalIndex == -1) { + return 0; + } + + 
// Take the part after the decimal point and return its length + return str.substring(decimalIndex + 1).length(); + } + /** Returns the last path component of {@code path} with everything from its last '.' onwards removed. Returns "" for a null or empty path, and the component unchanged when it has no '.' or when its only '.' is the first character. */ + public static String extractFileName(String path) { + if (path == null || path.isEmpty()) { + return ""; + } + + File file = new File(path); + String fileName = file.getName(); + + int dotIndex = fileName.lastIndexOf('.'); + + if (dotIndex == -1 || dotIndex == 0) { + return fileName; + } + + return fileName.substring(0, dotIndex); + } + /** Benchmark-style round-trip test. For every file in input_parent_dir whose name ends with ".csv" it reads column 0 (skipping empty cells), scales the values to ints, runs Encoder and then Decoder repeatTime times each, asserts that the decoded values equal the input, and writes one row (dataset name, average encode and decode time per pass in nanoseconds, point count, compressed size, ratio) to subcolumn.csv. NOTE(review): the input and output directories are hard-coded absolute paths; File.listFiles returns null when the directory cannot be listed, which would make the for-each loop throw; inputStream and writer are closed only on the success path. */ + @Test + public void testSubcolumn() throws IOException { + String parent_dir = "D:/github/xjz17/subcolumn/"; + + String input_parent_dir = parent_dir + "dataset/"; + + String output_parent_dir = "D:/encoding-subcolumn/result/"; + // String output_parent_dir = parent_dir + "result/"; + + String outputPath = output_parent_dir + "subcolumn.csv"; + + int block_size = 512; + + int repeatTime = 100; + + // repeatTime = 1; + /* datasets listed here have their uncompressed size counted as Integer.BYTES per point when the ratio is computed; every other dataset uses Long.BYTES */ + List<String> integerDatasets = new ArrayList<>(); + integerDatasets.add("Wine-Tasting"); + + CsvWriter writer = new CsvWriter(outputPath, ',', StandardCharsets.UTF_8); + writer.setRecordDelimiter('\n'); + + String[] head = { + "Dataset", + "Encoding Algorithm", + "Encoding Time", + "Decoding Time", + "Points", + "Compressed Size", + "Compression Ratio" + }; + writer.writeRecord(head); + + File directory = new File(input_parent_dir); + // File[] csvFiles = directory.listFiles(); + File[] csvFiles = directory.listFiles((dir, name) -> name.endsWith(".csv")); + + for (File file : csvFiles) { + String datasetName = extractFileName(file.toString()); + System.out.println(datasetName); + + InputStream inputStream = Files.newInputStream(file.toPath()); + + CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); + ArrayList<Float> data1 = new ArrayList<>(); + + int max_decimal = 0; + while (loader.readRecord()) { + String f_str = loader.getValues()[0]; + if (f_str.isEmpty()) { + continue; + } + int cur_decimal = getDecimalPrecision(f_str); + if (cur_decimal > max_decimal) { + max_decimal = cur_decimal; + } + 
data1.add(Float.valueOf(f_str)); + } + inputStream.close(); + int[] data2_arr = new int[data1.size()]; + /* scale every value by 10^max_decimal and truncate to int. NOTE(review): the values were parsed as Float and max_mul is an int, so the product may differ from the decimal digits in the file and overflows for large max_decimal; confirm this is acceptable */ + int max_mul = (int) Math.pow(10, max_decimal); + for (int i = 0; i < data1.size(); i++) { + data2_arr[i] = (int) (data1.get(i) * max_mul); + } + + System.out.println(max_decimal); + byte[] encoded_result = new byte[data2_arr.length * 4]; + /* NOTE(review): the output buffer holds 4 bytes per input value, while Encoder writes two 4-byte header fields before any data; confirm the encoded form always fits */ + long encodeTime = 0; + long decodeTime = 0; + double ratio = 0; + double compressed_size = 0; + + int length = 0; + /* time repeatTime encode passes over the same input; the recorded time is the per-pass average in nanoseconds */ + long s = System.nanoTime(); + for (int repeat = 0; repeat < repeatTime; repeat++) { + length = Encoder(data2_arr, block_size, encoded_result); + } + + long e = System.nanoTime(); + encodeTime += ((e - s) / repeatTime); + compressed_size += length; + + double ratioTmp; + + if (integerDatasets.contains(datasetName)) { + ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); + } else { + ratioTmp = compressed_size / (double) (data1.size() * Long.BYTES); + } + + ratio += ratioTmp; + + System.out.println("Decode"); + + int[] data2_arr_decoded = new int[data2_arr.length]; + + s = System.nanoTime(); + + for (int repeat = 0; repeat < repeatTime; repeat++) { + data2_arr_decoded = Decoder(encoded_result); + } + + e = System.nanoTime(); + decodeTime += ((e - s) / repeatTime); + + for (int i = 0; i < data2_arr_decoded.length; i++) { + assertEquals(data2_arr[i], data2_arr_decoded[i]); + } + + String[] record = { + datasetName, + "Sub-columns", + String.valueOf(encodeTime), + String.valueOf(decodeTime), + String.valueOf(data1.size()), + String.valueOf(compressed_size), + String.valueOf(ratio) + }; + writer.writeRecord(record); + System.out.println(ratio); + } + + writer.close(); + } + /** Round-trip test over the datasets under input_parent_dir. Every directory below that root is treated as one dataset with block size 1024; for each file in it, loader.readHeaders() is called first (presumably consuming a header row, to be confirmed against CsvReader), columns 0 and 1 of every record are parsed as integers, column 1 is encoded and decoded repeatTime times, the decoded values are asserted equal to the input, and the per-file average times, sizes and point counts are summed into one row per dataset in subcolumn.csv. NOTE(review): paths are hard-coded absolute paths; File.listFiles can return null; inputStream and writer are closed only on the success path. */ + @Test + public void testTransData() throws IOException { + String parent_dir = "D:/github/xjz17/subcolumn/"; + + String output_parent_dir = "D:/encoding-subcolumn/trans_data_result/"; + // String output_parent_dir = parent_dir + "trans_data_result/"; + + String input_parent_dir = parent_dir + "trans_data/"; + + 
ArrayList<String> input_path_list = new ArrayList<>(); + ArrayList<String> output_path_list = new ArrayList<>(); + ArrayList<String> dataset_name = new ArrayList<>(); + ArrayList<Integer> dataset_block_size = new ArrayList<>(); + /* collect every directory found by walking input_parent_dir, except the root itself, as one dataset with block size 1024. NOTE(review): output_path_list is never read in this method */ + try (Stream<Path> paths = Files.walk(Paths.get(input_parent_dir))) { + paths.filter(Files::isDirectory) + .filter(path -> !path.equals(Paths.get(input_parent_dir))) + .forEach(dir -> { + String name = dir.getFileName().toString(); + dataset_name.add(name); + input_path_list.add(dir.toString()); + dataset_block_size.add(1024); + }); + } + + String outputPath = output_parent_dir + "subcolumn.csv"; + CsvWriter writer = new CsvWriter(outputPath, ',', StandardCharsets.UTF_8); + writer.setRecordDelimiter('\n'); + + String[] head = { + "Dataset", + "Encoding Algorithm", + "Encoding Time", + "Decoding Time", + "Points", + "Compressed Size", + "Compression Ratio" + }; + writer.writeRecord(head); + + int repeatTime = 100; + + for (int file_i = 0; file_i < input_path_list.size(); file_i++) { + + String inputPath = input_path_list.get(file_i); + System.out.println(inputPath); + + File file = new File(inputPath); + File[] tempList = file.listFiles(); + + // CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); + // writer.setRecordDelimiter('\n'); + + // String[] head = { + // "Input Direction", + // "Encoding Algorithm", + // "Encoding Time", + // "Decoding Time", + // "Points", + // "Compressed Size", + // "Compression Ratio" + // }; + // writer.writeRecord(head); + + long totalEncodeTime = 0; + long totalDecodeTime = 0; + double totalCompressedSize = 0; + int totalPoints = 0; + + for (File f : tempList) { + String datasetName = extractFileName(f.toString()); + InputStream inputStream = Files.newInputStream(f.toPath()); + + CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); + ArrayList<Integer> data1 = new ArrayList<>(); + ArrayList<Integer> data2 = new ArrayList<>(); + + loader.readHeaders(); + while 
(loader.readRecord()) { + // String value = loader.getValues()[index]; + data1.add(Integer.valueOf(loader.getValues()[0])); + data2.add(Integer.valueOf(loader.getValues()[1])); + // data.add(Integer.valueOf(value)); + } + inputStream.close(); + int[] data2_arr = new int[data1.size()]; + for (int i = 0; i < data2.size(); i++) { + data2_arr[i] = data2.get(i); + } + byte[] encoded_result = new byte[data2_arr.length * 4]; + long encodeTime = 0; + long decodeTime = 0; + double ratio = 0; + double compressed_size = 0; + /* only column 1 (data2) is encoded; column 0 (data1) is used for its size only. NOTE(review): the buffer holds 4 bytes per value while Encoder also writes an 8-byte header; confirm the encoded form always fits */ + int length = 0; + + long s = System.nanoTime(); + for (int repeat = 0; repeat < repeatTime; repeat++) { + length = Encoder(data2_arr, dataset_block_size.get(file_i), encoded_result); + } + + long e = System.nanoTime(); + encodeTime += ((e - s) / repeatTime); + compressed_size += length; + double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); + ratio += ratioTmp; + s = System.nanoTime(); + + int[] data2_arr_decoded = new int[data1.size()]; + + for (int repeat = 0; repeat < repeatTime; repeat++) { + data2_arr_decoded = Decoder(encoded_result); + } + + e = System.nanoTime(); + decodeTime += ((e - s) / repeatTime); + + totalEncodeTime += encodeTime; + totalDecodeTime += decodeTime; + totalCompressedSize += compressed_size; + totalPoints += data1.size(); + + for (int i = 0; i < data2_arr_decoded.length; i++) { + assertEquals(data2_arr[i], data2_arr_decoded[i]); + } + + } + /* ratio for the whole dataset: summed compressed size over summed point count times Integer.BYTES */ + double compressionRatio = totalCompressedSize / (totalPoints * Integer.BYTES); + + String[] record = { + dataset_name.get(file_i), + "Sub-columns", + String.valueOf(totalEncodeTime), + String.valueOf(totalDecodeTime), + String.valueOf(totalPoints), + String.valueOf(totalCompressedSize), + String.valueOf(compressionRatio) + }; + + writer.writeRecord(record); + System.out.println(compressionRatio); + } + writer.close(); + } + +}
diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/TSDIFFBOSBImproveTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/TSDIFFBOSBImproveTest.java deleted file mode 100644 index cecedce..0000000 --- a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/TSDIFFBOSBImproveTest.java +++ /dev/null
@@ -1,2389 +0,0 @@ -package org.apache.iotdb.tsfile.encoding; - -import com.csvreader.CsvReader; -import com.csvreader.CsvWriter; -import org.junit.Test; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.util.ArrayList; -import java.util.Arrays; - -import static java.lang.Math.min; -import static java.lang.Math.pow; - -public class TSDIFFBOSBImproveTest { - - public static long combine2Int(int int1, int int2) { - return ((long) int1 << 32) | (int2 & 0xFFFFFFFFL); - } - - public static int getTime(long long1) { - return ((int) (long1 >> 32)); - } - - public static int getValue(long long1) { - return ((int) (long1)); - } - - public static int getCount(long long1, int mask) { - return ((int) (long1 & mask)); - } - public static int getUniqueValue(long long1, int left_shift) { - return ((int) ((long1) >> left_shift)); - } - - public static int getBitWith(int num) { - if (num == 0) return 1; - else return 32 - Integer.numberOfLeadingZeros(num); - } - - public static void int2Bytes(int integer,int encode_pos , byte[] cur_byte) { - cur_byte[encode_pos] = (byte) (integer >> 24); - cur_byte[encode_pos+1] = (byte) (integer >> 16); - cur_byte[encode_pos+2] = (byte) (integer >> 8); - cur_byte[encode_pos+3] = (byte) (integer); - } - - - public static void intByte2Bytes(int integer, int encode_pos , byte[] cur_byte) { - cur_byte[encode_pos] = (byte) (integer); - } - - private static void long2intBytes(long integer, int encode_pos , byte[] cur_byte) { - cur_byte[encode_pos] = (byte) (integer >> 24); - cur_byte[encode_pos+1] = (byte) (integer >> 16); - cur_byte[encode_pos+2] = (byte) (integer >> 8); - cur_byte[encode_pos+3] = (byte) (integer); - } - - public static int bytes2Integer(byte[] encoded, int start, int num) { - int value = 0; - if (num > 4) { - System.out.println("bytes2Integer error"); - return 0; - } - for (int i = 0; i < num; i++) { - value <<= 8; - 
int b = encoded[i + start] & 0xFF; - value |= b; - } - return value; - } - - private static long bytesLong2Integer(byte[] encoded, int decode_pos) { - long value = 0; - for (int i = 0; i < 4; i++) { - value <<= 8; - int b = encoded[i + decode_pos] & 0xFF; - value |= b; - } - return value; - } - - public static void pack8Values(ArrayList<Integer> values, int offset, int width, int encode_pos, byte[] encoded_result) { - int bufIdx = 0; - int valueIdx = offset; - // remaining bits for the current unfinished Integer - int leftBit = 0; - - while (valueIdx < 8 + offset) { - // buffer is used for saving 32 bits as a part of result - int buffer = 0; - // remaining size of bits in the 'buffer' - int leftSize = 32; - - // encode the left bits of current Integer to 'buffer' - if (leftBit > 0) { - buffer |= (values.get(valueIdx) << (32 - leftBit)); - leftSize -= leftBit; - leftBit = 0; - valueIdx++; - } - - while (leftSize >= width && valueIdx < 8 + offset) { - // encode one Integer to the 'buffer' - buffer |= (values.get(valueIdx)<< (leftSize - width)); - leftSize -= width; - valueIdx++; - } - // If the remaining space of the buffer can not save the bits for one Integer, - if (leftSize > 0 && valueIdx < 8 + offset) { - // put the first 'leftSize' bits of the Integer into remaining space of the - // buffer - buffer |= (values.get(valueIdx) >>> (width - leftSize)); - leftBit = width - leftSize; - } - - // put the buffer into the final result - for (int j = 0; j < 4; j++) { - encoded_result[encode_pos] = (byte) ((buffer >>> ((3 - j) * 8)) & 0xFF); - encode_pos ++; - bufIdx++; - if (bufIdx >= width) { - return ; - } - } - } -// return encode_pos; - } - - public static void unpack8Values(byte[] encoded, int offset,int width, ArrayList<Integer> result_list) { - int byteIdx = offset; - long buffer = 0; - // total bits which have read from 'buf' to 'buffer'. i.e., - // number of available bits to be decoded. 
- int totalBits = 0; - int valueIdx = 0; - - while (valueIdx < 8) { - // If current available bits are not enough to decode one Integer, - // then add next byte from buf to 'buffer' until totalBits >= width - while (totalBits < width) { - buffer = (buffer << 8) | (encoded[byteIdx] & 0xFF); - byteIdx++; - totalBits += 8; - } - - // If current available bits are enough to decode one Integer, - // then decode one Integer one by one until left bits in 'buffer' is - // not enough to decode one Integer. - while (totalBits >= width && valueIdx < 8) { - result_list.add ((int) (buffer >>> (totalBits - width))); - valueIdx++; - totalBits -= width; - buffer = buffer & ((1L << totalBits) - 1); - } - } - } - - public static int bitPacking(ArrayList<Integer> numbers, int start, int bit_width,int encode_pos, byte[] encoded_result) { - int block_num = (numbers.size()-start) / 8; - for(int i=0;i<block_num;i++){ - pack8Values( numbers, start+i*8, bit_width,encode_pos, encoded_result); - encode_pos +=bit_width; - } - - return encode_pos; - - } - - public static ArrayList<Integer> decodeBitPacking( - byte[] encoded, int decode_pos, int bit_width, int block_size) { - ArrayList<Integer> result_list = new ArrayList<>(); - int block_num = (block_size - 1) / 8; - - for (int i = 0; i < block_num; i++) { // bitpacking - unpack8Values( encoded, decode_pos, bit_width, result_list); - decode_pos += bit_width; - - } - return result_list; - } - - - public static int[] getAbsDeltaTsBlock( - int[] ts_block, - int i, - int block_size, - int remaining, - int[] min_delta) { - int[] ts_block_delta = new int[remaining-1]; - - int value_delta_min = Integer.MAX_VALUE; - int value_delta_max = Integer.MIN_VALUE; - int base = i*block_size+1; - int end = i*block_size+remaining; - - int tmp_j_1 = ts_block[base-1]; - min_delta[0] =tmp_j_1; - int j = base; - int tmp_j; - - while(j<end){ - tmp_j = ts_block[j]; - int epsilon_v = tmp_j - tmp_j_1; - ts_block_delta[j-base] = epsilon_v; - if (epsilon_v < 
value_delta_min) { - value_delta_min = epsilon_v; - } - if (epsilon_v > value_delta_max) { - value_delta_max = epsilon_v; - } - tmp_j_1 = tmp_j; - j++; - } - j = 0; - end = remaining -1; - while(j<end){ - ts_block_delta[j] = ts_block_delta[j] - value_delta_min; - j++; - } - - min_delta[1] = value_delta_min; - min_delta[2] = (value_delta_max-value_delta_min); - - - return ts_block_delta; - } - - - public static int encodeOutlier2Bytes( - ArrayList<Integer> ts_block_delta, - int bit_width, - int encode_pos, byte[] encoded_result) { - - encode_pos = bitPacking(ts_block_delta, 0, bit_width, encode_pos, encoded_result); - - int n_k = ts_block_delta.size(); - int n_k_b = n_k / 8; - long cur_remaining = 0; // encoded int - int cur_number_bits = 0; // the bit width used of encoded int - for (int i = n_k_b * 8; i < n_k; i++) { - long cur_value = ts_block_delta.get(i); - int cur_bit_width = bit_width; // remaining bit width of current value - - if (cur_number_bits + bit_width >= 32) { - cur_remaining <<= (32 - cur_number_bits); - cur_bit_width = bit_width - 32 + cur_number_bits; - cur_remaining += ((cur_value >> cur_bit_width)); - long2intBytes(cur_remaining,encode_pos,encoded_result); - encode_pos += 4; - cur_remaining = 0; - cur_number_bits = 0; - } - - cur_remaining <<= cur_bit_width; - cur_number_bits += cur_bit_width; - cur_remaining += (((cur_value << (32 - cur_bit_width)) & 0xFFFFFFFFL) >> (32 - cur_bit_width)); // - } - cur_remaining <<= (32 - cur_number_bits); - long2intBytes(cur_remaining,encode_pos,encoded_result); - encode_pos += 4; - return encode_pos; - - } - - - public static ArrayList<Integer> decodeOutlier2Bytes( - byte[] encoded, - int decode_pos, - int bit_width, - int length, - ArrayList<Integer> encoded_pos_result - ) { - - int n_k_b = length / 8; - int remaining = length - n_k_b * 8; - ArrayList<Integer> result_list = new ArrayList<>(decodeBitPacking(encoded, decode_pos, bit_width, n_k_b * 8 + 1)); - decode_pos += n_k_b * bit_width; - - ArrayList<Long> 
int_remaining = new ArrayList<>(); - int int_remaining_size = remaining * bit_width / 32 + 1; - for (int j = 0; j < int_remaining_size; j++) { - int_remaining.add(bytesLong2Integer(encoded, decode_pos)); - decode_pos += 4; - } - - int cur_remaining_bits = 32; // remaining bit width of current value - long cur_number = int_remaining.get(0); - int cur_number_i = 1; - for (int i = n_k_b * 8; i < length; i++) { - if (bit_width < cur_remaining_bits) { - int tmp = (int) (cur_number >> (32 - bit_width)); - result_list.add(tmp); - cur_number <<= bit_width; - cur_number &= 0xFFFFFFFFL; - cur_remaining_bits -= bit_width; - } else { - int tmp = (int) (cur_number >> (32 - cur_remaining_bits)); - int remain_bits = bit_width - cur_remaining_bits; - tmp <<= remain_bits; - - cur_number = int_remaining.get(cur_number_i); - cur_number_i++; - tmp += (int) (cur_number >> (32 - remain_bits)); - result_list.add(tmp); - cur_number <<= remain_bits; - cur_number &= 0xFFFFFFFFL; - cur_remaining_bits = 32 - remain_bits; - } - } - encoded_pos_result.add(decode_pos); - return result_list; - } - - private static int BOSEncodeBits(int[] ts_block_delta, - int final_k_start_value, - int final_x_l_plus, - int final_k_end_value, - int final_x_u_minus, - int max_delta_value, - int[] min_delta, - int encode_pos, - byte[] cur_byte) { - int block_size = ts_block_delta.length; - - ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_left_outlier = new ArrayList<>(); - ArrayList<Integer> final_right_outlier = new ArrayList<>(); - ArrayList<Integer> final_normal = new ArrayList<>(); - int k1 = 0; - int k2 = 0; - - ArrayList<Integer> bitmap_outlier = new ArrayList<>(); - int index_bitmap_outlier = 0; - int cur_index_bitmap_outlier_bits = 0; - for (int i = 0; i < block_size; i++) { - int cur_value = ts_block_delta[i]; - if ( cur_value<= final_k_start_value) { - final_left_outlier.add(cur_value); - 
final_left_outlier_index.add(i); - if (cur_index_bitmap_outlier_bits % 8 != 7) { - index_bitmap_outlier <<= 2; - index_bitmap_outlier += 3; - cur_index_bitmap_outlier_bits += 2; - } else { - index_bitmap_outlier <<= 1; - index_bitmap_outlier += 1; - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 1; - cur_index_bitmap_outlier_bits = 1; - } - k1++; - - - } else if (cur_value >= final_k_end_value) { - final_right_outlier.add(cur_value - final_k_end_value); - final_right_outlier_index.add(i); - if (cur_index_bitmap_outlier_bits % 8 != 7) { - index_bitmap_outlier <<= 2; - index_bitmap_outlier += 2; - cur_index_bitmap_outlier_bits += 2; - } else { - index_bitmap_outlier <<= 1; - index_bitmap_outlier += 1; - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 0; - cur_index_bitmap_outlier_bits = 1; - } - k2++; - - } else { - final_normal.add(cur_value - final_x_l_plus); - index_bitmap_outlier <<= 1; - cur_index_bitmap_outlier_bits += 1; - } - if (cur_index_bitmap_outlier_bits % 8 == 0) { - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 0; - } - } - if (cur_index_bitmap_outlier_bits % 8 != 0) { - - index_bitmap_outlier <<= (8 - cur_index_bitmap_outlier_bits % 8); - - index_bitmap_outlier &= 0xFF; - bitmap_outlier.add(index_bitmap_outlier); - } - - int final_alpha = ((k1 + k2) * getBitWith(block_size-1)) <= (block_size + k1 + k2) ? 
1 : 0; - - - int k_byte = (k1 << 1); - k_byte += final_alpha; - k_byte += (k2 << 16); - - - int2Bytes(k_byte,encode_pos,cur_byte); - encode_pos += 4; - - int2Bytes(min_delta[0],encode_pos,cur_byte); - encode_pos += 4; - int2Bytes(min_delta[1],encode_pos,cur_byte); - encode_pos += 4; - - int bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); - int left_bit_width = getBitWith(final_k_start_value);//final_left_max - int right_bit_width = getBitWith(max_delta_value - final_k_end_value);//final_right_min - - if(k1==0 && k2==0){ - intByte2Bytes(bit_width_final,encode_pos,cur_byte); - encode_pos += 1; - -// encode_pos = encodeOutlier2Bytes(final_normal, bit_width_final,encode_pos,cur_byte); -// return encode_pos; - } - else{ - int2Bytes(final_x_l_plus,encode_pos,cur_byte); - encode_pos += 4; - int2Bytes(final_k_end_value,encode_pos,cur_byte); - encode_pos += 4; - - bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); - intByte2Bytes(bit_width_final,encode_pos,cur_byte); - encode_pos += 1; - intByte2Bytes(left_bit_width,encode_pos,cur_byte); - encode_pos += 1; - intByte2Bytes(right_bit_width,encode_pos,cur_byte); - encode_pos += 1; - if (final_alpha == 0) { // 0 - - for (int i : bitmap_outlier) { - - intByte2Bytes(i,encode_pos,cur_byte); - encode_pos += 1; - } - } else { - encode_pos = encodeOutlier2Bytes(final_left_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); - encode_pos = encodeOutlier2Bytes(final_right_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); - } - } - - -// if(k1+k2!=block_size) - encode_pos = encodeOutlier2Bytes(final_normal, bit_width_final,encode_pos,cur_byte); - if (k1 != 0) - encode_pos = encodeOutlier2Bytes(final_left_outlier, left_bit_width,encode_pos,cur_byte); - if (k2 != 0) - encode_pos = encodeOutlier2Bytes(final_right_outlier, right_bit_width,encode_pos,cur_byte); - return encode_pos; - - } - - - private static int BOSBlockEncoder(int[] ts_block, int block_i, int block_size, int remaining ,int 
encode_pos , byte[] cur_byte) { - - int[] min_delta = new int[3]; - int[] ts_block_delta = getAbsDeltaTsBlock(ts_block, block_i, block_size, remaining, min_delta); - - - block_size = remaining-1; - int max_delta_value = min_delta[2]; - int[] value_list = new int[block_size]; - int unique_value_count = 0; - int[] value_count_list = new int[max_delta_value+1]; - for(int value:ts_block_delta){ - if(value_count_list[value]==0){ - value_count_list[value] = 1; - value_list[unique_value_count] = value; - unique_value_count ++; - }else{ - value_count_list[value] ++; - } - } - - int left_shift = getBitWith(block_size); - int mask = (1 << left_shift) - 1; - long[] sorted_value_list = new long[unique_value_count]; - int count = 0; - - for(int i=0;i<unique_value_count;i++){ - int value = value_list[i]; - sorted_value_list[i] = (((long) value) << left_shift) + value_count_list[value]; - } - Arrays.sort(sorted_value_list); - - for(int i=0;i<unique_value_count;i++){ - count += getCount(sorted_value_list[i], mask); - sorted_value_list[i] = (((long)getUniqueValue(sorted_value_list[i], left_shift) ) << left_shift) + count;//new_value_list[i] - } - - - int final_k_start_value = -1; // x_l_minus - int final_x_l_plus = 0; // x_l_plus - int final_k_end_value = max_delta_value+1; // x_u_plus - int final_x_u_minus = max_delta_value; // x_u_minus - - int min_bits = 0; - min_bits += (getBitWith(final_k_end_value - final_k_start_value - 2 ) * (block_size)); - - int cur_k1 = 0; - - int x_l_plus_value = 0; // x_l_plus - int x_u_minus_value = max_delta_value; // x_u_plus - - for (int end_value_i = 1; end_value_i < unique_value_count; end_value_i++) { - - x_u_minus_value = getUniqueValue(sorted_value_list[end_value_i-1], left_shift); - int x_u_plus_value = getUniqueValue(sorted_value_list[end_value_i], left_shift); - int cur_bits = 0; - int cur_k2 = block_size - getCount(sorted_value_list[end_value_i-1],mask); - cur_bits += Math.min((cur_k2 + cur_k1) * getBitWith(block_size-1), block_size + 
cur_k2 + cur_k1); - if (cur_k1 + cur_k2 != block_size) - cur_bits += (block_size - cur_k2) * getBitWith(x_u_minus_value - x_l_plus_value); // cur_k1 = 0 - if (cur_k2 != 0) - cur_bits += cur_k2 * getBitWith(max_delta_value - x_u_plus_value); - - - if (cur_bits < min_bits) { - min_bits = cur_bits; - final_x_u_minus = x_u_minus_value; - final_k_end_value = x_u_plus_value; - } - } - - int k_start_value = -1; // x_l_minus -// int beta_max_all = getBitWith(max_delta_value)+1; -// int[][] hash_table_count = new int[unique_value_count][beta_max_all]; -// int[][] hash_table_value = new int[unique_value_count][beta_max_all]; -// int cur_value = getUniqueValue(sorted_value_list[0], left_shift) ; -// int next_value = getUniqueValue(sorted_value_list[1], left_shift) ; -// -// for (int value_i = 0; value_i < unique_value_count; value_i++) { -// -// -// next_value = getUniqueValue(sorted_value_list[value_i + 1], left_shift) ; -// long k_start_valueL = sorted_value_list[value_i]; -// hash_table_count[value_i][0] = getCount(k_start_valueL,mask); -// -// int beta_max = getBitWith(max_delta_value - cur_value); -// for(int beta = 1; beta <= beta_max; beta++){ -// -// } -// cur_value = next_value ; -// -// } - - - int gamma_max = getBitWith(max_delta_value); - int[] gamma_count_list = new int[gamma_max+1]; - int[] x_u_minus_value_list = new int[gamma_max+1]; - int[] x_u_plus_value_list = new int[gamma_max+1]; - int end_i = unique_value_count - 1; - for(int gamma = 0; gamma <= gamma_max; gamma++) { - int x_u_plus_pow_beta = max_delta_value - (1<<gamma) + 1; -// int x_u_plus_pow_beta = (int) (max_delta_value - pow(2, gamma) + 1); -// - for (; end_i > 0; end_i--) { - x_u_minus_value = getUniqueValue(sorted_value_list[end_i - 1], left_shift); - int x_u_plus_value = getUniqueValue(sorted_value_list[end_i], left_shift); - if (x_u_minus_value < x_u_plus_pow_beta && x_u_plus_value >= x_u_plus_pow_beta){ - gamma_count_list[gamma] = getCount(sorted_value_list[end_i-1],mask); - 
x_u_minus_value_list[gamma] = x_u_minus_value; - x_u_plus_value_list[gamma] = x_u_plus_value; - } else if (x_u_minus_value < x_u_plus_pow_beta) { - break; - } - } - } - for(int gamma = 1; gamma < gamma_max; gamma++) { - if(gamma_count_list[gamma]==0){ - gamma_count_list[gamma] = gamma_count_list[gamma-1]; - x_u_minus_value_list[gamma] = x_u_minus_value_list[gamma-1]; - x_u_plus_value_list[gamma] = x_u_plus_value_list[gamma-1]; - } - } - - for (int start_value_i = 0; start_value_i < unique_value_count-1; start_value_i++) { - long k_start_valueL = sorted_value_list[start_value_i]; - k_start_value = getUniqueValue(k_start_valueL, left_shift) ; - x_l_plus_value = getUniqueValue(sorted_value_list[start_value_i+1], left_shift) ; - - - cur_k1 = getCount(k_start_valueL,mask); - - int k_end_value; - int cur_bits; - int cur_k2; - k_end_value = max_delta_value + 1; - - cur_bits = 0; - cur_k2 = 0; - cur_bits += Math.min((cur_k2 + cur_k1) * getBitWith(block_size-1), block_size + cur_k2 + cur_k1); - cur_bits += cur_k1 * getBitWith(k_start_value); - if (cur_k1 + cur_k2 != block_size) - cur_bits += (block_size - cur_k1) * getBitWith(k_end_value- x_l_plus_value); //cur_k2 =0 - - if (cur_bits < min_bits) { - min_bits = cur_bits; - final_k_start_value = k_start_value; - final_x_l_plus = x_l_plus_value; - final_k_end_value = k_end_value; - final_x_u_minus = max_delta_value; - } - - int beta_max = getBitWith(max_delta_value - x_l_plus_value); - - int lower_outlier_cost = cur_k1 * getBitWith(k_start_value); - - - - for(int gamma = 0; gamma < beta_max; gamma++){ -// int x_u_plus_pow_beta = (int) (max_delta_value - pow(2,gamma)+1); - x_u_minus_value = x_u_minus_value_list[gamma]; - k_end_value = x_u_plus_value_list[gamma]; - cur_bits = 0; - cur_k2 = block_size - gamma_count_list[gamma]; - - cur_bits += Math.min((cur_k1 + cur_k2) * getBitWith(block_size-1), block_size + cur_k1 + cur_k2); - cur_bits += lower_outlier_cost; - if (cur_k1 + cur_k2 != block_size) - cur_bits += (block_size - 
cur_k1 - cur_k2) * getBitWith(x_u_minus_value - x_l_plus_value); - if (cur_k2 != 0) - cur_bits += cur_k2 * getBitWith(max_delta_value - k_end_value); - - - if (cur_bits < min_bits) { - min_bits = cur_bits; - final_k_start_value = k_start_value; - final_x_l_plus = x_l_plus_value; - final_k_end_value = k_end_value; - final_x_u_minus = x_u_minus_value; - } - - } -// end_value_i = unique_value_count - 1; -// for(int gamma = 0; gamma <= beta_max; gamma++){ -// for (; end_value_i > start_value_i; end_value_i--) { -// int x_u_plus_pow_beta = (int) (max_delta_value - pow(2,gamma)+1); -// x_u_minus_value = getUniqueValue(sorted_value_list[end_value_i-1], left_shift); -// k_end_value = getUniqueValue(sorted_value_list[end_value_i], left_shift); -// if(x_u_minus_value < x_u_plus_pow_beta && k_end_value >= x_u_plus_pow_beta){ -// cur_bits = 0; -// cur_k2 = block_size - getCount(sorted_value_list[end_value_i-1],mask); -// -// cur_bits += Math.min((cur_k1 + cur_k2) * getBitWith(block_size-1), block_size + cur_k1 + cur_k2); -// cur_bits += cur_k1 * getBitWith(k_start_value); -// if (cur_k1 + cur_k2 != block_size) -// cur_bits += (block_size - cur_k1 - cur_k2) * getBitWith(x_u_minus_value - x_l_plus_value); -// if (cur_k2 != 0) -// cur_bits += cur_k2 * getBitWith(max_delta_value - k_end_value); -// -// -// if (cur_bits < min_bits) { -// min_bits = cur_bits; -// final_k_start_value = k_start_value; -// final_x_l_plus = x_l_plus_value; -// final_k_end_value = k_end_value; -// final_x_u_minus = x_u_minus_value; -// } -// } else if (x_u_minus_value < x_u_plus_pow_beta && k_end_value < x_u_plus_pow_beta) { -// break; -// } -// } -// } -// - - } - - for(int beta = 0; beta < gamma_max; beta++){ - - int pow_beta = 1<<beta; - int start_value_i = 0; - int end_value_i = start_value_i+1; - - for (; start_value_i < unique_value_count-1; start_value_i++) { - long x_l_minusL = sorted_value_list[start_value_i]; - int x_l_minus = getUniqueValue(x_l_minusL, left_shift) ; - int x_l_plus = 
getUniqueValue(sorted_value_list[start_value_i+1], left_shift) ; - int x_u_plus_pow_beta = pow_beta+x_l_plus; - if(x_u_plus_pow_beta > max_delta_value) break; - - - - cur_k1 = getCount(x_l_minusL,mask); - int lower_outlier_cost = cur_k1 * getBitWith(x_l_minus); - - while ( end_value_i < unique_value_count) { -// if(beta==3 && end_value_i==22) -// { -// System.out.println(x_l_minus); -// System.out.println(x_l_plus); -// } - - int x_u_minus = getUniqueValue(sorted_value_list[end_value_i-1], left_shift); - int x_u_plus = getUniqueValue(sorted_value_list[end_value_i], left_shift); - if(x_u_minus < x_u_plus_pow_beta && x_u_plus >= x_u_plus_pow_beta){ - int cur_bits = 0; - int cur_k2 = block_size - getCount(sorted_value_list[end_value_i-1],mask); - - cur_bits += Math.min((cur_k1 + cur_k2) * getBitWith(block_size-1), block_size + cur_k1 + cur_k2); - cur_bits += lower_outlier_cost; - if (cur_k1 + cur_k2 != block_size) - cur_bits += (block_size - cur_k1 - cur_k2) * getBitWith(x_u_minus - x_l_plus); - if (cur_k2 != 0) - cur_bits += cur_k2 * getBitWith(max_delta_value - x_u_plus); - - - if (cur_bits < min_bits) { - min_bits = cur_bits; - final_k_start_value = x_l_minus; - final_x_l_plus = x_l_plus; - final_k_end_value = x_u_plus; - final_x_u_minus = x_u_minus; - } - break; - } -// else if (x_u_minus >= x_u_plus_pow_beta && x_u_plus >= x_u_plus_pow_beta) { -// break; -// } - - end_value_i++; - } - } - - } - - encode_pos = BOSEncodeBitsImprove(ts_block_delta, final_k_start_value, final_x_l_plus, final_k_end_value, final_x_u_minus, - max_delta_value, min_delta, encode_pos , cur_byte); - -// System.out.println(encode_pos); - - return encode_pos; - } - - - public static int BOSEncoder( - int[] data, int block_size, byte[] encoded_result) { - block_size++; - - int length_all = data.length; - - int encode_pos = 0; - int2Bytes(length_all,encode_pos,encoded_result); - encode_pos += 4; - - int block_num = length_all / block_size; - int2Bytes(block_size,encode_pos,encoded_result); - 
encode_pos+= 4; - - for (int i = 0; i < block_num; i++) { - encode_pos = BOSBlockEncoder(data, i, block_size, block_size,encode_pos,encoded_result); - } - - int remaining_length = length_all - block_num * block_size; - if (remaining_length <= 3) { - for (int i = remaining_length; i > 0; i--) { - int2Bytes(data[data.length - i], encode_pos, encoded_result); - encode_pos += 4; - } - - } - else { - - int start = block_num * block_size; - int remaining = length_all-start; - encode_pos = BOSBlockEncoder(data, block_num, block_size,remaining, encode_pos,encoded_result); - - } - - - return encode_pos; - } - public static int BOSBlockDecoder(byte[] encoded, int decode_pos, int[] value_list, int block_size, int[] value_pos_arr) { - - int k_byte = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - int k1_byte = (int) (k_byte % pow(2, 16)); - int k1 = k1_byte / 2; - int final_alpha = k1_byte % 2; - - int k2 = (int) (k_byte / pow(2, 16)); - - int value0 = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - value_list[value_pos_arr[0]] =value0; - value_pos_arr[0] ++; - - int min_delta = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_left_outlier = new ArrayList<>(); - ArrayList<Integer> final_right_outlier = new ArrayList<>(); - ArrayList<Integer> final_normal= new ArrayList<>();; - ArrayList<Integer> bitmap_outlier = new ArrayList<>(); - int final_k_start_value = 0; - int final_k_end_value = 0; - int bit_width_final = 0; - int left_bit_width = 0; - int right_bit_width = 0; - - if(k1!=0 || k2 != 0){ - final_k_start_value = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - final_k_end_value = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - bit_width_final = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - left_bit_width = bytes2Integer(encoded, 
decode_pos, 1); - decode_pos += 1; - right_bit_width = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - if (final_alpha == 0) { - int bitmap_bytes = (int) Math.ceil((double) (block_size + k1 + k2) / (double) 8); - for (int i = 0; i < bitmap_bytes; i++) { - bitmap_outlier.add(bytes2Integer(encoded, decode_pos, 1)); - decode_pos += 1; - } - int bitmap_outlier_i = 0; - int remaining_bits = 8; - int tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - int i = 0; - while (i < block_size ) { - if (remaining_bits > 1) { - int bit_i = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_i == 1) { - int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_left_right == 1) { - final_left_outlier_index.add(i); - } else { - final_right_outlier_index.add(i); - } - } - if (remaining_bits == 0) { - remaining_bits = 8; - if (bitmap_outlier_i >= bitmap_bytes) break; - tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - } - } else if (remaining_bits == 1) { - int bit_i = tmp & 0x1; - remaining_bits = 8; - if (bitmap_outlier_i >= bitmap_bytes) break; - tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - if (bit_i == 1) { - int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_left_right == 1) { - final_left_outlier_index.add(i); - } else { - final_right_outlier_index.add(i); - } - } - } - i++; - } - } else { - ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); - final_left_outlier_index = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(block_size-1), k1, decode_pos_result_left); - decode_pos = (decode_pos_result_left.get(0)); - ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); - final_right_outlier_index = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(block_size-1), k2, decode_pos_result_right); - decode_pos = (decode_pos_result_right.get(0)); - } - }else { - bit_width_final = bytes2Integer(encoded, 
decode_pos, 1); - decode_pos += 1; - } - - - - - ArrayList<Integer> decode_pos_normal = new ArrayList<>(); - final_normal = decodeOutlier2Bytes(encoded, decode_pos, bit_width_final, block_size - k1 - k2, decode_pos_normal); - - decode_pos = decode_pos_normal.get(0); - if (k1 != 0) { - ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); - final_left_outlier = decodeOutlier2Bytes(encoded, decode_pos, left_bit_width, k1, decode_pos_result_left); - decode_pos = decode_pos_result_left.get(0); - } - if (k2 != 0) { - ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); - final_right_outlier = decodeOutlier2Bytes(encoded, decode_pos, right_bit_width, k2, decode_pos_result_right); - decode_pos = decode_pos_result_right.get(0); - } - int left_outlier_i = 0; - int right_outlier_i = 0; - int normal_i = 0; - int pre_v = value0; -// int final_k_end_value = (int) (final_k_start_value + pow(2, bit_width_final)); - - - for (int i = 0; i < block_size; i++) { - int current_delta; - if (left_outlier_i >= k1) { - if (right_outlier_i >= k2) { - current_delta = min_delta + final_normal.get(normal_i) + final_k_start_value; - normal_i++; - } else if (i == final_right_outlier_index.get(right_outlier_i)) { - current_delta = min_delta + final_right_outlier.get(right_outlier_i) + final_k_end_value; - right_outlier_i++; - } else { - current_delta = min_delta + final_normal.get(normal_i) + final_k_start_value; - normal_i++; - } - } else if (i == final_left_outlier_index.get(left_outlier_i)) { - current_delta = min_delta + final_left_outlier.get(left_outlier_i); - left_outlier_i++; - } else { - - if (right_outlier_i >= k2) { - current_delta = min_delta + final_normal.get(normal_i) + final_k_start_value; - normal_i++; - } else if (i == final_right_outlier_index.get(right_outlier_i)) { - current_delta = min_delta + final_right_outlier.get(right_outlier_i) + final_k_end_value; - right_outlier_i++; - } else { - current_delta = min_delta + final_normal.get(normal_i) + 
final_k_start_value; - normal_i++; - } - } - - pre_v = current_delta + pre_v; - value_list[value_pos_arr[0]] = pre_v; - value_pos_arr[0]++; - } - return decode_pos; - } - - public static void BOSDecoder(byte[] encoded) { - - int decode_pos = 0; - int length_all = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - int block_size = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - - - int block_num = length_all / block_size; - int remain_length = length_all - block_num * block_size; - - - int[] value_list = new int[length_all+block_size]; - block_size--; - - int[] value_pos_arr = new int[1]; - for (int k = 0; k < block_num; k++) { - - - decode_pos = BOSBlockDecoder(encoded, decode_pos, value_list, block_size,value_pos_arr); - - } - - if (remain_length <= 3) { - for (int i = 0; i < remain_length; i++) { - int value_end = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - value_list[value_pos_arr[0]] = value_end; - value_pos_arr[0]++; - } - } else { - remain_length --; - BOSBlockDecoder(encoded, decode_pos, value_list, remain_length, value_pos_arr); - } - } - - public static int EncodeBits(int num, - int bit_width, - int encode_pos, - byte[] cur_byte, - int[] bit_index_list){ - // 找到要插入的位的索引 - int bit_index = bit_index_list[0] ;//cur_byte[encode_pos + 1]; - - // 计算数值的起始位位置 - int remaining_bits = bit_width; - - while (remaining_bits > 0) { - // 计算在当前字节中可以使用的位数 - int available_bits = bit_index; - int bits_to_write = Math.min(available_bits, remaining_bits); - - // 更新 bit_index - bit_index = available_bits - bits_to_write; - - // 计算要写入的位的掩码和数值 - int mask = (1 << bits_to_write) - 1; - int bits = (num >> (remaining_bits - bits_to_write)) & mask; - - // 写入到当前位置 - cur_byte[encode_pos] &= (byte) ~(mask << bit_index); // 清除对应位置的位 - cur_byte[encode_pos] |= (byte) (bits << bit_index); - - // 更新位宽和数值 - remaining_bits -= bits_to_write; - if (bit_index == 0) { - bit_index = 8; - encode_pos++; - } - } - bit_index_list[0] = bit_index; -// 
cur_byte[encode_pos + 1] = (byte) bit_index; - return encode_pos; - } - private static int BOSEncodeBitsImprove(int[] ts_block_delta, - int final_k_start_value, - int final_x_l_plus, - int final_k_end_value, - int final_x_u_minus, - int max_delta_value, - int[] min_delta, - int encode_pos, - byte[] cur_byte) { - int block_size = ts_block_delta.length; - - ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_left_outlier = new ArrayList<>(); - ArrayList<Integer> final_right_outlier = new ArrayList<>(); - ArrayList<Integer> final_normal = new ArrayList<>(); - int k1 = 0; - int k2 = 0; - - - - ArrayList<Integer> bitmap_outlier = new ArrayList<>(); - int index_bitmap_outlier = 0; - int cur_index_bitmap_outlier_bits = 0; - for (int i = 0; i < block_size; i++) { - int cur_value = ts_block_delta[i]; - if ( cur_value<= final_k_start_value) { -// encode_pos = EncodeBits(cur_value,left_bit_width,encode_pos,cur_byte); -// final_left_outlier.add(cur_value); - final_left_outlier_index.add(i); - if (cur_index_bitmap_outlier_bits % 8 != 7) { - index_bitmap_outlier <<= 2; - index_bitmap_outlier += 3; - cur_index_bitmap_outlier_bits += 2; - } else { - index_bitmap_outlier <<= 1; - index_bitmap_outlier += 1; - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 1; - cur_index_bitmap_outlier_bits = 1; - } - k1++; - - - } else if (cur_value >= final_k_end_value) { -// encode_pos = EncodeBits(cur_value- final_k_end_value,right_bit_width,encode_pos,cur_byte); -// final_right_outlier.add(cur_value - final_k_end_value); - final_right_outlier_index.add(i); - if (cur_index_bitmap_outlier_bits % 8 != 7) { - index_bitmap_outlier <<= 2; - index_bitmap_outlier += 2; - cur_index_bitmap_outlier_bits += 2; - } else { - index_bitmap_outlier <<= 1; - index_bitmap_outlier += 1; - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 0; - cur_index_bitmap_outlier_bits 
= 1; - } - k2++; - - } else { -// final_normal.add(cur_value - final_x_l_plus); -// encode_pos = EncodeBits(cur_value- final_x_l_plus,right_bit_width,encode_pos,cur_byte); - index_bitmap_outlier <<= 1; - cur_index_bitmap_outlier_bits += 1; - } - if (cur_index_bitmap_outlier_bits % 8 == 0) { - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 0; - } - } - if (cur_index_bitmap_outlier_bits % 8 != 0) { - - index_bitmap_outlier <<= (8 - cur_index_bitmap_outlier_bits % 8); - - index_bitmap_outlier &= 0xFF; - bitmap_outlier.add(index_bitmap_outlier); - } - - int final_alpha = ((k1 + k2) * getBitWith(block_size-1)) <= (block_size + k1 + k2) ? 1 : 0; - - - int k_byte = (k1 << 1); - k_byte += final_alpha; - k_byte += (k2 << 16); - - int2Bytes(k_byte,encode_pos,cur_byte); - encode_pos += 4; - - - int2Bytes(min_delta[0],encode_pos,cur_byte); - encode_pos += 4; - int2Bytes(min_delta[1],encode_pos,cur_byte); - encode_pos += 4; - - int bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); - intByte2Bytes(bit_width_final,encode_pos,cur_byte); - encode_pos += 1; - int[] bit_index_list = new int[1]; - bit_index_list[0] = 8; - - if(final_k_start_value<0 && final_k_end_value > max_delta_value){ -// int bit_width_final= getBitWith(final_x_u_minus - final_x_l_plus); -// cur_byte[encode_pos+1] = 8; - for (int cur_value : ts_block_delta) { - encode_pos = EncodeBits(cur_value, bit_width_final, encode_pos, cur_byte, bit_index_list); -// final_normal.add(cur_value); - } - if(bit_index_list[0] != 8){ - encode_pos ++; - } -// cur_byte[encode_pos+1] = 0; -// encode_pos = encodeOutlier2Bytes(final_normal, bit_width_final,encode_pos,cur_byte); - return encode_pos; - } - - - int left_bit_width = getBitWith(final_k_start_value);//final_left_max - int right_bit_width = getBitWith(max_delta_value - final_k_end_value);//final_right_min - int2Bytes(final_x_l_plus,encode_pos,cur_byte); - encode_pos += 4; - int2Bytes(final_k_end_value,encode_pos,cur_byte); - encode_pos += 4; - 
-// bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); -// intByte2Bytes(bit_width_final,encode_pos,cur_byte); -// encode_pos += 1; - intByte2Bytes(left_bit_width,encode_pos,cur_byte); - encode_pos += 1; - intByte2Bytes(right_bit_width,encode_pos,cur_byte); - encode_pos += 1; - - if (final_alpha == 0) { // 0 - - for (int i : bitmap_outlier) { - - intByte2Bytes(i,encode_pos,cur_byte); - encode_pos += 1; - } - } else { - encode_pos = encodeOutlier2Bytes(final_left_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); - encode_pos = encodeOutlier2Bytes(final_right_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); - } -// cur_byte[encode_pos+1] = 8; -// bit_index_list[0] = 8; - for (int cur_value : ts_block_delta) { - if (cur_value <= final_k_start_value) { - encode_pos = EncodeBits(cur_value, left_bit_width, encode_pos, cur_byte,bit_index_list); - } else if (cur_value >= final_k_end_value) { - encode_pos = EncodeBits(cur_value - final_k_end_value, right_bit_width, encode_pos, cur_byte,bit_index_list); - } else { - encode_pos = EncodeBits(cur_value - final_x_l_plus, bit_width_final, encode_pos, cur_byte,bit_index_list); - } - } - if(bit_index_list[0] != 8){ - encode_pos ++; - } - -// cur_byte[encode_pos+1] = 0; - -// if(k1==0 && k2==0){ -// intByte2Bytes(bit_width_final,encode_pos,cur_byte); -// encode_pos += 1; -// -// -// } -// else{ -// int2Bytes(final_x_l_plus,encode_pos,cur_byte); -// encode_pos += 4; -// int2Bytes(final_k_end_value,encode_pos,cur_byte); -// encode_pos += 4; -// -// bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); -// intByte2Bytes(bit_width_final,encode_pos,cur_byte); -// encode_pos += 1; -// intByte2Bytes(left_bit_width,encode_pos,cur_byte); -// encode_pos += 1; -// intByte2Bytes(right_bit_width,encode_pos,cur_byte); -// encode_pos += 1; -// if (final_alpha == 0) { // 0 -// -// for (int i : bitmap_outlier) { -// -// intByte2Bytes(i,encode_pos,cur_byte); -// encode_pos += 1; -// } -// } else { -// 
encode_pos = encodeOutlier2Bytes(final_left_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); -// encode_pos = encodeOutlier2Bytes(final_right_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); -// } -// } - - -// if(k1+k2!=block_size) -// encode_pos = encodeOutlier2Bytes(final_normal, bit_width_final,encode_pos,cur_byte); -// if (k1 != 0) -// encode_pos = encodeOutlier2Bytes(final_left_outlier, left_bit_width,encode_pos,cur_byte); -// if (k2 != 0) -// encode_pos = encodeOutlier2Bytes(final_right_outlier, right_bit_width,encode_pos,cur_byte); - return encode_pos; - - } - - private static int BOSBlockEncoderImprove(int[] ts_block, int block_i, int block_size, int remaining ,int encode_pos , byte[] cur_byte) { - - int[] min_delta = new int[3]; - int[] ts_block_delta = getAbsDeltaTsBlock(ts_block, block_i, block_size, remaining, min_delta); - - - block_size = remaining-1; - int max_delta_value = min_delta[2]; - int[] value_list = new int[block_size]; - int unique_value_count = 0; - int[] value_count_list = new int[max_delta_value+1]; - for(int value:ts_block_delta){ - if(value_count_list[value]==0){ - value_count_list[value] = 1; - value_list[unique_value_count] = value; - unique_value_count ++; - }else{ - value_count_list[value] ++; - } - } - - int left_shift = getBitWith(block_size); - int mask = (1 << left_shift) - 1; - long[] sorted_value_list = new long[unique_value_count]; - int count = 0; - - for(int i=0;i<unique_value_count;i++){ - int value = value_list[i]; - sorted_value_list[i] = (((long) value) << left_shift) + value_count_list[value]; - } - Arrays.sort(sorted_value_list); - - for(int i=0;i<unique_value_count;i++){ - count += getCount(sorted_value_list[i], mask); - sorted_value_list[i] = (((long)getUniqueValue(sorted_value_list[i], left_shift) ) << left_shift) + count;//new_value_list[i] - } - - - int final_k_start_value = -1; // x_l_minus - int final_x_l_plus = 0; // x_l_plus - int final_k_end_value = max_delta_value+1; // x_u_plus 
- int final_x_u_minus = max_delta_value; // x_u_minus - - int min_bits = 0; - min_bits += (getBitWith(final_k_end_value - final_k_start_value - 2 ) * (block_size)); - - int cur_k1 = 0; - - int x_l_plus_value = 0; // x_l_plus - int x_u_minus_value = max_delta_value; // x_u_plus - - for (int end_value_i = 1; end_value_i < unique_value_count; end_value_i++) { - - x_u_minus_value = getUniqueValue(sorted_value_list[end_value_i-1], left_shift); - int x_u_plus_value = getUniqueValue(sorted_value_list[end_value_i], left_shift); - int cur_bits = 0; - int cur_k2 = block_size - getCount(sorted_value_list[end_value_i-1],mask); - cur_bits += Math.min((cur_k2 + cur_k1) * getBitWith(block_size-1), block_size + cur_k2 + cur_k1); - if (cur_k1 + cur_k2 != block_size) - cur_bits += (block_size - cur_k2) * getBitWith(x_u_minus_value - x_l_plus_value); // cur_k1 = 0 - if (cur_k2 != 0) - cur_bits += cur_k2 * getBitWith(max_delta_value - x_u_plus_value); - - - if (cur_bits < min_bits) { - min_bits = cur_bits; - final_x_u_minus = x_u_minus_value; - final_k_end_value = x_u_plus_value; - } - } - - int k_start_value = -1; // x_l_minus -// int beta_max_all = getBitWith(max_delta_value)+1; -// int[][] hash_table_count = new int[unique_value_count][beta_max_all]; -// int[][] hash_table_value = new int[unique_value_count][beta_max_all]; -// int cur_value = getUniqueValue(sorted_value_list[0], left_shift) ; -// int next_value = getUniqueValue(sorted_value_list[1], left_shift) ; -// -// for (int value_i = 0; value_i < unique_value_count; value_i++) { -// -// -// next_value = getUniqueValue(sorted_value_list[value_i + 1], left_shift) ; -// long k_start_valueL = sorted_value_list[value_i]; -// hash_table_count[value_i][0] = getCount(k_start_valueL,mask); -// -// int beta_max = getBitWith(max_delta_value - cur_value); -// for(int beta = 1; beta <= beta_max; beta++){ -// -// } -// cur_value = next_value ; -// -// } - - - int gamma_max = getBitWith(max_delta_value); - int[] gamma_count_list = new 
int[gamma_max+1]; - int[] x_u_minus_value_list = new int[gamma_max+1]; - int[] x_u_plus_value_list = new int[gamma_max+1]; - int end_i = unique_value_count - 1; - for(int gamma = 0; gamma <= gamma_max; gamma++) { - int x_u_plus_pow_beta = max_delta_value - (1<<gamma) + 1; -// int x_u_plus_pow_beta = (int) (max_delta_value - pow(2, gamma) + 1); -// - for (; end_i > 0; end_i--) { - x_u_minus_value = getUniqueValue(sorted_value_list[end_i - 1], left_shift); - int x_u_plus_value = getUniqueValue(sorted_value_list[end_i], left_shift); - if (x_u_minus_value < x_u_plus_pow_beta && x_u_plus_value >= x_u_plus_pow_beta){ - gamma_count_list[gamma] = getCount(sorted_value_list[end_i-1],mask); - x_u_minus_value_list[gamma] = x_u_minus_value; - x_u_plus_value_list[gamma] = x_u_plus_value; - } else if (x_u_minus_value < x_u_plus_pow_beta) { - break; - } - } - } - for(int gamma = 1; gamma < gamma_max; gamma++) { - if(gamma_count_list[gamma]==0){ - gamma_count_list[gamma] = gamma_count_list[gamma-1]; - x_u_minus_value_list[gamma] = x_u_minus_value_list[gamma-1]; - x_u_plus_value_list[gamma] = x_u_plus_value_list[gamma-1]; - } - } - - for (int start_value_i = 0; start_value_i < unique_value_count-1; start_value_i++) { - long k_start_valueL = sorted_value_list[start_value_i]; - k_start_value = getUniqueValue(k_start_valueL, left_shift) ; - x_l_plus_value = getUniqueValue(sorted_value_list[start_value_i+1], left_shift) ; - - - cur_k1 = getCount(k_start_valueL,mask); - - int k_end_value; - int cur_bits; - int cur_k2; - k_end_value = max_delta_value + 1; - - cur_bits = 0; - cur_k2 = 0; - cur_bits += Math.min((cur_k2 + cur_k1) * getBitWith(block_size-1), block_size + cur_k2 + cur_k1); - cur_bits += cur_k1 * getBitWith(k_start_value); - if (cur_k1 + cur_k2 != block_size) - cur_bits += (block_size - cur_k1) * getBitWith(k_end_value- x_l_plus_value); //cur_k2 =0 - - if (cur_bits < min_bits) { - min_bits = cur_bits; - final_k_start_value = k_start_value; - final_x_l_plus = x_l_plus_value; - 
final_k_end_value = k_end_value; - final_x_u_minus = max_delta_value; - } - - int beta_max = getBitWith(max_delta_value - x_l_plus_value); - - int lower_outlier_cost = cur_k1 * getBitWith(k_start_value); - - - - for(int gamma = 0; gamma < beta_max; gamma++){ -// int x_u_plus_pow_beta = (int) (max_delta_value - pow(2,gamma)+1); - x_u_minus_value = x_u_minus_value_list[gamma]; - k_end_value = x_u_plus_value_list[gamma]; - cur_bits = 0; - cur_k2 = block_size - gamma_count_list[gamma]; - - cur_bits += Math.min((cur_k1 + cur_k2) * getBitWith(block_size-1), block_size + cur_k1 + cur_k2); - cur_bits += lower_outlier_cost; - if (cur_k1 + cur_k2 != block_size) - cur_bits += (block_size - cur_k1 - cur_k2) * getBitWith(x_u_minus_value - x_l_plus_value); - if (cur_k2 != 0) - cur_bits += cur_k2 * getBitWith(max_delta_value - k_end_value); - - - if (cur_bits < min_bits) { - min_bits = cur_bits; - final_k_start_value = k_start_value; - final_x_l_plus = x_l_plus_value; - final_k_end_value = k_end_value; - final_x_u_minus = x_u_minus_value; - } - - } -// end_value_i = unique_value_count - 1; -// for(int gamma = 0; gamma <= beta_max; gamma++){ -// for (; end_value_i > start_value_i; end_value_i--) { -// int x_u_plus_pow_beta = (int) (max_delta_value - pow(2,gamma)+1); -// x_u_minus_value = getUniqueValue(sorted_value_list[end_value_i-1], left_shift); -// k_end_value = getUniqueValue(sorted_value_list[end_value_i], left_shift); -// if(x_u_minus_value < x_u_plus_pow_beta && k_end_value >= x_u_plus_pow_beta){ -// cur_bits = 0; -// cur_k2 = block_size - getCount(sorted_value_list[end_value_i-1],mask); -// -// cur_bits += Math.min((cur_k1 + cur_k2) * getBitWith(block_size-1), block_size + cur_k1 + cur_k2); -// cur_bits += cur_k1 * getBitWith(k_start_value); -// if (cur_k1 + cur_k2 != block_size) -// cur_bits += (block_size - cur_k1 - cur_k2) * getBitWith(x_u_minus_value - x_l_plus_value); -// if (cur_k2 != 0) -// cur_bits += cur_k2 * getBitWith(max_delta_value - k_end_value); -// -// -// 
if (cur_bits < min_bits) { -// min_bits = cur_bits; -// final_k_start_value = k_start_value; -// final_x_l_plus = x_l_plus_value; -// final_k_end_value = k_end_value; -// final_x_u_minus = x_u_minus_value; -// } -// } else if (x_u_minus_value < x_u_plus_pow_beta && k_end_value < x_u_plus_pow_beta) { -// break; -// } -// } -// } -// - - } - - for(int beta = 0; beta < gamma_max; beta++){ - - int pow_beta = 1<<beta; - int start_value_i = 0; - int end_value_i = start_value_i+1; - - for (; start_value_i < unique_value_count-1; start_value_i++) { - long x_l_minusL = sorted_value_list[start_value_i]; - int x_l_minus = getUniqueValue(x_l_minusL, left_shift) ; - int x_l_plus = getUniqueValue(sorted_value_list[start_value_i+1], left_shift) ; - int x_u_plus_pow_beta = pow_beta+x_l_plus; - if(x_u_plus_pow_beta > max_delta_value) break; - - - - cur_k1 = getCount(x_l_minusL,mask); - int lower_outlier_cost = cur_k1 * getBitWith(x_l_minus); - - while ( end_value_i < unique_value_count) { -// if(beta==3 && end_value_i==22) -// { -// System.out.println(x_l_minus); -// System.out.println(x_l_plus); -// } - - int x_u_minus = getUniqueValue(sorted_value_list[end_value_i-1], left_shift); - int x_u_plus = getUniqueValue(sorted_value_list[end_value_i], left_shift); - if(x_u_minus < x_u_plus_pow_beta && x_u_plus >= x_u_plus_pow_beta){ - int cur_bits = 0; - int cur_k2 = block_size - getCount(sorted_value_list[end_value_i-1],mask); - - cur_bits += Math.min((cur_k1 + cur_k2) * getBitWith(block_size-1), block_size + cur_k1 + cur_k2); - cur_bits += lower_outlier_cost; - if (cur_k1 + cur_k2 != block_size) - cur_bits += (block_size - cur_k1 - cur_k2) * getBitWith(x_u_minus - x_l_plus); - if (cur_k2 != 0) - cur_bits += cur_k2 * getBitWith(max_delta_value - x_u_plus); - - - if (cur_bits < min_bits) { - min_bits = cur_bits; - final_k_start_value = x_l_minus; - final_x_l_plus = x_l_plus; - final_k_end_value = x_u_plus; - final_x_u_minus = x_u_minus; - } - break; - } -// else if (x_u_minus >= 
x_u_plus_pow_beta && x_u_plus >= x_u_plus_pow_beta) { -// break; -// } - - end_value_i++; - } - } - - } - - encode_pos = BOSEncodeBitsImprove(ts_block_delta, final_k_start_value, final_x_l_plus, final_k_end_value, final_x_u_minus, - max_delta_value, min_delta, encode_pos , cur_byte); - -// System.out.println(encode_pos); - - return encode_pos; - } - - - public static int BOSEncoderImprove( - int[] data, int block_size, byte[] encoded_result) { - block_size++; - - int length_all = data.length; - - int encode_pos = 0; - int2Bytes(length_all,encode_pos,encoded_result); - encode_pos += 4; - - int block_num = length_all / block_size; - int2Bytes(block_size,encode_pos,encoded_result); - encode_pos+= 4; - - for (int i = 0; i < block_num; i++) { - encode_pos = BOSBlockEncoderImprove(data, i, block_size, block_size,encode_pos,encoded_result); - } - - int remaining_length = length_all - block_num * block_size; - if (remaining_length <= 3) { - for (int i = remaining_length; i > 0; i--) { - int2Bytes(data[data.length - i], encode_pos, encoded_result); - encode_pos += 4; - } - } - else { - int start = block_num * block_size; - int remaining = length_all-start; - encode_pos = BOSBlockEncoderImprove(data, block_num, block_size,remaining, encode_pos,encoded_result); - - } - return encode_pos; - } - - - public static int DecodeBits(byte[] cur_byte, int bit_width, int[] decode_pos_list) { - int decode_pos = decode_pos_list[0]; - int bit_index = decode_pos_list[1]; //cur_byte[decode_pos + 1]; - int remaining_bits = bit_width; - int num = 0; - - while (remaining_bits > 0) { - int available_bits = bit_index; - int bits_to_read = Math.min(available_bits, remaining_bits); - - // 计算要读取的位的掩码 - int mask = (1 << bits_to_read) - 1; - int bits = (cur_byte[decode_pos] >> (available_bits - bits_to_read)) & mask; - - // 将读取的位合并到结果中 - num = (num << bits_to_read) | bits; - - // 更新位宽和 bit_index - remaining_bits -= bits_to_read; - bit_index = available_bits - bits_to_read; - - if (bit_index == 0) { - 
bit_index = 8; - decode_pos++; - } - } - decode_pos_list[0] = decode_pos; - decode_pos_list[1] = bit_index; - - return num; - } - public static int BOSBlockDecoderImprove(byte[] encoded, int decode_pos, int[] value_list, int block_size, int[] value_pos_arr) { - - int k_byte = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - int k1_byte = (int) (k_byte % pow(2, 16)); - int k1 = k1_byte / 2; - int final_alpha = k1_byte % 2; - - int k2 = (int) (k_byte / pow(2, 16)); - - int value0 = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - value_list[value_pos_arr[0]] =value0; - value_pos_arr[0] ++; - - int min_delta = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - int bit_width_final = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - int[] decode_list = new int[2]; - decode_list[0]= decode_pos; - decode_list[1]= 8; - - int valuePos = value_pos_arr[0]; - - if(k1==0 && k2==0){ - int pre_v = value0; - for (int i = 0; i < block_size; i++) { - int cur_delta = min_delta + DecodeBits(encoded, bit_width_final, decode_list); - pre_v += cur_delta; - value_list[valuePos++] = pre_v; - } - value_pos_arr[0] = valuePos; - return decode_list[0]; - } - - ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); -// ArrayList<Integer> final_left_outlier = new ArrayList<>(); -// ArrayList<Integer> final_right_outlier = new ArrayList<>(); -// ArrayList<Integer> final_normal= new ArrayList<>();; - ArrayList<Integer> bitmap_outlier = new ArrayList<>(); - int final_k_start_value = 0; - int final_k_end_value = 0; -// int bit_width_final = 0; - int left_bit_width = 0; - int right_bit_width = 0; - - final_k_start_value = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - final_k_end_value = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - -// bit_width_final = bytes2Integer(encoded, decode_pos, 1); -// decode_pos += 1; - - left_bit_width = 
bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - right_bit_width = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - if (final_alpha == 0) { - int bitmap_bytes = (int) Math.ceil((double) (block_size + k1 + k2) / (double) 8); - for (int i = 0; i < bitmap_bytes; i++) { - bitmap_outlier.add(bytes2Integer(encoded, decode_pos, 1)); - decode_pos += 1; - } - int bitmap_outlier_i = 0; - int remaining_bits = 8; - int tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - int i = 0; - while (i < block_size ) { - if (remaining_bits > 1) { - int bit_i = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_i == 1) { - int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_left_right == 1) { - final_left_outlier_index.add(i); - } else { - final_right_outlier_index.add(i); - } - } - if (remaining_bits == 0) { - remaining_bits = 8; - if (bitmap_outlier_i >= bitmap_bytes) break; - tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - } - } else if (remaining_bits == 1) { - int bit_i = tmp & 0x1; - remaining_bits = 8; - if (bitmap_outlier_i >= bitmap_bytes) break; - tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - if (bit_i == 1) { - int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_left_right == 1) { - final_left_outlier_index.add(i); - } else { - final_right_outlier_index.add(i); - } - } - } - i++; - } - } else { - ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); - final_left_outlier_index = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(block_size-1), k1, decode_pos_result_left); - decode_pos = (decode_pos_result_left.get(0)); - ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); - final_right_outlier_index = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(block_size-1), k2, decode_pos_result_right); - decode_pos = (decode_pos_result_right.get(0)); - } - - - - - -// ArrayList<Integer> 
decode_pos_normal = new ArrayList<>(); -// final_normal = decodeOutlier2Bytes(encoded, decode_pos, bit_width_final, block_size - k1 - k2, decode_pos_normal); -// -// decode_pos = decode_pos_normal.get(0); -// if (k1 != 0) { -// ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); -// final_left_outlier = decodeOutlier2Bytes(encoded, decode_pos, left_bit_width, k1, decode_pos_result_left); -// decode_pos = decode_pos_result_left.get(0); -// } -// if (k2 != 0) { -// ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); -// final_right_outlier = decodeOutlier2Bytes(encoded, decode_pos, right_bit_width, k2, decode_pos_result_right); -// decode_pos = decode_pos_result_right.get(0); -// } - int left_outlier_i = 0; - int right_outlier_i = 0; - int normal_i = 0; - int pre_v = value0; -// int final_k_end_value = (int) (final_k_start_value + pow(2, bit_width_final)); - -// Precompute constants - int normalOffset = min_delta + final_k_start_value; - int rightOutlierOffset = min_delta + final_k_end_value; - -// Initialize indices and pre-fetch next outlier positions - int leftOutlierNextIndex = (left_outlier_i < k1) ? final_left_outlier_index.get(left_outlier_i) : Integer.MAX_VALUE; - int rightOutlierNextIndex = (right_outlier_i < k2) ? final_right_outlier_index.get(right_outlier_i) : Integer.MAX_VALUE; - decode_list[0]= decode_pos; -// decode_list[1]= 8; - // Use a local variable for the position - for (int i = 0; i < block_size; i++) { - int currentDelta; - if (i == leftOutlierNextIndex) { - // Process left outlier - currentDelta = min_delta + DecodeBits(encoded, left_bit_width, decode_list); // final_left_outlier.get(left_outlier_i); - left_outlier_i++; - leftOutlierNextIndex = (left_outlier_i < k1) ? 
final_left_outlier_index.get(left_outlier_i) : Integer.MAX_VALUE; - } else if (i == rightOutlierNextIndex) { - // Process right outlier - currentDelta = rightOutlierOffset + DecodeBits(encoded, right_bit_width, decode_list);// final_right_outlier.get(right_outlier_i); - right_outlier_i++; - rightOutlierNextIndex = (right_outlier_i < k2) ? final_right_outlier_index.get(right_outlier_i) : Integer.MAX_VALUE; - } else { - // Process normal value - currentDelta = normalOffset + DecodeBits(encoded, bit_width_final, decode_list); - normal_i++; - } - - // Update the cumulative value and store it - pre_v += currentDelta; - value_list[valuePos++] = pre_v; - } - value_pos_arr[0] = valuePos; - if(decode_list[1]!=8){ - return decode_list[0]+1; - }else { - return decode_list[0]; - } -// decode_pos = decode_list[0]; -// Update the position in the array - - -// return decode_pos; - } - - public static void BOSDecoderImprove(byte[] encoded) { - - int decode_pos = 0; - int length_all = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - int block_size = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - - - int block_num = length_all / block_size; - int remain_length = length_all - block_num * block_size; - - - int[] value_list = new int[length_all+block_size]; - block_size--; - - int[] value_pos_arr = new int[1]; - for (int k = 0; k < block_num; k++) { - - - decode_pos = BOSBlockDecoderImprove(encoded, decode_pos, value_list, block_size,value_pos_arr); - - } - - if (remain_length <= 3) { - for (int i = 0; i < remain_length; i++) { - int value_end = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - value_list[value_pos_arr[0]] = value_end; - value_pos_arr[0]++; - } - } else { - remain_length --; - BOSBlockDecoderImprove(encoded, decode_pos, value_list, remain_length, value_pos_arr); - } - } - - @Test - public void BOSDecodeImproveTest() throws IOException { - String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/"; // your data path 
-// String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String output_parent_dir = parent_dir + "icde0802/supply_experiment/R1O4_decode_time/compression_ratio/bos_b"; -// String output_parent_dir = parent_dir + "icde0802/compression_ratio/test"; - - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } - - output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 -// 
dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 -// dataset_block_size.add(1024); - - int repeatTime2 = 100; -// for (int file_i = 9; file_i < 10; file_i++) { -// - for (int file_i = 0; file_i < input_path_list.size(); file_i++) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { -// f=tempList[2]; - System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - - loader.readHeaders(); - while (loader.readRecord()) { -// String value = loader.getValues()[index]; - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); -// data.add(Integer.valueOf(value)); - } - inputStream.close(); - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; 
- - - int length = 0; - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - length = BOSEncoderImprove(data2_arr, dataset_block_size.get(file_i), encoded_result); - } - - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - compressed_size += length; - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoderImprove(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - - String[] record = { - f.toString(), - "TS_2DIFF+BOS-B", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); -// break; - } - writer.close(); - } - } - - @Test - public void BOSEncodeImproveTest() throws IOException { -// String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/"; // your data path - String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String output_parent_dir = parent_dir + "icde0802/compression_ratio/bos_b_improve"; -// String output_parent_dir = parent_dir + "icde0802/compression_ratio/test"; - - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - 
dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } - - output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 -// dataset_block_size.add(1024); - - int repeatTime2 = 200; -// for (int file_i = 9; file_i < 10; file_i++) { -// - for (int file_i = 0; file_i < input_path_list.size(); file_i++) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding 
Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { -// f=tempList[2]; - System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - - loader.readHeaders(); - while (loader.readRecord()) { -// String value = loader.getValues()[index]; - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); -// data.add(Integer.valueOf(value)); - } - inputStream.close(); - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; - - - int length = 0; - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - length = BOSEncoderImprove(data2_arr, dataset_block_size.get(file_i), encoded_result); - } - - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - compressed_size += length; - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoderImprove(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - - String[] record = { - f.toString(), - "TS_2DIFF+BOS-B", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); -// break; - } - writer.close(); - } - } - - @Test - public void BOSVaryBlockSize() 
throws IOException, InterruptedException { -// String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/"; // your data path - String output_parent_dir = parent_dir + "icde0802/compression_ratio/block_size_bos_improve"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - } - - output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 - - output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 - - output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 - - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 - - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 - - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 - - output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 - - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 - - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 - - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 
- - int repeatTime2 = 100; -// int[] file_i_list = {0,9,10}; -// for (int file_i = 9; file_i < 10; file_i++) { -// for(int file_i :file_i_list){ - for (int file_i = 0; file_i < input_path_list.size(); file_i++) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Block Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { - System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - - loader.readHeaders(); - - while (loader.readRecord()) { - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); - } - inputStream.close(); - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - -// for (int block_size_i = 6; block_size_i > 5; block_size_i--) { - for (int block_size_i = 13; block_size_i > 5; block_size_i--) { - int block_size = (int) Math.pow(2, block_size_i); - System.out.println(block_size); - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; - - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - compressed_size = BOSEncoderImprove(data2_arr, block_size, encoded_result); - } - - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - double 
ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoderImprove(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - String[] record = { - f.toString(), - "TS_2DIFF+BOS-B-Improve", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(block_size_i), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); - - } -// Thread.sleep(10000); - } - writer.close(); - - } - } - - @Test - public void BOSOUOTest() throws IOException { - String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/"; // your data path -// String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String output_parent_dir = parent_dir + "icde0802/supply_experiment/R2O3_lower_outlier_compare/compression_ratio/bos"; -// String output_parent_dir = parent_dir + "icde0802/compression_ratio/test"; - - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } - - output_path_list.add(output_parent_dir + 
"/CS-Sensors_ratio.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 -// dataset_block_size.add(1024); - - int repeatTime2 = 200; -// for (int file_i = 9; file_i < 10; file_i++) { -// - for (int file_i = 0; file_i < input_path_list.size(); file_i++) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { -// f=tempList[2]; - 
System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - - loader.readHeaders(); - while (loader.readRecord()) { -// String value = loader.getValues()[index]; - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); -// data.add(Integer.valueOf(value)); - } - inputStream.close(); - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; - - - int length = 0; - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - length = BOSEncoder(data2_arr, dataset_block_size.get(file_i), encoded_result); - } - - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - compressed_size += length; - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoder(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - - String[] record = { - f.toString(), - "TS_2DIFF+BOS-B", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); -// break; - } - writer.close(); - } - } - - -}
diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/TSDIFFBOSMImproveTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/TSDIFFBOSMImproveTest.java deleted file mode 100644 index e218148..0000000 --- a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/TSDIFFBOSMImproveTest.java +++ /dev/null
@@ -1,3016 +0,0 @@ -package org.apache.iotdb.tsfile.encoding; - -import com.csvreader.CsvReader; -import com.csvreader.CsvWriter; -import org.apache.commons.compress.archivers.sevenz.SevenZArchiveEntry; -import org.apache.commons.compress.archivers.sevenz.SevenZOutputFile; -import java.nio.channels.Channels; -import java.nio.channels.SeekableByteChannel; -import java.nio.channels.WritableByteChannel; -import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; -import org.apache.commons.compress.utils.SeekableInMemoryByteChannel; -import org.apache.iotdb.tsfile.compress.ICompressor; -import org.apache.iotdb.tsfile.file.metadata.enums.CompressionType; -import org.junit.Test; - -import java.io.*; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.nio.file.StandardOpenOption; -import java.util.ArrayList; -import java.util.Arrays; - -import static java.lang.Math.*; -import static org.apache.iotdb.tsfile.constant.TestConstant.random; - -public class TSDIFFBOSMImproveTest { - - public static long combine2Int(int int1, int int2) { - return ((long) int1 << 32) | (int2 & 0xFFFFFFFFL); - } - - public static int getTime(long long1) { - return ((int) (long1 >> 32)); - } - - public static int getValue(long long1) { - return ((int) (long1)); - } - - public static int getCount(long long1, int mask) { - return ((int) (long1 & mask)); - } - public static int getUniqueValue(long long1, int left_shift) { - return ((int) ((long1) >> left_shift)); - } - - public static int getBitWith(int num) { - if (num == 0) return 1; - else return 32 - Integer.numberOfLeadingZeros(num); - } - - public static void int2Bytes(int integer,int encode_pos , byte[] cur_byte) { - cur_byte[encode_pos] = (byte) (integer >> 24); - cur_byte[encode_pos+1] = (byte) (integer >> 16); - cur_byte[encode_pos+2] = (byte) (integer >> 8); - cur_byte[encode_pos+3] = (byte) (integer); - } - - - public static 
void intByte2Bytes(int integer, int encode_pos , byte[] cur_byte) { - cur_byte[encode_pos] = (byte) (integer); - } - - private static void long2intBytes(long integer, int encode_pos , byte[] cur_byte) { - cur_byte[encode_pos] = (byte) (integer >> 24); - cur_byte[encode_pos+1] = (byte) (integer >> 16); - cur_byte[encode_pos+2] = (byte) (integer >> 8); - cur_byte[encode_pos+3] = (byte) (integer); - } - - public static int bytes2Integer(byte[] encoded, int start, int num) { - int value = 0; - if (num > 4) { - System.out.println("bytes2Integer error"); - return 0; - } - for (int i = 0; i < num; i++) { - value <<= 8; - int b = encoded[i + start] & 0xFF; - value |= b; - } - return value; - } - - private static long bytesLong2Integer(byte[] encoded, int decode_pos) { - long value = 0; - for (int i = 0; i < 4; i++) { - value <<= 8; - int b = encoded[i + decode_pos] & 0xFF; - value |= b; - } - return value; - } - - public static void pack8Values(ArrayList<Integer> values, int offset, int width, int encode_pos, byte[] encoded_result) { - int bufIdx = 0; - int valueIdx = offset; - // remaining bits for the current unfinished Integer - int leftBit = 0; - - while (valueIdx < 8 + offset) { - // buffer is used for saving 32 bits as a part of result - int buffer = 0; - // remaining size of bits in the 'buffer' - int leftSize = 32; - - // encode the left bits of current Integer to 'buffer' - if (leftBit > 0) { - buffer |= (values.get(valueIdx) << (32 - leftBit)); - leftSize -= leftBit; - leftBit = 0; - valueIdx++; - } - - while (leftSize >= width && valueIdx < 8 + offset) { - // encode one Integer to the 'buffer' - buffer |= (values.get(valueIdx)<< (leftSize - width)); - leftSize -= width; - valueIdx++; - } - // If the remaining space of the buffer can not save the bits for one Integer, - if (leftSize > 0 && valueIdx < 8 + offset) { - // put the first 'leftSize' bits of the Integer into remaining space of the - // buffer - buffer |= (values.get(valueIdx) >>> (width - leftSize)); - 
leftBit = width - leftSize; - } - - // put the buffer into the final result - for (int j = 0; j < 4; j++) { - encoded_result[encode_pos] = (byte) ((buffer >>> ((3 - j) * 8)) & 0xFF); - encode_pos ++; - bufIdx++; - if (bufIdx >= width) { - return ; - } - } - } -// return encode_pos; - } - - public static void unpack8Values(byte[] encoded, int offset,int width, ArrayList<Integer> result_list) { - int byteIdx = offset; - long buffer = 0; - // total bits which have read from 'buf' to 'buffer'. i.e., - // number of available bits to be decoded. - int totalBits = 0; - int valueIdx = 0; - - while (valueIdx < 8) { - // If current available bits are not enough to decode one Integer, - // then add next byte from buf to 'buffer' until totalBits >= width - while (totalBits < width) { - buffer = (buffer << 8) | (encoded[byteIdx] & 0xFF); - byteIdx++; - totalBits += 8; - } - - // If current available bits are enough to decode one Integer, - // then decode one Integer one by one until left bits in 'buffer' is - // not enough to decode one Integer. 
- while (totalBits >= width && valueIdx < 8) { - result_list.add ((int) (buffer >>> (totalBits - width))); - valueIdx++; - totalBits -= width; - buffer = buffer & ((1L << totalBits) - 1); - } - } - } - - public static int bitPacking(ArrayList<Integer> numbers, int start, int bit_width,int encode_pos, byte[] encoded_result) { - int block_num = (numbers.size()-start) / 8; - for(int i=0;i<block_num;i++){ - pack8Values( numbers, start+i*8, bit_width,encode_pos, encoded_result); - encode_pos +=bit_width; - } - - return encode_pos; - - } - - public static ArrayList<Integer> decodeBitPacking( - byte[] encoded, int decode_pos, int bit_width, int block_size) { - ArrayList<Integer> result_list = new ArrayList<>(); - int block_num = (block_size - 1) / 8; - - for (int i = 0; i < block_num; i++) { // bitpacking - unpack8Values( encoded, decode_pos, bit_width, result_list); - decode_pos += bit_width; - - } - return result_list; - } - - - public static int[] getAbsDeltaTsBlock( - int[] ts_block, - int[] min_delta, - int supple_length) { - int block_size = ts_block.length-1; - int[] ts_block_delta = new int[ts_block.length+supple_length-1]; - - - int value_delta_min = Integer.MAX_VALUE; - int value_delta_max = Integer.MIN_VALUE; - for (int i = 0; i < block_size; i++) { - - int epsilon_v = ts_block[i+1] - ts_block[i]; - - if (epsilon_v < value_delta_min) { - value_delta_min = epsilon_v; - } - if (epsilon_v > value_delta_max) { - value_delta_max = epsilon_v; - } - - } - min_delta[0] = (ts_block[0]); - min_delta[1] = (value_delta_min); - min_delta[2] = (value_delta_max-value_delta_min); - - for (int i = 0; i < block_size; i++) { - int epsilon_v = ts_block[i+1] - value_delta_min - ts_block[i]; - ts_block_delta[i] = epsilon_v; - } - for(int i = block_size;i<block_size+supple_length;i++){ - ts_block_delta[i] = 0; - } - return ts_block_delta; - } - - public static int[] getAbsDeltaTsBlock( - int[] ts_block, - int i, - int block_size, - int remaining, - int[] min_delta) { - int[] 
ts_block_delta = new int[remaining-1]; - - int value_delta_min = Integer.MAX_VALUE; - int value_delta_max = Integer.MIN_VALUE; - int base = i*block_size+1; - int end = i*block_size+remaining; - - int tmp_j_1 = ts_block[base-1]; - min_delta[0] =tmp_j_1; - int j = base; - int tmp_j; - - while(j<end){ - tmp_j = ts_block[j]; - int epsilon_v = tmp_j - tmp_j_1; - ts_block_delta[j-base] = epsilon_v; - if (epsilon_v < value_delta_min) { - value_delta_min = epsilon_v; - } - if (epsilon_v > value_delta_max) { - value_delta_max = epsilon_v; - } - tmp_j_1 = tmp_j; - j++; - } - j = 0; - end = remaining -1; - while(j<end){ - ts_block_delta[j] = ts_block_delta[j] - value_delta_min; - j++; - } - - min_delta[1] = value_delta_min; - min_delta[2] = (value_delta_max-value_delta_min); - - - return ts_block_delta; - } - - public static int encodeOutlier2Bytes( - ArrayList<Integer> ts_block_delta, - int bit_width, - int encode_pos, byte[] encoded_result) { - - encode_pos = bitPacking(ts_block_delta, 0, bit_width, encode_pos, encoded_result); - - int n_k = ts_block_delta.size(); - int n_k_b = n_k / 8; - long cur_remaining = 0; // encoded int - int cur_number_bits = 0; // the bit width used of encoded int - for (int i = n_k_b * 8; i < n_k; i++) { - long cur_value = ts_block_delta.get(i); - int cur_bit_width = bit_width; // remaining bit width of current value - - if (cur_number_bits + bit_width >= 32) { - cur_remaining <<= (32 - cur_number_bits); - cur_bit_width = bit_width - 32 + cur_number_bits; - cur_remaining += ((cur_value >> cur_bit_width)); - long2intBytes(cur_remaining,encode_pos,encoded_result); - encode_pos += 4; - cur_remaining = 0; - cur_number_bits = 0; - } - - cur_remaining <<= cur_bit_width; - cur_number_bits += cur_bit_width; - cur_remaining += (((cur_value << (32 - cur_bit_width)) & 0xFFFFFFFFL) >> (32 - cur_bit_width)); // - } - cur_remaining <<= (32 - cur_number_bits); - long2intBytes(cur_remaining,encode_pos,encoded_result); - encode_pos += 4; - return encode_pos; - - } 
- - - public static ArrayList<Integer> decodeOutlier2Bytes( - byte[] encoded, - int decode_pos, - int bit_width, - int length, - ArrayList<Integer> encoded_pos_result - ) { - - int n_k_b = length / 8; - int remaining = length - n_k_b * 8; - ArrayList<Integer> result_list = new ArrayList<>(decodeBitPacking(encoded, decode_pos, bit_width, n_k_b * 8 + 1)); - decode_pos += n_k_b * bit_width; - - ArrayList<Long> int_remaining = new ArrayList<>(); - int int_remaining_size = remaining * bit_width / 32 + 1; - for (int j = 0; j < int_remaining_size; j++) { - int_remaining.add(bytesLong2Integer(encoded, decode_pos)); - decode_pos += 4; - } - - int cur_remaining_bits = 32; // remaining bit width of current value - long cur_number = int_remaining.get(0); - int cur_number_i = 1; - for (int i = n_k_b * 8; i < length; i++) { - if (bit_width < cur_remaining_bits) { - int tmp = (int) (cur_number >> (32 - bit_width)); - result_list.add(tmp); - cur_number <<= bit_width; - cur_number &= 0xFFFFFFFFL; - cur_remaining_bits -= bit_width; - } else { - int tmp = (int) (cur_number >> (32 - cur_remaining_bits)); - int remain_bits = bit_width - cur_remaining_bits; - tmp <<= remain_bits; - - cur_number = int_remaining.get(cur_number_i); - cur_number_i++; - tmp += (int) (cur_number >> (32 - remain_bits)); - result_list.add(tmp); - cur_number <<= remain_bits; - cur_number &= 0xFFFFFFFFL; - cur_remaining_bits = 32 - remain_bits; - } - } - encoded_pos_result.add(decode_pos); - return result_list; - } - private static int BOSEncodeBits(int[] ts_block_delta, - int final_k_start_value, - int final_x_l_plus, - int final_k_end_value, - int final_x_u_minus, - int max_delta_value, - int[] min_delta, - int encode_pos, - byte[] cur_byte) { - int block_size = ts_block_delta.length; - - ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_left_outlier = new ArrayList<>(); - ArrayList<Integer> 
final_right_outlier = new ArrayList<>(); - ArrayList<Integer> final_normal = new ArrayList<>(); - int k1 = 0; - int k2 = 0; - - ArrayList<Integer> bitmap_outlier = new ArrayList<>(); - int index_bitmap_outlier = 0; - int cur_index_bitmap_outlier_bits = 0; - for (int i = 0; i < block_size; i++) { - int cur_value = ts_block_delta[i]; - if ( cur_value<= final_k_start_value) { - final_left_outlier.add(cur_value); - final_left_outlier_index.add(i); - if (cur_index_bitmap_outlier_bits % 8 != 7) { - index_bitmap_outlier <<= 2; - index_bitmap_outlier += 3; - cur_index_bitmap_outlier_bits += 2; - } else { - index_bitmap_outlier <<= 1; - index_bitmap_outlier += 1; - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 1; - cur_index_bitmap_outlier_bits = 1; - } - k1++; - - - } else if (cur_value >= final_k_end_value) { - final_right_outlier.add(cur_value - final_k_end_value); - final_right_outlier_index.add(i); - if (cur_index_bitmap_outlier_bits % 8 != 7) { - index_bitmap_outlier <<= 2; - index_bitmap_outlier += 2; - cur_index_bitmap_outlier_bits += 2; - } else { - index_bitmap_outlier <<= 1; - index_bitmap_outlier += 1; - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 0; - cur_index_bitmap_outlier_bits = 1; - } - k2++; - - } else { - final_normal.add(cur_value - final_x_l_plus); - index_bitmap_outlier <<= 1; - cur_index_bitmap_outlier_bits += 1; - } - if (cur_index_bitmap_outlier_bits % 8 == 0) { - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 0; - } - } - if (cur_index_bitmap_outlier_bits % 8 != 0) { - - index_bitmap_outlier <<= (8 - cur_index_bitmap_outlier_bits % 8); - - index_bitmap_outlier &= 0xFF; - bitmap_outlier.add(index_bitmap_outlier); - } - - int final_alpha = ((k1 + k2) * getBitWith(block_size-1)) <= (block_size + k1 + k2) ? 
1 : 0; - - - int k_byte = (k1 << 1); - k_byte += final_alpha; - k_byte += (k2 << 16); - - - int2Bytes(k_byte,encode_pos,cur_byte); - encode_pos += 4; - - int2Bytes(min_delta[0],encode_pos,cur_byte); - encode_pos += 4; - int2Bytes(min_delta[1],encode_pos,cur_byte); - encode_pos += 4; - - int bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); - int left_bit_width = getBitWith(final_k_start_value);//final_left_max - int right_bit_width = getBitWith(max_delta_value - final_k_end_value);//final_right_min - - if(k1==0 && k2==0){ - intByte2Bytes(bit_width_final,encode_pos,cur_byte); - encode_pos += 1; - -// encode_pos = encodeOutlier2Bytes(final_normal, bit_width_final,encode_pos,cur_byte); -// return encode_pos; - } - else{ - int2Bytes(final_x_l_plus,encode_pos,cur_byte); - encode_pos += 4; - int2Bytes(final_k_end_value,encode_pos,cur_byte); - encode_pos += 4; - - bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); - intByte2Bytes(bit_width_final,encode_pos,cur_byte); - encode_pos += 1; - intByte2Bytes(left_bit_width,encode_pos,cur_byte); - encode_pos += 1; - intByte2Bytes(right_bit_width,encode_pos,cur_byte); - encode_pos += 1; - if (final_alpha == 0) { // 0 - - for (int i : bitmap_outlier) { - - intByte2Bytes(i,encode_pos,cur_byte); - encode_pos += 1; - } - } else { - encode_pos = encodeOutlier2Bytes(final_left_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); - encode_pos = encodeOutlier2Bytes(final_right_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); - } - } - - -// if(k1+k2!=block_size) - encode_pos = encodeOutlier2Bytes(final_normal, bit_width_final,encode_pos,cur_byte); - if (k1 != 0) - encode_pos = encodeOutlier2Bytes(final_left_outlier, left_bit_width,encode_pos,cur_byte); - if (k2 != 0) - encode_pos = encodeOutlier2Bytes(final_right_outlier, right_bit_width,encode_pos,cur_byte); - return encode_pos; - - } - - private static void calculateBits(int k1, int k2, int block_size, int alpha, int gamma, int k1_start, 
int k2_end, int min_bits, int[] result){ - int cur_bits = 0; - cur_bits += Math.min((k1 + k2) * getBitWith(block_size - 1), block_size + k1 + k2); - if(k1!=0) - cur_bits += k1*alpha; - if (k1 + k2 != block_size) - cur_bits += (block_size - k1 - k2) * - getBitWith(k2_end -k1_start - 2); - if(k2!=0) - cur_bits += k2 * gamma; - if (cur_bits < min_bits) { - result[0] = cur_bits; - result[1] = k1_start; - result[2] = k2_end; - } - } - - - public static int findMedian(int[] arr) { - if (arr == null || arr.length == 0) { - throw new IllegalArgumentException("数组不能为空"); - } - int n = arr.length; - if (n > 400){ - return quickSelect(arr, 0, 99, 50); - }else if (n > 100){ - return quickSelect(arr, 0, (n - 1)/4, n / 8); - }else { - return quickSelect(arr, 0, n - 1, n / 2); - } - } - -// private static int quickSelect(int[] arr, int left, int right, int k){ -// int pivotV=arr[left+random.nextInt(right-left+1)],tmpV; -// int posEqual=left,posSmaller=left; // a[left,posEqual): = pivotV; a[posEqual,posSmaller): < pivotV -// for(int i=left;i<=right;i++){ -// if(arr[i]==pivotV){ -// tmpV=arr[i]; -// arr[i]=arr[posSmaller]; -// arr[posSmaller]=arr[posEqual]; -// arr[posEqual]=tmpV; -// posEqual++; -// posSmaller++; -// }else -// if(arr[i]<pivotV){ -// tmpV=arr[posSmaller]; -// arr[posSmaller]=arr[i]; -// arr[i]=tmpV; -// posSmaller++; -// } -// } -// if(k+(posEqual-left)<=posSmaller-1)return quickSelect(arr,posEqual,posSmaller-1,k+(posEqual-left)); -// else if(k<=posSmaller-1)return pivotV; -// else return quickSelect(arr,posSmaller,right,k); -// } - private static int quickSelect(int[] arr, int left, int right, int k) { - if (left == right) { - return arr[left]; - } - if (areAllElementsEqual(arr, left, right)) { - return arr[left]; - } - int pivotIndex = left + random.nextInt(right - left + 1); - swap(arr, pivotIndex, right); - - pivotIndex = partition(arr, left, right); - if (abs(pivotIndex - k) < (right - left)/6) { - return arr[k]; - } else if (k <= pivotIndex - (right - left)/6) 
{ - return quickSelect(arr, left, pivotIndex - 1, k); - } else { - return quickSelect(arr, pivotIndex + 1, right, k); - } - } - private static boolean areAllElementsEqual(int[] arr, int left, int right) { - for (int i = left + 1; i <= right; i++) { - if (arr[i] != arr[left]) { - return false; - } - } - return true; - } - private static int partition(int[] arr, int left, int right) { - int pivot = arr[right]; - int i = left; - for (int j = left; j < right; j++) { - if (arr[j] <= pivot) { - swap(arr, i, j); - i++; - } - } - swap(arr, i, right); - return i; - } - - private static void swap(int[] arr, int i, int j) { - int temp = arr[i]; - arr[i] = arr[j]; - arr[j] = temp; - } - - public static long compress(byte[] data) throws IOException { - File file = new File("example.7z"); - file.createNewFile(); - Path path = Paths.get("example.7z"); - SeekableByteChannel channel = Files.newByteChannel(path, StandardOpenOption.READ, StandardOpenOption.WRITE); - SevenZOutputFile szof = new SevenZOutputFile(channel); - SevenZArchiveEntry entry = szof.createArchiveEntry(null, "entryName"); - szof.putArchiveEntry(entry); - szof.write(data); - szof.closeArchiveEntry(); - return channel.size(); - } - - public static int EncodeBits(int num, - int bit_width, - int encode_pos, - byte[] cur_byte, - int[] bit_index_list){ - // 找到要插入的位的索引 - int bit_index = bit_index_list[0] ;//cur_byte[encode_pos + 1]; - - // 计算数值的起始位位置 - int remaining_bits = bit_width; - - while (remaining_bits > 0) { - // 计算在当前字节中可以使用的位数 - int available_bits = bit_index; - int bits_to_write = Math.min(available_bits, remaining_bits); - - // 更新 bit_index - bit_index = available_bits - bits_to_write; - - // 计算要写入的位的掩码和数值 - int mask = (1 << bits_to_write) - 1; - int bits = (num >> (remaining_bits - bits_to_write)) & mask; - - // 写入到当前位置 - cur_byte[encode_pos] &= (byte) ~(mask << bit_index); // 清除对应位置的位 - cur_byte[encode_pos] |= (byte) (bits << bit_index); - - // 更新位宽和数值 - remaining_bits -= bits_to_write; - if (bit_index == 
0) { - bit_index = 8; - encode_pos++; - } - } - bit_index_list[0] = bit_index; -// cur_byte[encode_pos + 1] = (byte) bit_index; - return encode_pos; - } - private static int BOSEncodeBitsImprove(int[] ts_block_delta, - int final_k_start_value, - int final_x_l_plus, - int final_k_end_value, - int final_x_u_minus, - int max_delta_value, - int[] min_delta, - int encode_pos, - byte[] cur_byte) { - int block_size = ts_block_delta.length; - - ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_left_outlier = new ArrayList<>(); - ArrayList<Integer> final_right_outlier = new ArrayList<>(); - ArrayList<Integer> final_normal = new ArrayList<>(); - int k1 = 0; - int k2 = 0; - - - - ArrayList<Integer> bitmap_outlier = new ArrayList<>(); - int index_bitmap_outlier = 0; - int cur_index_bitmap_outlier_bits = 0; - for (int i = 0; i < block_size; i++) { - int cur_value = ts_block_delta[i]; - if ( cur_value<= final_k_start_value) { -// encode_pos = EncodeBits(cur_value,left_bit_width,encode_pos,cur_byte); -// final_left_outlier.add(cur_value); - final_left_outlier_index.add(i); - if (cur_index_bitmap_outlier_bits % 8 != 7) { - index_bitmap_outlier <<= 2; - index_bitmap_outlier += 3; - cur_index_bitmap_outlier_bits += 2; - } else { - index_bitmap_outlier <<= 1; - index_bitmap_outlier += 1; - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 1; - cur_index_bitmap_outlier_bits = 1; - } - k1++; - - - } else if (cur_value >= final_k_end_value) { -// encode_pos = EncodeBits(cur_value- final_k_end_value,right_bit_width,encode_pos,cur_byte); -// final_right_outlier.add(cur_value - final_k_end_value); - final_right_outlier_index.add(i); - if (cur_index_bitmap_outlier_bits % 8 != 7) { - index_bitmap_outlier <<= 2; - index_bitmap_outlier += 2; - cur_index_bitmap_outlier_bits += 2; - } else { - index_bitmap_outlier <<= 1; - index_bitmap_outlier += 1; - 
bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 0; - cur_index_bitmap_outlier_bits = 1; - } - k2++; - - } else { -// final_normal.add(cur_value - final_x_l_plus); -// encode_pos = EncodeBits(cur_value- final_x_l_plus,right_bit_width,encode_pos,cur_byte); - index_bitmap_outlier <<= 1; - cur_index_bitmap_outlier_bits += 1; - } - if (cur_index_bitmap_outlier_bits % 8 == 0) { - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 0; - } - } - if (cur_index_bitmap_outlier_bits % 8 != 0) { - - index_bitmap_outlier <<= (8 - cur_index_bitmap_outlier_bits % 8); - - index_bitmap_outlier &= 0xFF; - bitmap_outlier.add(index_bitmap_outlier); - } - - int final_alpha = ((k1 + k2) * getBitWith(block_size-1)) <= (block_size + k1 + k2) ? 1 : 0; - - - int k_byte = (k1 << 1); - k_byte += final_alpha; - k_byte += (k2 << 16); - - int2Bytes(k_byte,encode_pos,cur_byte); - encode_pos += 4; - - - int2Bytes(min_delta[0],encode_pos,cur_byte); - encode_pos += 4; - int2Bytes(min_delta[1],encode_pos,cur_byte); - encode_pos += 4; - - int bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); - intByte2Bytes(bit_width_final,encode_pos,cur_byte); - encode_pos += 1; - int[] bit_index_list = new int[1]; - bit_index_list[0] = 8; - - if(final_k_start_value<0 && final_k_end_value > max_delta_value){ -// int bit_width_final= getBitWith(final_x_u_minus - final_x_l_plus); -// cur_byte[encode_pos+1] = 8; - for (int cur_value : ts_block_delta) { - encode_pos = EncodeBits(cur_value, bit_width_final, encode_pos, cur_byte, bit_index_list); -// final_normal.add(cur_value); - } - if(bit_index_list[0] != 8){ - encode_pos ++; - } -// cur_byte[encode_pos+1] = 0; -// encode_pos = encodeOutlier2Bytes(final_normal, bit_width_final,encode_pos,cur_byte); - return encode_pos; - } - - - int left_bit_width = getBitWith(final_k_start_value);//final_left_max - int right_bit_width = getBitWith(max_delta_value - final_k_end_value);//final_right_min - 
int2Bytes(final_x_l_plus,encode_pos,cur_byte); - encode_pos += 4; - int2Bytes(final_k_end_value,encode_pos,cur_byte); - encode_pos += 4; - -// bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); -// intByte2Bytes(bit_width_final,encode_pos,cur_byte); -// encode_pos += 1; - intByte2Bytes(left_bit_width,encode_pos,cur_byte); - encode_pos += 1; - intByte2Bytes(right_bit_width,encode_pos,cur_byte); - encode_pos += 1; - - if (final_alpha == 0) { // 0 - - for (int i : bitmap_outlier) { - - intByte2Bytes(i,encode_pos,cur_byte); - encode_pos += 1; - } - } else { - encode_pos = encodeOutlier2Bytes(final_left_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); - encode_pos = encodeOutlier2Bytes(final_right_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); - } -// cur_byte[encode_pos+1] = 8; -// bit_index_list[0] = 8; - for (int cur_value : ts_block_delta) { - if (cur_value <= final_k_start_value) { - encode_pos = EncodeBits(cur_value, left_bit_width, encode_pos, cur_byte,bit_index_list); - } else if (cur_value >= final_k_end_value) { - encode_pos = EncodeBits(cur_value - final_k_end_value, right_bit_width, encode_pos, cur_byte,bit_index_list); - } else { - encode_pos = EncodeBits(cur_value - final_x_l_plus, bit_width_final, encode_pos, cur_byte,bit_index_list); - } - } - if(bit_index_list[0] != 8){ - encode_pos ++; - } - -// cur_byte[encode_pos+1] = 0; - -// if(k1==0 && k2==0){ -// intByte2Bytes(bit_width_final,encode_pos,cur_byte); -// encode_pos += 1; -// -// -// } -// else{ -// int2Bytes(final_x_l_plus,encode_pos,cur_byte); -// encode_pos += 4; -// int2Bytes(final_k_end_value,encode_pos,cur_byte); -// encode_pos += 4; -// -// bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); -// intByte2Bytes(bit_width_final,encode_pos,cur_byte); -// encode_pos += 1; -// intByte2Bytes(left_bit_width,encode_pos,cur_byte); -// encode_pos += 1; -// intByte2Bytes(right_bit_width,encode_pos,cur_byte); -// encode_pos += 1; -// if (final_alpha == 
0) { // 0 -// -// for (int i : bitmap_outlier) { -// -// intByte2Bytes(i,encode_pos,cur_byte); -// encode_pos += 1; -// } -// } else { -// encode_pos = encodeOutlier2Bytes(final_left_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); -// encode_pos = encodeOutlier2Bytes(final_right_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); -// } -// } - - -// if(k1+k2!=block_size) -// encode_pos = encodeOutlier2Bytes(final_normal, bit_width_final,encode_pos,cur_byte); -// if (k1 != 0) -// encode_pos = encodeOutlier2Bytes(final_left_outlier, left_bit_width,encode_pos,cur_byte); -// if (k2 != 0) -// encode_pos = encodeOutlier2Bytes(final_right_outlier, right_bit_width,encode_pos,cur_byte); - return encode_pos; - - } - - private static int BOSBlockEncoder(int[] ts_block, int block_i, int block_size, int remaining, int encode_pos, byte[] cur_byte) { - - int[] min_delta = new int[3]; - int[] ts_block_delta = getAbsDeltaTsBlock(ts_block, block_i, block_size, remaining, min_delta); -// System.out.println(Arrays.toString(ts_block_delta)); - - block_size = remaining - 1; - int max_delta_value = min_delta[2]; - - - int max_bit_width = getBitWith(max_delta_value) + 1; - - - int[] findMedianArray = new int[block_size]; - System.arraycopy(ts_block_delta, 0, findMedianArray, 0, block_size); - - int median = findMedian(findMedianArray); - - - - // int xl= median; - // int xu = median; - // xl = 2 median - xu - // xl = xu - 2 ^ beta - int left_number = 0; - int right_number = 0; - - int length_outlier = block_size; -// for(int value:findMedianArray){ -// if(value <=median) left_number++; -// if (value >= median) right_number ++; -// } - - - int final_k_start_value = -1; // x_l_minus - int final_x_l_plus = 0; // x_l_plus - int final_k_end_value = max_delta_value+1; // x_u_plus - int final_x_u_minus = max_delta_value; // x_u_minus - - int min_bits = (getBitWith(final_k_end_value - final_k_start_value - 2) * (block_size)); -// min_bits +=; - - - int[] count_left = new 
int[max_bit_width]; - int[] count_right = new int[max_bit_width]; - int count_0 = 0; - - - for(int i=0;i<length_outlier;i++){ - int cur_value = ts_block_delta[i]; - if(cur_value > median){ - int beta = getBitWith(cur_value - median) ; - count_right[beta] ++; - } else if (cur_value < median) { - int beta = getBitWith(median - cur_value) ; - count_left[beta] ++; - }else{ - count_0 ++; - } - } - - - for(int beta = max_bit_width - 1; beta > 0 ; beta --){ - left_number += count_left[beta]; - right_number += count_right[beta]; - int pow_beta = 1 << (beta-1); - int xu = min(max_delta_value+1, median + pow_beta) ; - int xl = max(median - pow_beta,-1); - int cur_bits = Math.min((left_number + right_number) * getBitWith(block_size - 1), block_size + left_number + right_number); - cur_bits += left_number * getBitWith(xl); - cur_bits += right_number * getBitWith(max_delta_value - xu); - cur_bits += (block_size - left_number - right_number) * getBitWith(xu - xl - 2); - if (cur_bits < min_bits) { - min_bits = cur_bits; - - final_k_start_value = xl; - final_x_l_plus = xl + 1; - final_k_end_value = xu; - final_x_u_minus = xu -1; - } - - } -// if(min_bits == bp_cost){ -// -// } -// else -// System.out.println(min_bits/8); - encode_pos = BOSEncodeBits(ts_block_delta, final_k_start_value, final_x_l_plus, final_k_end_value, final_x_u_minus, - max_delta_value, min_delta, encode_pos , cur_byte); - - - return encode_pos; - } - - public static int minNumberIndex(int alpha, int beta, int gamma){ - if(alpha<beta && beta < gamma){ - return 1; // prop 4.2 - } else if (alpha>beta && beta > gamma) { - return 2;// prop 4.3 - } else if (beta<alpha && beta <gamma) { - return 3;// prop 4.4 - }else{ - return 0; // alpha=beta=gamma - } - } - - private static void addToArchiveCompression(SevenZOutputFile out, File file, String dir) { - String name = dir + File.separator + file.getName(); - if(dir.equals(".")) { - name = file.getName(); - } - if (file.isFile()){ - SevenZArchiveEntry entry = null; - 
FileInputStream in = null; - try { - entry = out.createArchiveEntry(file, name); - out.putArchiveEntry(entry); - in = new FileInputStream(file); - byte[] b = new byte[1024]; - int count = 0; - while ((count = in.read(b)) > 0) { - out.write(b, 0, count); - } - } catch (IOException e) { - e.printStackTrace(); - } finally { - try { - out.closeArchiveEntry(); - in.close(); - } catch (IOException e) { - e.printStackTrace(); - } - - } - } else if (file.isDirectory()) { - File[] children = file.listFiles(); - if (children != null){ - for (File child : children){ - addToArchiveCompression(out, child, name); - } - } - } else { - System.out.println(file.getName() + " is not supported"); - } - } - - public static int BOSEncoder( - int[] data, int block_size, byte[] encoded_result) { - block_size++; - - int length_all = data.length; - - int encode_pos = 0; - int2Bytes(length_all,encode_pos,encoded_result); - encode_pos += 4; - - int block_num = length_all / block_size; - int2Bytes(block_size,encode_pos,encoded_result); - encode_pos+= 4; - - -// for (int i = 0; i < 1; i++) { - for (int i = 0; i < block_num; i++) { -// int start_encode_pos = encode_pos; - encode_pos = BOSBlockEncoder(data, i, block_size, block_size,encode_pos,encoded_result); -// System.out.println(encode_pos-start_encode_pos); -// System.out.println("------------------------------------------"); - } - - int remaining_length = length_all - block_num * block_size; - if (remaining_length <= 3) { - for (int i = remaining_length; i > 0; i--) { - int2Bytes(data[data.length - i], encode_pos, encoded_result); - encode_pos += 4; - } - - } - else { - - int start = block_num * block_size; - int remaining = length_all-start; - - - encode_pos = BOSBlockEncoder(data, block_num, block_size,remaining, encode_pos,encoded_result); - - } - - - return encode_pos; - } - - private static int BOSBlockEncoderImprove(int[] ts_block, int block_i, int block_size, int remaining, int encode_pos, byte[] cur_byte) { - - int[] min_delta = new 
int[3]; - int[] ts_block_delta = getAbsDeltaTsBlock(ts_block, block_i, block_size, remaining, min_delta); -// System.out.println(Arrays.toString(ts_block_delta)); - - block_size = remaining - 1; - int max_delta_value = min_delta[2]; - - - int max_bit_width = getBitWith(max_delta_value) + 1; - - - int[] findMedianArray = new int[block_size]; - System.arraycopy(ts_block_delta, 0, findMedianArray, 0, block_size); - - int median = findMedian(findMedianArray); - - - - // int xl= median; - // int xu = median; - // xl = 2 median - xu - // xl = xu - 2 ^ beta - int left_number = 0; - int right_number = 0; - - int length_outlier = block_size; -// for(int value:findMedianArray){ -// if(value <=median) left_number++; -// if (value >= median) right_number ++; -// } - - - int final_k_start_value = -1; // x_l_minus - int final_x_l_plus = 0; // x_l_plus - int final_k_end_value = max_delta_value+1; // x_u_plus - int final_x_u_minus = max_delta_value; // x_u_minus - - int min_bits = (getBitWith(final_k_end_value - final_k_start_value - 2) * (block_size)); -// min_bits +=; - - - int[] count_left = new int[max_bit_width]; - int[] count_right = new int[max_bit_width]; - int count_0 = 0; - - - for(int i=0;i<length_outlier;i++){ - int cur_value = ts_block_delta[i]; - if(cur_value > median){ - int beta = getBitWith(cur_value - median) ; - count_right[beta] ++; - } else if (cur_value < median) { - int beta = getBitWith(median - cur_value) ; - count_left[beta] ++; - }else{ - count_0 ++; - } - } - - - for(int beta = max_bit_width - 1; beta > 0 ; beta --){ - left_number += count_left[beta]; - right_number += count_right[beta]; - int pow_beta = 1<< (beta-1); - int xu = min(max_delta_value+1, median + pow_beta) ; - int xl = max(median - pow_beta,-1); - int cur_bits = Math.min((left_number + right_number) * getBitWith(block_size - 1), block_size + left_number + right_number); - cur_bits += left_number * getBitWith(xl); - cur_bits += right_number * getBitWith(max_delta_value - xu); - cur_bits += 
(block_size - left_number - right_number) * getBitWith(xu - xl - 2); - if (cur_bits < min_bits) { - min_bits = cur_bits; - - final_k_start_value = xl; - final_x_l_plus = xl + 1; - final_k_end_value = xu; - final_x_u_minus = xu -1; - } - - } -// if(min_bits == bp_cost){ -// -// } -// else -// System.out.println(min_bits/8); - encode_pos = BOSEncodeBitsImprove(ts_block_delta, final_k_start_value, final_x_l_plus, final_k_end_value, final_x_u_minus, - max_delta_value, min_delta, encode_pos , cur_byte); - - - return encode_pos; - } - - public static int BOSEncoderImprove( - int[] data, int block_size, byte[] encoded_result) { - block_size++; - - int length_all = data.length; - - int encode_pos = 0; - int2Bytes(length_all,encode_pos,encoded_result); - encode_pos += 4; - - int block_num = length_all / block_size; - int2Bytes(block_size,encode_pos,encoded_result); - encode_pos+= 4; -// int[] test_block = new int[block_size]; -// System.arraycopy(data, 0, test_block, 0, block_size); -// System.out.println(Arrays.toString(test_block)); -// for (int i = 0; i < 1; i++) { - for (int i = 0; i < block_num; i++) { -// int start_encode_pos = encode_pos; - encode_pos = BOSBlockEncoderImprove(data, i, block_size, block_size,encode_pos,encoded_result); -// System.out.println(encode_pos-start_encode_pos); -// System.out.println("------------------------------------------"); - } - - int remaining_length = length_all - block_num * block_size; - if (remaining_length <= 3) { - for (int i = remaining_length; i > 0; i--) { - int2Bytes(data[data.length - i], encode_pos, encoded_result); - encode_pos += 4; - } - - } - else { - - int start = block_num * block_size; - int remaining = length_all-start; - - - encode_pos = BOSBlockEncoderImprove(data, block_num, block_size,remaining, encode_pos,encoded_result); - - } - - - return encode_pos; - } - - public static int BOSBlockDecoder(byte[] encoded, int decode_pos, int[] value_list, int block_size, int[] value_pos_arr) { - - int k_byte = 
bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - int k1_byte = (int) (k_byte % pow(2, 16)); - int k1 = k1_byte / 2; - int final_alpha = k1_byte % 2; - - int k2 = (int) (k_byte / pow(2, 16)); - - int value0 = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - value_list[value_pos_arr[0]] =value0; - value_pos_arr[0] ++; - - int min_delta = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_left_outlier = new ArrayList<>(); - ArrayList<Integer> final_right_outlier = new ArrayList<>(); - ArrayList<Integer> final_normal= new ArrayList<>();; - ArrayList<Integer> bitmap_outlier = new ArrayList<>(); - int final_k_start_value = 0; - int final_k_end_value = 0; - int bit_width_final = 0; - int left_bit_width = 0; - int right_bit_width = 0; - - if(k1!=0 || k2 != 0){ - final_k_start_value = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - final_k_end_value = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - bit_width_final = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - left_bit_width = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - right_bit_width = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - if (final_alpha == 0) { - int bitmap_bytes = (int) Math.ceil((double) (block_size + k1 + k2) / (double) 8); - for (int i = 0; i < bitmap_bytes; i++) { - bitmap_outlier.add(bytes2Integer(encoded, decode_pos, 1)); - decode_pos += 1; - } - int bitmap_outlier_i = 0; - int remaining_bits = 8; - int tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - int i = 0; - while (i < block_size ) { - if (remaining_bits > 1) { - int bit_i = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_i == 1) { - int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_left_right == 1) { - 
final_left_outlier_index.add(i); - } else { - final_right_outlier_index.add(i); - } - } - if (remaining_bits == 0) { - remaining_bits = 8; - if (bitmap_outlier_i >= bitmap_bytes) break; - tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - } - } else if (remaining_bits == 1) { - int bit_i = tmp & 0x1; - remaining_bits = 8; - if (bitmap_outlier_i >= bitmap_bytes) break; - tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - if (bit_i == 1) { - int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_left_right == 1) { - final_left_outlier_index.add(i); - } else { - final_right_outlier_index.add(i); - } - } - } - i++; - } - } else { - ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); - final_left_outlier_index = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(block_size-1), k1, decode_pos_result_left); - decode_pos = (decode_pos_result_left.get(0)); - ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); - final_right_outlier_index = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(block_size-1), k2, decode_pos_result_right); - decode_pos = (decode_pos_result_right.get(0)); - } - }else { - bit_width_final = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - } - - - - - ArrayList<Integer> decode_pos_normal = new ArrayList<>(); - final_normal = decodeOutlier2Bytes(encoded, decode_pos, bit_width_final, block_size - k1 - k2, decode_pos_normal); - - decode_pos = decode_pos_normal.get(0); - if (k1 != 0) { - ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); - final_left_outlier = decodeOutlier2Bytes(encoded, decode_pos, left_bit_width, k1, decode_pos_result_left); - decode_pos = decode_pos_result_left.get(0); - } - if (k2 != 0) { - ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); - final_right_outlier = decodeOutlier2Bytes(encoded, decode_pos, right_bit_width, k2, decode_pos_result_right); - decode_pos = decode_pos_result_right.get(0); - } - 
int left_outlier_i = 0; - int right_outlier_i = 0; - int normal_i = 0; - int pre_v = value0; -// int final_k_end_value = (int) (final_k_start_value + pow(2, bit_width_final)); - - - for (int i = 0; i < block_size; i++) { - int current_delta; - if (left_outlier_i >= k1) { - if (right_outlier_i >= k2) { - current_delta = min_delta + final_normal.get(normal_i) + final_k_start_value; - normal_i++; - } else if (i == final_right_outlier_index.get(right_outlier_i)) { - current_delta = min_delta + final_right_outlier.get(right_outlier_i) + final_k_end_value; - right_outlier_i++; - } else { - current_delta = min_delta + final_normal.get(normal_i) + final_k_start_value; - normal_i++; - } - } else if (i == final_left_outlier_index.get(left_outlier_i)) { - current_delta = min_delta + final_left_outlier.get(left_outlier_i); - left_outlier_i++; - } else { - - if (right_outlier_i >= k2) { - current_delta = min_delta + final_normal.get(normal_i) + final_k_start_value; - normal_i++; - } else if (i == final_right_outlier_index.get(right_outlier_i)) { - current_delta = min_delta + final_right_outlier.get(right_outlier_i) + final_k_end_value; - right_outlier_i++; - } else { - current_delta = min_delta + final_normal.get(normal_i) + final_k_start_value; - normal_i++; - } - } - - pre_v = current_delta + pre_v; - value_list[value_pos_arr[0]] = pre_v; - value_pos_arr[0]++; - } - return decode_pos; - } - public static int DecodeBits(byte[] cur_byte, int bit_width, int[] decode_pos_list) { - int decode_pos = decode_pos_list[0]; - int bit_index = decode_pos_list[1]; //cur_byte[decode_pos + 1]; - int remaining_bits = bit_width; - int num = 0; - - while (remaining_bits > 0) { - int available_bits = bit_index; - int bits_to_read = Math.min(available_bits, remaining_bits); - - // 计算要读取的位的掩码 - int mask = (1 << bits_to_read) - 1; - int bits = (cur_byte[decode_pos] >> (available_bits - bits_to_read)) & mask; - - // 将读取的位合并到结果中 - num = (num << bits_to_read) | bits; - - // 更新位宽和 bit_index - 
remaining_bits -= bits_to_read; - bit_index = available_bits - bits_to_read; - - if (bit_index == 0) { - bit_index = 8; - decode_pos++; - } - } - decode_pos_list[0] = decode_pos; - decode_pos_list[1] = bit_index; - - return num; - } - public static int BOSBlockDecoderImprove(byte[] encoded, int decode_pos, int[] value_list, int block_size, int[] value_pos_arr) { - - int k_byte = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - int k1_byte = (int) (k_byte % pow(2, 16)); - int k1 = k1_byte / 2; - int final_alpha = k1_byte % 2; - - int k2 = (int) (k_byte / pow(2, 16)); - - int value0 = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - value_list[value_pos_arr[0]] =value0; - value_pos_arr[0] ++; - - int min_delta = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - int bit_width_final = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - int[] decode_list = new int[2]; - decode_list[0]= decode_pos; - decode_list[1]= 8; - - int valuePos = value_pos_arr[0]; - - if(k1==0 && k2==0){ - int pre_v = value0; - for (int i = 0; i < block_size; i++) { - int cur_delta = min_delta + DecodeBits(encoded, bit_width_final, decode_list); - pre_v += cur_delta; - value_list[valuePos++] = pre_v; - } - value_pos_arr[0] = valuePos; - return decode_list[0]; - } - - ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); -// ArrayList<Integer> final_left_outlier = new ArrayList<>(); -// ArrayList<Integer> final_right_outlier = new ArrayList<>(); -// ArrayList<Integer> final_normal= new ArrayList<>();; - ArrayList<Integer> bitmap_outlier = new ArrayList<>(); - int final_k_start_value = 0; - int final_k_end_value = 0; -// int bit_width_final = 0; - int left_bit_width = 0; - int right_bit_width = 0; - - final_k_start_value = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - final_k_end_value = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - -// 
bit_width_final = bytes2Integer(encoded, decode_pos, 1); -// decode_pos += 1; - - left_bit_width = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - right_bit_width = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - if (final_alpha == 0) { - int bitmap_bytes = (int) Math.ceil((double) (block_size + k1 + k2) / (double) 8); - for (int i = 0; i < bitmap_bytes; i++) { - bitmap_outlier.add(bytes2Integer(encoded, decode_pos, 1)); - decode_pos += 1; - } - int bitmap_outlier_i = 0; - int remaining_bits = 8; - int tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - int i = 0; - while (i < block_size ) { - if (remaining_bits > 1) { - int bit_i = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_i == 1) { - int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_left_right == 1) { - final_left_outlier_index.add(i); - } else { - final_right_outlier_index.add(i); - } - } - if (remaining_bits == 0) { - remaining_bits = 8; - if (bitmap_outlier_i >= bitmap_bytes) break; - tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - } - } else if (remaining_bits == 1) { - int bit_i = tmp & 0x1; - remaining_bits = 8; - if (bitmap_outlier_i >= bitmap_bytes) break; - tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - if (bit_i == 1) { - int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_left_right == 1) { - final_left_outlier_index.add(i); - } else { - final_right_outlier_index.add(i); - } - } - } - i++; - } - } else { - ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); - final_left_outlier_index = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(block_size-1), k1, decode_pos_result_left); - decode_pos = (decode_pos_result_left.get(0)); - ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); - final_right_outlier_index = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(block_size-1), k2, 
decode_pos_result_right); - decode_pos = (decode_pos_result_right.get(0)); - } - - - - - -// ArrayList<Integer> decode_pos_normal = new ArrayList<>(); -// final_normal = decodeOutlier2Bytes(encoded, decode_pos, bit_width_final, block_size - k1 - k2, decode_pos_normal); -// -// decode_pos = decode_pos_normal.get(0); -// if (k1 != 0) { -// ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); -// final_left_outlier = decodeOutlier2Bytes(encoded, decode_pos, left_bit_width, k1, decode_pos_result_left); -// decode_pos = decode_pos_result_left.get(0); -// } -// if (k2 != 0) { -// ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); -// final_right_outlier = decodeOutlier2Bytes(encoded, decode_pos, right_bit_width, k2, decode_pos_result_right); -// decode_pos = decode_pos_result_right.get(0); -// } - int left_outlier_i = 0; - int right_outlier_i = 0; - int normal_i = 0; - int pre_v = value0; -// int final_k_end_value = (int) (final_k_start_value + pow(2, bit_width_final)); - -// Precompute constants - int normalOffset = min_delta + final_k_start_value; - int rightOutlierOffset = min_delta + final_k_end_value; - -// Initialize indices and pre-fetch next outlier positions - int leftOutlierNextIndex = (left_outlier_i < k1) ? final_left_outlier_index.get(left_outlier_i) : Integer.MAX_VALUE; - int rightOutlierNextIndex = (right_outlier_i < k2) ? final_right_outlier_index.get(right_outlier_i) : Integer.MAX_VALUE; - decode_list[0]= decode_pos; -// decode_list[1]= 8; - // Use a local variable for the position - for (int i = 0; i < block_size; i++) { - int currentDelta; - if (i == leftOutlierNextIndex) { - // Process left outlier - currentDelta = min_delta + DecodeBits(encoded, left_bit_width, decode_list); // final_left_outlier.get(left_outlier_i); - left_outlier_i++; - leftOutlierNextIndex = (left_outlier_i < k1) ? 
final_left_outlier_index.get(left_outlier_i) : Integer.MAX_VALUE; - } else if (i == rightOutlierNextIndex) { - // Process right outlier - currentDelta = rightOutlierOffset + DecodeBits(encoded, right_bit_width, decode_list);// final_right_outlier.get(right_outlier_i); - right_outlier_i++; - rightOutlierNextIndex = (right_outlier_i < k2) ? final_right_outlier_index.get(right_outlier_i) : Integer.MAX_VALUE; - } else { - // Process normal value - currentDelta = normalOffset + DecodeBits(encoded, bit_width_final, decode_list); - normal_i++; - } - - // Update the cumulative value and store it - pre_v += currentDelta; - value_list[valuePos++] = pre_v; - } - value_pos_arr[0] = valuePos; - if(decode_list[1]!=8){ - return decode_list[0]+1; - }else { - return decode_list[0]; - } -// decode_pos = decode_list[0]; -// Update the position in the array - - -// return decode_pos; - } - - public static void BOSDecoder(byte[] encoded) { - - int decode_pos = 0; - int length_all = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - int block_size = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - - - int block_num = length_all / block_size; - int remain_length = length_all - block_num * block_size; - - - int[] value_list = new int[length_all+block_size]; - block_size--; - - int[] value_pos_arr = new int[1]; - for (int k = 0; k < block_num; k++) { - decode_pos = BOSBlockDecoder(encoded, decode_pos, value_list, block_size,value_pos_arr); - } - - if (remain_length <= 3) { - for (int i = 0; i < remain_length; i++) { - int value_end = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - value_list[value_pos_arr[0]] = value_end; - value_pos_arr[0]++; - } - } else { - remain_length --; - BOSBlockDecoder(encoded, decode_pos, value_list, remain_length, value_pos_arr); - } - } - public static void BOSDecoderImprove(byte[] encoded) { - - int decode_pos = 0; - int length_all = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - int block_size = 
bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - - - int block_num = length_all / block_size; - int remain_length = length_all - block_num * block_size; - - - int[] value_list = new int[length_all+block_size]; - block_size--; - - int[] value_pos_arr = new int[1]; - for (int k = 0; k < block_num; k++) { - - - decode_pos = BOSBlockDecoderImprove(encoded, decode_pos, value_list, block_size,value_pos_arr); - - } - - if (remain_length <= 3) { - for (int i = 0; i < remain_length; i++) { - int value_end = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - value_list[value_pos_arr[0]] = value_end; - value_pos_arr[0]++; - } - } else { - remain_length --; - BOSBlockDecoderImprove(encoded, decode_pos, value_list, remain_length, value_pos_arr); - } -// int[] test_block = new int[block_size]; -// System.arraycopy(value_list, 0, test_block, 0, block_size); -// System.out.println(Arrays.toString(test_block)); -// System.out.println(value_list[1000]); - } - - @Test - public void BOSImproveEncodeTest() throws IOException { - String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/"; // your data path -// String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String output_parent_dir = parent_dir + "icde0802/compression_ratio/bos_m_improve"; -// String output_parent_dir = parent_dir + "icde0802/compression_ratio/test"; -// String output_parent_dir = parent_dir + "icde0802/supply_experiment/R2O3_lower_outlier_compare/compression_ratio/bos"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - 
dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } - - output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 -// dataset_block_size.add(1024); - - int repeatTime2 = 100; - -// for (int file_i = 0; file_i < 1; file_i++) { - for (int file_i = 0; file_i < input_path_list.size(); file_i++) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - File file = new File(inputPath); - 
File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { -// f= tempList[1]; - - System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - - loader.readHeaders(); - while (loader.readRecord()) { - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); - } - inputStream.close(); - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; - - - int length = 0; - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - length = BOSEncoderImprove(data2_arr, dataset_block_size.get(file_i), encoded_result); - } - - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - compressed_size += length; - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoderImprove(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - - String[] record = { - f.toString(), - "TS_2DIFF+BOS-M", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); 
- - } - writer.close(); - - } - } - @Test - public void BOSImproveDecodeTest() throws IOException { - String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/"; // your data path -// String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String output_parent_dir = parent_dir + "icde0802/supply_experiment/R1O4_decode_time/compression_ratio/bos_m"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } - - output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 -// 
dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 -// dataset_block_size.add(1024); - - int repeatTime2 = 100; -// for (int file_i = 9; file_i < 10; file_i++) { -// - for (int file_i = 0; file_i < input_path_list.size(); file_i++) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { -// f=tempList[2]; - System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - - loader.readHeaders(); - while (loader.readRecord()) { -// String value = loader.getValues()[index]; - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); -// data.add(Integer.valueOf(value)); - } - inputStream.close(); - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new 
byte[data2_arr.length*4]; - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; - - - int length = 0; - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - length = BOSEncoderImprove(data2_arr, dataset_block_size.get(file_i), encoded_result); - } - - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - compressed_size += length; - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoderImprove(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - - String[] record = { - f.toString(), - "TS_2DIFF+BOS-M", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); - } - writer.close(); - } - } - - @Test - public void BOSPreviousDecodeTest() throws IOException { - String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/"; // your data path -// String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String output_parent_dir = parent_dir + "icde0802/compression_ratio/bos_m"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - 
dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } - - output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 -// dataset_block_size.add(1024); - - int repeatTime2 = 100; -// for (int file_i = 9; file_i < 10; file_i++) { -// - for (int file_i = 0; file_i < input_path_list.size(); file_i++) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - 
"Input Direction", - "Encoding Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { -// f=tempList[2]; - System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - - loader.readHeaders(); - while (loader.readRecord()) { -// String value = loader.getValues()[index]; - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); -// data.add(Integer.valueOf(value)); - } - inputStream.close(); - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; - - - int length = 0; - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - length = BOSEncoder(data2_arr, dataset_block_size.get(file_i), encoded_result); - } - - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - compressed_size += length; - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoder(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - - String[] record = { - f.toString(), - "TS_2DIFF+BOS-M", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); - } - writer.close(); - } - } - - @Test - public void 
compressBOSTest() throws IOException { -// String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/";// your data path - String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String output_parent_dir = parent_dir + "icde0802/supply_experiment/R3O2_compare_compression/compression_ratio/bos_m_comp"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } - - output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 -// dataset_block_size.add(256); - output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 -// dataset_block_size.add(2048); - 
output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 -// dataset_block_size.add(1024); - - int repeatTime2 = 5; -// for (int file_i = 8; file_i < 9; file_i++) { - - for (int file_i = 0; file_i < input_path_list.size(); file_i++) { -// for (int file_i = input_path_list.size()-1; file_i >=0 ; file_i--) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", -// "Compress Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { - System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - loader.readHeaders(); - while (loader.readRecord()) { - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); - } - inputStream.close(); - - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - long encodeTime = 0; - 
long decodeTime = 0; - int length = 0; - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - length = BOSEncoderImprove(data2_arr, dataset_block_size.get(file_i), encoded_result); - } - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - double ratio = 0; - double compressed_size = 0; - long compressTime = encodeTime; - s = System.nanoTime(); - byte[] compressed = new byte[0]; - for (int repeat = 0; repeat < repeatTime2; repeat++) { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - BZip2CompressorOutputStream bzip2Out = new BZip2CompressorOutputStream(baos); - bzip2Out.write(encoded_result); - bzip2Out.finish(); - compressed = baos.toByteArray(); - } - e = System.nanoTime(); - compressTime += ((e - s) / repeatTime2); - - // test compression ratio and compressed size - compressed_size += compressed.length; - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); -// for (int repeat = 0; repeat < repeatTime2; repeat++) -// BOSDecoderImprove(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - - String[] record = { - f.toString(), - "BOS+"+ "7-Zip", - String.valueOf(compressTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); - } - writer.close(); - } - } - - @Test - public void compressBOSTest2() throws IOException { -// String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/";// your data path - String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String output_parent_dir = parent_dir + "icde0802/supply_experiment/R3O2_compare_compression/compression_ratio/bos_m_lz4_comp"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> 
output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } - - output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 -// dataset_block_size.add(256); - output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 -// 
dataset_block_size.add(1024); - - int repeatTime2 = 500; -// for (int file_i = 8; file_i < 9; file_i++) { - CompressionType[] compressList = { - CompressionType.LZ4, - }; - - for (int file_i = 0; file_i < input_path_list.size(); file_i++) { -// for (int file_i = input_path_list.size()-1; file_i >=0 ; file_i--) { - - String inputPath = input_path_list.get(file_i); -// System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", -// "Compress Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { -// System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - loader.readHeaders(); - while (loader.readRecord()) { - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); - } - inputStream.close(); - - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - long encodeTime = 0; - long decodeTime = 0; - int length = 0; - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - length = BOSEncoderImprove(data2_arr, dataset_block_size.get(file_i), encoded_result); - } - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - - for (CompressionType comp : compressList) { - double ratio = 0; - double compressed_size = 0; - long compressTime = encodeTime; -// compressTime 
+= ((e - s) / repeatTime2); - System.out.println(compressTime); - - s = System.nanoTime(); - ICompressor compressor = ICompressor.getCompressor(comp); - byte[] compressed = new byte[0]; - for (int repeat = 0; repeat < repeatTime2; repeat++) { - compressed = compressor.compress(encoded_result); - } - e = System.nanoTime(); - compressTime += ((e - s) / repeatTime2); - - System.out.println(compressTime-encodeTime); - // test compression ratio and compressed size - compressed_size += compressed.length; - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); -// for (int repeat = 0; repeat < repeatTime2; repeat++) -// BOSDecoderImprove(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - - String[] record = { - f.toString(), - "BOS+"+ comp, - String.valueOf(compressTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); -// System.out.println(ratio); - } - } - writer.close(); - } - } - - @Test - public void ExpTest() throws IOException { - String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/"; // your data path -// String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; -// String output_parent_dir = parent_dir + "icde0802/compression_ratio/exp_test"; - String output_parent_dir = parent_dir + "icde0802/supply_experiment/R2O3_lower_outlier_compare/compression_ratio/bos"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - dataset_name.add("Synthetic_Exp_100"); - dataset_name.add("Synthetic_Exp_1000"); - dataset_name.add("Synthetic_Exp_10000"); - 
dataset_name.add("Synthetic_Exp_100000"); - dataset_name.add("Synthetic_Exp_1000000"); - dataset_name.add("Synthetic_Normal_100"); - dataset_name.add("Synthetic_Normal_1000"); - dataset_name.add("Synthetic_Normal_10000"); - dataset_name.add("Synthetic_Normal_100000"); - dataset_name.add("Synthetic_Normal_1000000"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } - - output_path_list.add(output_parent_dir + "/Exp_100.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Exp_1000.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Exp_10000.csv");// 2 - output_path_list.add(output_parent_dir + "/Exp_100000.csv");// 2 - output_path_list.add(output_parent_dir + "/Exp_1000000.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Normal_100.csv"); // 3 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Normal_1000.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Normal_10000.csv");//5 - output_path_list.add(output_parent_dir + "/Normal_100000.csv");//5 - output_path_list.add(output_parent_dir + "/Normal_1000000.csv");//5 -// dataset_block_size.add(2048); - - int repeatTime2 = 100; - -// for (int file_i = 0; file_i < 1; file_i++) { - for (int file_i = 0; file_i < input_path_list.size(); file_i++) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != 
null; - - for (File f : tempList) { - if(f.toString().contains(".DS")) continue; -// f= tempList[1]; - - System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - - loader.readHeaders(); - while (loader.readRecord()) { - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); - } - inputStream.close(); - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; - - - int length = 0; - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - length = BOSEncoderImprove(data2_arr, dataset_block_size.get(file_i), encoded_result); - } - - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - compressed_size += length; - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoderImprove(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - - String[] record = { - f.toString(), - "TS_2DIFF+BOS-M", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); - - } - writer.close(); - - } - } - - @Test - public void BOSVaryBlockSize() throws IOException { - String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; -// String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/"; // your data path -// String 
output_parent_dir = parent_dir + "icde0802/compression_ratio/block_size_bos_m"; - String output_parent_dir = parent_dir + "icde0802/compression_ratio/block_size_bos_m"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - } - - output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 - - output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 - - output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 - - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 - - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 - - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 - - output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 - - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 - - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 - - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 - - int repeatTime2 = 100; -// int[] file_i_list = {0,9,10}; -// for (int file_i = 9; file_i < 10; file_i++) { -// for(int file_i :file_i_list){ - for (int 
file_i = 0; file_i < input_path_list.size(); file_i++) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Block Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { - System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - - loader.readHeaders(); - - while (loader.readRecord()) { - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); - } - inputStream.close(); - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - -// for (int block_size_i = 5; block_size_i < 14; block_size_i++) { - for (int block_size_i = 13; block_size_i > 4; block_size_i--) { -// for (int block_size_i = 10; block_size_i > 9; block_size_i--) { - int block_size = (int) Math.pow(2, block_size_i); - System.out.println(block_size); - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; - - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - compressed_size = BOSEncoderImprove(data2_arr, block_size, encoded_result); - } - - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - 
s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoderImprove(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - String[] record = { - f.toString(), - "TS_2DIFF+BOS-M", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(block_size_i), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); - - } - - } - writer.close(); - - } - } - - - @Test - public void BOSQueryTest() throws IOException, InterruptedException { - String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/"; // your data path -// String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String output_parent_dir = parent_dir + "icde0802/supply_experiment/R2O2_query_processing/time/bos_m"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } - - output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 -// dataset_block_size.add(2048); - 
output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 -// dataset_block_size.add(1024); - - int repeatTime2 = 500; -// for (int file_i = 1; file_i < 2; file_i++) { - - for (int file_i = 0; file_i < input_path_list.size(); file_i++) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", - "Encoding Time", - "Decoding Time", - "Insert Time", - "Query Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { -// f=tempList[2]; - System.out.println(f); -// Thread.sleep(5000); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new 
CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - - loader.readHeaders(); - while (loader.readRecord()) { -// String value = loader.getValues()[index]; - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); -// data.add(Integer.valueOf(value)); - } - inputStream.close(); - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; - - - int length = 0; - - String file_bin_str = "icde0802/supply_experiment/R2O2_query_processing/time/str_1.bin"; - long input_time = 0; - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - long start_encode = System.nanoTime(); - length = BOSEncoderImprove(data2_arr, dataset_block_size.get(file_i), encoded_result); - long end_encode = System.nanoTime(); - try (FileOutputStream fos = new FileOutputStream(parent_dir +file_bin_str)) { - // 只写入前length个元素 - fos.write(encoded_result, 0, length); - } catch (IOException ioe) { - ioe.printStackTrace(); - } - long end_io = System.nanoTime(); - encodeTime += (end_encode - start_encode); - input_time += (end_io - end_encode); - } - - - long e = System.nanoTime(); - encodeTime = (encodeTime / repeatTime2); - input_time = (input_time / repeatTime2); - compressed_size += length; - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - - long output_time = 0; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++){ - long start_io = System.nanoTime(); - - try (FileInputStream fis = new FileInputStream(parent_dir +file_bin_str)) { - // 读取数据到byte数组中 - int bytesRead = fis.read(encoded_result); -// System.out.println("Number of bytes read: " + bytesRead); 
- } catch (IOException ioe) { - ioe.printStackTrace(); - } - long start_decode = System.nanoTime(); - BOSDecoderImprove(encoded_result); - long end_decode = System.nanoTime(); - decodeTime += (end_decode - start_decode); - output_time += (start_decode - start_io); - } - - e = System.nanoTime(); - decodeTime = (decodeTime / repeatTime2); - output_time = (output_time / repeatTime2); - - - String[] record = { - f.toString(), - "TS_2DIFF+BOS-M", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(input_time), - String.valueOf(output_time), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); - } - writer.close(); - } - } - - @Test - public void BOSMOUOTest() throws IOException { - String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/"; // your data path -// String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String output_parent_dir = parent_dir + "icde0802/supply_experiment/R2O5_skew_distribution/compression_ratio/bos_m_improve"; -// String output_parent_dir = parent_dir + "icde0802/compression_ratio/test"; -// String output_parent_dir = parent_dir + "icde0802/supply_experiment/R2O3_lower_outlier_compare/compression_ratio/bos"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - 
dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } - - output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 -// dataset_block_size.add(1024); - - int repeatTime2 = 200; - -// for (int file_i = 6; file_i < 7; file_i++) { - for (int file_i = 0; file_i < input_path_list.size(); file_i++) { - if(file_i == 4 ) repeatTime2 = 1; - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - 
"Input Direction", - "Encoding Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { -// f= tempList[1]; - - System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - - loader.readHeaders(); - while (loader.readRecord()) { - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); - } - inputStream.close(); - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; - - - int length = 0; - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - length = BOSEncoder(data2_arr, dataset_block_size.get(file_i), encoded_result); - } - - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - compressed_size += length; - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoder(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - - String[] record = { - f.toString(), - "TS_2DIFF+BOS-M", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); - - } - writer.close(); - - } - } -}
diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/TSDIFFBOSVImproveDecodeTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/TSDIFFBOSVImproveDecodeTest.java deleted file mode 100644 index 3aa69b3..0000000 --- a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/TSDIFFBOSVImproveDecodeTest.java +++ /dev/null
@@ -1,1895 +0,0 @@ -package org.apache.iotdb.tsfile.encoding; - -import com.csvreader.CsvReader; -import com.csvreader.CsvWriter; -import org.apache.commons.compress.archivers.sevenz.SevenZArchiveEntry; -import org.apache.commons.compress.archivers.sevenz.SevenZOutputFile; -import org.apache.iotdb.tsfile.compress.ICompressor; -import org.apache.iotdb.tsfile.compress.IUnCompressor; -import org.apache.iotdb.tsfile.encoding.decoder.Decoder; -import org.apache.iotdb.tsfile.encoding.encoder.Encoder; -import org.apache.iotdb.tsfile.encoding.encoder.FloatEncoder; -import org.apache.iotdb.tsfile.encoding.encoder.TSEncodingBuilder; -import org.apache.iotdb.tsfile.file.metadata.enums.CompressionType; -import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType; -import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding; -import org.junit.Test; - -import java.io.*; -import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.util.ArrayList; -import java.util.Arrays; - -import static java.lang.Math.pow; -//import static org.apache.iotdb.tsfile.encoding.CompressTest.addToArchiveCompression; - -public class TSDIFFBOSVImproveDecodeTest { - - public static long combine2Int(int int1, int int2) { - return ((long) int1 << 32) | (int2 & 0xFFFFFFFFL); - } - - public static int getTime(long long1) { - return ((int) (long1 >> 32)); - } - - public static int getValue(long long1) { - return ((int) (long1)); - } - - public static int getCount(long long1, int mask) { - return ((int) (long1 & mask)); - } - public static int getUniqueValue(long long1, int left_shift) { - return ((int) ((long1) >> left_shift)); - } - - public static int getBitWith(int num) { - if (num == 0) return 1; - else return 32 - Integer.numberOfLeadingZeros(num); - } - - public static void int2Bytes(int integer,int encode_pos , byte[] cur_byte) { - cur_byte[encode_pos] = (byte) (integer >> 24); - cur_byte[encode_pos+1] = (byte) (integer >> 16); - 
cur_byte[encode_pos+2] = (byte) (integer >> 8); - cur_byte[encode_pos+3] = (byte) (integer); - } - - - public static void intByte2Bytes(int integer, int encode_pos , byte[] cur_byte) { - cur_byte[encode_pos] = (byte) (integer); - } - - private static void long2intBytes(long integer, int encode_pos , byte[] cur_byte) { - cur_byte[encode_pos] = (byte) (integer >> 24); - cur_byte[encode_pos+1] = (byte) (integer >> 16); - cur_byte[encode_pos+2] = (byte) (integer >> 8); - cur_byte[encode_pos+3] = (byte) (integer); - } - - public static int bytes2Integer(byte[] encoded, int start, int num) { - int value = 0; - if (num > 4) { - System.out.println("bytes2Integer error"); - return 0; - } - for (int i = 0; i < num; i++) { - value <<= 8; - int b = encoded[i + start] & 0xFF; - value |= b; - } - return value; - } - - private static long bytesLong2Integer(byte[] encoded, int decode_pos) { - long value = 0; - for (int i = 0; i < 4; i++) { - value <<= 8; - int b = encoded[i + decode_pos] & 0xFF; - value |= b; - } - return value; - } - - public static void pack8Values(ArrayList<Integer> values, int offset, int width, int encode_pos, byte[] encoded_result) { - int bufIdx = 0; - int valueIdx = offset; - // remaining bits for the current unfinished Integer - int leftBit = 0; - - while (valueIdx < 8 + offset) { - // buffer is used for saving 32 bits as a part of result - int buffer = 0; - // remaining size of bits in the 'buffer' - int leftSize = 32; - - // encode the left bits of current Integer to 'buffer' - if (leftBit > 0) { - buffer |= (values.get(valueIdx) << (32 - leftBit)); - leftSize -= leftBit; - leftBit = 0; - valueIdx++; - } - - while (leftSize >= width && valueIdx < 8 + offset) { - // encode one Integer to the 'buffer' - buffer |= (values.get(valueIdx)<< (leftSize - width)); - leftSize -= width; - valueIdx++; - } - // If the remaining space of the buffer can not save the bits for one Integer, - if (leftSize > 0 && valueIdx < 8 + offset) { - // put the first 'leftSize' bits 
of the Integer into remaining space of the - // buffer - buffer |= (values.get(valueIdx) >>> (width - leftSize)); - leftBit = width - leftSize; - } - - // put the buffer into the final result - for (int j = 0; j < 4; j++) { - encoded_result[encode_pos] = (byte) ((buffer >>> ((3 - j) * 8)) & 0xFF); - encode_pos ++; - bufIdx++; - if (bufIdx >= width) { - return ; - } - } - } -// return encode_pos; - } - - public static void unpack8Values(byte[] encoded, int offset,int width, ArrayList<Integer> result_list) { - int byteIdx = offset; - long buffer = 0; - // total bits which have read from 'buf' to 'buffer'. i.e., - // number of available bits to be decoded. - int totalBits = 0; - int valueIdx = 0; - - while (valueIdx < 8) { - // If current available bits are not enough to decode one Integer, - // then add next byte from buf to 'buffer' until totalBits >= width - while (totalBits < width) { - buffer = (buffer << 8) | (encoded[byteIdx] & 0xFF); - byteIdx++; - totalBits += 8; - } - - // If current available bits are enough to decode one Integer, - // then decode one Integer one by one until left bits in 'buffer' is - // not enough to decode one Integer. 
- while (totalBits >= width && valueIdx < 8) { - result_list.add ((int) (buffer >>> (totalBits - width))); - valueIdx++; - totalBits -= width; - buffer = buffer & ((1L << totalBits) - 1); - } - } - } - - public static int bitPacking(ArrayList<Integer> numbers, int start, int bit_width,int encode_pos, byte[] encoded_result) { - int block_num = (numbers.size()-start) / 8; - for(int i=0;i<block_num;i++){ - pack8Values( numbers, start+i*8, bit_width,encode_pos, encoded_result); - encode_pos +=bit_width; - } - - return encode_pos; - - } - - public static ArrayList<Integer> decodeBitPacking( - byte[] encoded, int decode_pos, int bit_width, int block_size) { - ArrayList<Integer> result_list = new ArrayList<>(); - int block_num = (block_size - 1) / 8; - - for (int i = 0; i < block_num; i++) { // bitpacking - unpack8Values( encoded, decode_pos, bit_width, result_list); - decode_pos += bit_width; - - } - return result_list; - } - - - public static int[] getAbsDeltaTsBlock( - int[] ts_block, - int i, - int block_size, - int remaining, - int[] min_delta) { - int[] ts_block_delta = new int[remaining-1]; - - int value_delta_min = Integer.MAX_VALUE; - int value_delta_max = Integer.MIN_VALUE; - int base = i*block_size+1; - int end = i*block_size+remaining; - - int tmp_j_1 = ts_block[base-1]; - min_delta[0] =tmp_j_1; - int j = base; - int tmp_j; - - while(j<end){ - tmp_j = ts_block[j]; - int epsilon_v = tmp_j - tmp_j_1; - ts_block_delta[j-base] = epsilon_v; - if (epsilon_v < value_delta_min) { - value_delta_min = epsilon_v; - } - if (epsilon_v > value_delta_max) { - value_delta_max = epsilon_v; - } - tmp_j_1 = tmp_j; - j++; - } - j = 0; - end = remaining -1; - while(j<end){ - ts_block_delta[j] = ts_block_delta[j] - value_delta_min; - j++; - } - - min_delta[1] = value_delta_min; - min_delta[2] = (value_delta_max-value_delta_min); - - - return ts_block_delta; - } - - - public static int encodeOutlier2Bytes( - ArrayList<Integer> ts_block_delta, - int bit_width, - int encode_pos, 
byte[] encoded_result) { - - encode_pos = bitPacking(ts_block_delta, 0, bit_width, encode_pos, encoded_result); - - int n_k = ts_block_delta.size(); - int n_k_b = n_k / 8; - long cur_remaining = 0; // encoded int - int cur_number_bits = 0; // the bit width used of encoded int - for (int i = n_k_b * 8; i < n_k; i++) { - long cur_value = ts_block_delta.get(i); - int cur_bit_width = bit_width; // remaining bit width of current value - - if (cur_number_bits + bit_width >= 32) { - cur_remaining <<= (32 - cur_number_bits); - cur_bit_width = bit_width - 32 + cur_number_bits; - cur_remaining += ((cur_value >> cur_bit_width)); - long2intBytes(cur_remaining,encode_pos,encoded_result); - encode_pos += 4; - cur_remaining = 0; - cur_number_bits = 0; - } - - cur_remaining <<= cur_bit_width; - cur_number_bits += cur_bit_width; - cur_remaining += (((cur_value << (32 - cur_bit_width)) & 0xFFFFFFFFL) >> (32 - cur_bit_width)); // - } - cur_remaining <<= (32 - cur_number_bits); - long2intBytes(cur_remaining,encode_pos,encoded_result); - encode_pos += 4; - return encode_pos; - - } - - - public static ArrayList<Integer> decodeOutlier2Bytes( - byte[] encoded, - int decode_pos, - int bit_width, - int length, - ArrayList<Integer> encoded_pos_result - ) { - - int n_k_b = length / 8; - int remaining = length - n_k_b * 8; - ArrayList<Integer> result_list = new ArrayList<>(decodeBitPacking(encoded, decode_pos, bit_width, n_k_b * 8 + 1)); - decode_pos += n_k_b * bit_width; - - ArrayList<Long> int_remaining = new ArrayList<>(); - int int_remaining_size = remaining * bit_width / 32 + 1; - for (int j = 0; j < int_remaining_size; j++) { - int_remaining.add(bytesLong2Integer(encoded, decode_pos)); - decode_pos += 4; - } - - int cur_remaining_bits = 32; // remaining bit width of current value - long cur_number = int_remaining.get(0); - int cur_number_i = 1; - for (int i = n_k_b * 8; i < length; i++) { - if (bit_width < cur_remaining_bits) { - int tmp = (int) (cur_number >> (32 - bit_width)); - 
result_list.add(tmp); - cur_number <<= bit_width; - cur_number &= 0xFFFFFFFFL; - cur_remaining_bits -= bit_width; - } else { - int tmp = (int) (cur_number >> (32 - cur_remaining_bits)); - int remain_bits = bit_width - cur_remaining_bits; - tmp <<= remain_bits; - - cur_number = int_remaining.get(cur_number_i); - cur_number_i++; - tmp += (int) (cur_number >> (32 - remain_bits)); - result_list.add(tmp); - cur_number <<= remain_bits; - cur_number &= 0xFFFFFFFFL; - cur_remaining_bits = 32 - remain_bits; - } - } - encoded_pos_result.add(decode_pos); - return result_list; - } - - public static int EncodeBits(int num, - int bit_width, - int encode_pos, - byte[] cur_byte, - int[] bit_index_list){ - // 找到要插入的位的索引 - int bit_index = bit_index_list[0] ;//cur_byte[encode_pos + 1]; - - // 计算数值的起始位位置 - int remaining_bits = bit_width; - - while (remaining_bits > 0) { - // 计算在当前字节中可以使用的位数 - int available_bits = bit_index; - int bits_to_write = Math.min(available_bits, remaining_bits); - - // 更新 bit_index - bit_index = available_bits - bits_to_write; - - // 计算要写入的位的掩码和数值 - int mask = (1 << bits_to_write) - 1; - int bits = (num >> (remaining_bits - bits_to_write)) & mask; - - // 写入到当前位置 - cur_byte[encode_pos] &= (byte) ~(mask << bit_index); // 清除对应位置的位 - cur_byte[encode_pos] |= (byte) (bits << bit_index); - - // 更新位宽和数值 - remaining_bits -= bits_to_write; - if (bit_index == 0) { - bit_index = 8; - encode_pos++; - } - } - bit_index_list[0] = bit_index; -// cur_byte[encode_pos + 1] = (byte) bit_index; - return encode_pos; - } - private static int BOSEncodeBits(int[] ts_block_delta, - int final_k_start_value, - int final_x_l_plus, - int final_k_end_value, - int final_x_u_minus, - int max_delta_value, - int[] min_delta, - int encode_pos, - byte[] cur_byte) { - int block_size = ts_block_delta.length; - - ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_left_outlier = new 
ArrayList<>(); - ArrayList<Integer> final_right_outlier = new ArrayList<>(); - ArrayList<Integer> final_normal = new ArrayList<>(); - int k1 = 0; - int k2 = 0; - - - - ArrayList<Integer> bitmap_outlier = new ArrayList<>(); - int index_bitmap_outlier = 0; - int cur_index_bitmap_outlier_bits = 0; - for (int i = 0; i < block_size; i++) { - int cur_value = ts_block_delta[i]; - if ( cur_value<= final_k_start_value) { -// encode_pos = EncodeBits(cur_value,left_bit_width,encode_pos,cur_byte); -// final_left_outlier.add(cur_value); - final_left_outlier_index.add(i); - if (cur_index_bitmap_outlier_bits % 8 != 7) { - index_bitmap_outlier <<= 2; - index_bitmap_outlier += 3; - cur_index_bitmap_outlier_bits += 2; - } else { - index_bitmap_outlier <<= 1; - index_bitmap_outlier += 1; - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 1; - cur_index_bitmap_outlier_bits = 1; - } - k1++; - - - } else if (cur_value >= final_k_end_value) { -// encode_pos = EncodeBits(cur_value- final_k_end_value,right_bit_width,encode_pos,cur_byte); -// final_right_outlier.add(cur_value - final_k_end_value); - final_right_outlier_index.add(i); - if (cur_index_bitmap_outlier_bits % 8 != 7) { - index_bitmap_outlier <<= 2; - index_bitmap_outlier += 2; - cur_index_bitmap_outlier_bits += 2; - } else { - index_bitmap_outlier <<= 1; - index_bitmap_outlier += 1; - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 0; - cur_index_bitmap_outlier_bits = 1; - } - k2++; - - } else { -// final_normal.add(cur_value - final_x_l_plus); -// encode_pos = EncodeBits(cur_value- final_x_l_plus,right_bit_width,encode_pos,cur_byte); - index_bitmap_outlier <<= 1; - cur_index_bitmap_outlier_bits += 1; - } - if (cur_index_bitmap_outlier_bits % 8 == 0) { - bitmap_outlier.add(index_bitmap_outlier); - index_bitmap_outlier = 0; - } - } - if (cur_index_bitmap_outlier_bits % 8 != 0) { - - index_bitmap_outlier <<= (8 - cur_index_bitmap_outlier_bits % 8); - - index_bitmap_outlier &= 0xFF; - 
bitmap_outlier.add(index_bitmap_outlier); - } - - int final_alpha = ((k1 + k2) * getBitWith(block_size-1)) <= (block_size + k1 + k2) ? 1 : 0; - - - int k_byte = (k1 << 1); - k_byte += final_alpha; - k_byte += (k2 << 16); - - int2Bytes(k_byte,encode_pos,cur_byte); - encode_pos += 4; - - - int2Bytes(min_delta[0],encode_pos,cur_byte); - encode_pos += 4; - int2Bytes(min_delta[1],encode_pos,cur_byte); - encode_pos += 4; - - int bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); - intByte2Bytes(bit_width_final,encode_pos,cur_byte); - encode_pos += 1; - int[] bit_index_list = new int[1]; - bit_index_list[0] = 8; - - if(final_k_start_value<0 && final_k_end_value > max_delta_value){ -// int bit_width_final= getBitWith(final_x_u_minus - final_x_l_plus); -// cur_byte[encode_pos+1] = 8; - for (int cur_value : ts_block_delta) { - encode_pos = EncodeBits(cur_value, bit_width_final, encode_pos, cur_byte, bit_index_list); -// final_normal.add(cur_value); - } - if(bit_index_list[0] != 8){ - encode_pos ++; - } -// cur_byte[encode_pos+1] = 0; -// encode_pos = encodeOutlier2Bytes(final_normal, bit_width_final,encode_pos,cur_byte); - return encode_pos; - } - - - int left_bit_width = getBitWith(final_k_start_value);//final_left_max - int right_bit_width = getBitWith(max_delta_value - final_k_end_value);//final_right_min - int2Bytes(final_x_l_plus,encode_pos,cur_byte); - encode_pos += 4; - int2Bytes(final_k_end_value,encode_pos,cur_byte); - encode_pos += 4; - -// bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); -// intByte2Bytes(bit_width_final,encode_pos,cur_byte); -// encode_pos += 1; - intByte2Bytes(left_bit_width,encode_pos,cur_byte); - encode_pos += 1; - intByte2Bytes(right_bit_width,encode_pos,cur_byte); - encode_pos += 1; - - if (final_alpha == 0) { // 0 - - for (int i : bitmap_outlier) { - - intByte2Bytes(i,encode_pos,cur_byte); - encode_pos += 1; - } - } else { - encode_pos = encodeOutlier2Bytes(final_left_outlier_index, 
getBitWith(block_size-1),encode_pos,cur_byte); - encode_pos = encodeOutlier2Bytes(final_right_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); - } -// cur_byte[encode_pos+1] = 8; -// bit_index_list[0] = 8; - for (int cur_value : ts_block_delta) { - if (cur_value <= final_k_start_value) { - encode_pos = EncodeBits(cur_value, left_bit_width, encode_pos, cur_byte,bit_index_list); - } else if (cur_value >= final_k_end_value) { - encode_pos = EncodeBits(cur_value - final_k_end_value, right_bit_width, encode_pos, cur_byte,bit_index_list); - } else { - encode_pos = EncodeBits(cur_value - final_x_l_plus, bit_width_final, encode_pos, cur_byte,bit_index_list); - } - } - if(bit_index_list[0] != 8){ - encode_pos ++; - } - -// cur_byte[encode_pos+1] = 0; - -// if(k1==0 && k2==0){ -// intByte2Bytes(bit_width_final,encode_pos,cur_byte); -// encode_pos += 1; -// -// -// } -// else{ -// int2Bytes(final_x_l_plus,encode_pos,cur_byte); -// encode_pos += 4; -// int2Bytes(final_k_end_value,encode_pos,cur_byte); -// encode_pos += 4; -// -// bit_width_final = getBitWith(final_x_u_minus - final_x_l_plus); -// intByte2Bytes(bit_width_final,encode_pos,cur_byte); -// encode_pos += 1; -// intByte2Bytes(left_bit_width,encode_pos,cur_byte); -// encode_pos += 1; -// intByte2Bytes(right_bit_width,encode_pos,cur_byte); -// encode_pos += 1; -// if (final_alpha == 0) { // 0 -// -// for (int i : bitmap_outlier) { -// -// intByte2Bytes(i,encode_pos,cur_byte); -// encode_pos += 1; -// } -// } else { -// encode_pos = encodeOutlier2Bytes(final_left_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); -// encode_pos = encodeOutlier2Bytes(final_right_outlier_index, getBitWith(block_size-1),encode_pos,cur_byte); -// } -// } - - -// if(k1+k2!=block_size) -// encode_pos = encodeOutlier2Bytes(final_normal, bit_width_final,encode_pos,cur_byte); -// if (k1 != 0) -// encode_pos = encodeOutlier2Bytes(final_left_outlier, left_bit_width,encode_pos,cur_byte); -// if (k2 != 0) -// encode_pos = 
encodeOutlier2Bytes(final_right_outlier, right_bit_width,encode_pos,cur_byte); - return encode_pos; - - } - - - private static int BOSBlockEncoder(int[] ts_block, int block_i, int block_size, int remaining ,int encode_pos , byte[] cur_byte) { - - int[] min_delta = new int[3]; - int[] ts_block_delta = getAbsDeltaTsBlock(ts_block, block_i, block_size, remaining, min_delta); - - - block_size = remaining-1; - int max_delta_value = min_delta[2]; - int[] value_list = new int[block_size]; - int unique_value_count = 0; - int[] value_count_list = new int[max_delta_value+1]; - for(int value:ts_block_delta){ - if(value_count_list[value]==0){ - value_count_list[value] = 1; - value_list[unique_value_count] = value; - unique_value_count ++; - }else{ - value_count_list[value] ++; - } - } - - int left_shift = getBitWith(block_size); - int mask = (1 << left_shift) - 1; - long[] sorted_value_list = new long[unique_value_count]; - int count = 0; - - for(int i=0;i<unique_value_count;i++){ - int value = value_list[i]; - sorted_value_list[i] = (((long) value) << left_shift) + value_count_list[value]; - } - Arrays.sort(sorted_value_list); - - for(int i=0;i<unique_value_count;i++){ - count += getCount(sorted_value_list[i], mask); - sorted_value_list[i] = (((long)getUniqueValue(sorted_value_list[i], left_shift) ) << left_shift) + count;//new_value_list[i] - } - - - int final_k_start_value = -1; // x_l_minus - int final_x_l_plus = 0; // x_l_plus - int final_k_end_value = max_delta_value+1; // x_u_plus - int final_x_u_minus = max_delta_value; // x_u_minus - - int min_bits = 0; - min_bits += (getBitWith(final_k_end_value - final_k_start_value - 2 ) * (block_size)); - - int cur_k1 = 0; - - int x_l_plus_value = 0; // x_l_plus - int x_u_minus_value = max_delta_value; // x_u_plus - - for (int end_value_i = 1; end_value_i < unique_value_count; end_value_i++) { - - x_u_minus_value = getUniqueValue(sorted_value_list[end_value_i-1], left_shift); - int x_u_plus_value = 
getUniqueValue(sorted_value_list[end_value_i], left_shift); - int cur_bits = 0; - int cur_k2 = block_size - getCount(sorted_value_list[end_value_i-1],mask); - cur_bits += Math.min((cur_k2 + cur_k1) * getBitWith(block_size-1), block_size + cur_k2 + cur_k1); - if (cur_k1 + cur_k2 != block_size) - cur_bits += (block_size - cur_k2) * getBitWith(x_u_minus_value - x_l_plus_value); // cur_k1 = 0 - if (cur_k2 != 0) - cur_bits += cur_k2 * getBitWith(max_delta_value - x_u_plus_value); - - - if (cur_bits < min_bits) { - min_bits = cur_bits; - final_x_u_minus = x_u_minus_value; - final_k_end_value = x_u_plus_value; - } - } - - int k_start_value = -1; // x_l_minus - - for (int start_value_i = 0; start_value_i < unique_value_count-1; start_value_i++) { - long k_start_valueL = sorted_value_list[start_value_i]; - k_start_value = getUniqueValue(k_start_valueL, left_shift) ; - x_l_plus_value = getUniqueValue(sorted_value_list[start_value_i+1], left_shift) ; - - - cur_k1 = getCount(k_start_valueL,mask); - - int k_end_value; - int cur_bits; - int cur_k2; - k_end_value = max_delta_value + 1; - - cur_bits = 0; - cur_k2 = 0; - cur_bits += Math.min((cur_k2 + cur_k1) * getBitWith(block_size-1), block_size + cur_k2 + cur_k1); - cur_bits += cur_k1 * getBitWith(k_start_value); - if (cur_k1 + cur_k2 != block_size) - cur_bits += (block_size - cur_k1) * getBitWith(k_end_value- x_l_plus_value); //cur_k2 =0 - - if (cur_bits < min_bits) { - min_bits = cur_bits; - final_k_start_value = k_start_value; - final_x_l_plus = x_l_plus_value; - final_k_end_value = k_end_value; - final_x_u_minus = max_delta_value; - } - - for (int end_value_i = start_value_i + 1; end_value_i < unique_value_count; end_value_i++) { - - x_u_minus_value = getUniqueValue(sorted_value_list[end_value_i-1], left_shift); - k_end_value = getUniqueValue(sorted_value_list[end_value_i], left_shift); - - cur_bits = 0; - cur_k2 = block_size - getCount(sorted_value_list[end_value_i-1],mask); - - cur_bits += Math.min((cur_k1 + cur_k2) * 
getBitWith(block_size-1), block_size + cur_k1 + cur_k2); - cur_bits += cur_k1 * getBitWith(k_start_value); - if (cur_k1 + cur_k2 != block_size) - cur_bits += (block_size - cur_k1 - cur_k2) * getBitWith(x_u_minus_value - x_l_plus_value); - if (cur_k2 != 0) - cur_bits += cur_k2 * getBitWith(max_delta_value - k_end_value); - - - if (cur_bits < min_bits) { - min_bits = cur_bits; - - final_k_start_value = k_start_value; - final_x_l_plus = x_l_plus_value; - final_k_end_value = k_end_value; - final_x_u_minus = x_u_minus_value; - } - - } - } -// System.out.println(min_bits/4); - encode_pos = BOSEncodeBits(ts_block_delta, final_k_start_value, final_x_l_plus, final_k_end_value, final_x_u_minus, - max_delta_value, min_delta, encode_pos , cur_byte); -// System.out.println(encode_pos); -// System.out.println(encode_pos); - return encode_pos; - } - - - public static int BOSEncoder( - int[] data, int block_size, byte[] encoded_result) { - block_size++; - - int length_all = data.length; - - int encode_pos = 0; - int2Bytes(length_all,encode_pos,encoded_result); - encode_pos += 4; - - int block_num = length_all / block_size; - int2Bytes(block_size,encode_pos,encoded_result); - encode_pos+= 4; - - for (int i = 0; i < block_num; i++) { -// int start_encode_pos = encode_pos; - encode_pos = BOSBlockEncoder(data, i, block_size, block_size,encode_pos,encoded_result); -// System.out.println(encode_pos-start_encode_pos); -// System.out.println("------------------------------------------"); - } - - int remaining_length = length_all - block_num * block_size; - if (remaining_length <= 3) { - for (int i = remaining_length; i > 0; i--) { - int2Bytes(data[data.length - i], encode_pos, encoded_result); - encode_pos += 4; - } - - } - else { - - int start = block_num * block_size; - int remaining = length_all-start; - encode_pos = BOSBlockEncoder(data, block_num, block_size,remaining, encode_pos,encoded_result); - - } - - - return encode_pos; - } - - - public static int DecodeBits(byte[] cur_byte, 
int bit_width, int[] decode_pos_list) { - int decode_pos = decode_pos_list[0]; - int bit_index = decode_pos_list[1]; //cur_byte[decode_pos + 1]; - int remaining_bits = bit_width; - int num = 0; - - while (remaining_bits > 0) { - int available_bits = bit_index; - int bits_to_read = Math.min(available_bits, remaining_bits); - - // 计算要读取的位的掩码 - int mask = (1 << bits_to_read) - 1; - int bits = (cur_byte[decode_pos] >> (available_bits - bits_to_read)) & mask; - - // 将读取的位合并到结果中 - num = (num << bits_to_read) | bits; - - // 更新位宽和 bit_index - remaining_bits -= bits_to_read; - bit_index = available_bits - bits_to_read; - - if (bit_index == 0) { - bit_index = 8; - decode_pos++; - } - } - decode_pos_list[0] = decode_pos; - decode_pos_list[1] = bit_index; - - return num; - } - public static int BOSBlockDecoder(byte[] encoded, int decode_pos, int[] value_list, int block_size, int[] value_pos_arr) { - - int k_byte = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - int k1_byte = (int) (k_byte % pow(2, 16)); - int k1 = k1_byte / 2; - int final_alpha = k1_byte % 2; - - int k2 = (int) (k_byte / pow(2, 16)); - - int value0 = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - value_list[value_pos_arr[0]] =value0; - value_pos_arr[0] ++; - - int min_delta = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - int bit_width_final = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - int[] decode_list = new int[2]; - decode_list[0]= decode_pos; - decode_list[1]= 8; - - int valuePos = value_pos_arr[0]; - - if(k1==0 && k2==0){ - int pre_v = value0; - for (int i = 0; i < block_size; i++) { - int cur_delta = min_delta + DecodeBits(encoded, bit_width_final, decode_list); - pre_v += cur_delta; - value_list[valuePos++] = pre_v; - } - value_pos_arr[0] = valuePos; - return decode_list[0]; - } - - ArrayList<Integer> final_left_outlier_index = new ArrayList<>(); - ArrayList<Integer> final_right_outlier_index = new ArrayList<>(); -// ArrayList<Integer> 
final_left_outlier = new ArrayList<>(); -// ArrayList<Integer> final_right_outlier = new ArrayList<>(); -// ArrayList<Integer> final_normal= new ArrayList<>();; - ArrayList<Integer> bitmap_outlier = new ArrayList<>(); - int final_k_start_value = 0; - int final_k_end_value = 0; -// int bit_width_final = 0; - int left_bit_width = 0; - int right_bit_width = 0; - - final_k_start_value = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - final_k_end_value = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - -// bit_width_final = bytes2Integer(encoded, decode_pos, 1); -// decode_pos += 1; - - left_bit_width = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - right_bit_width = bytes2Integer(encoded, decode_pos, 1); - decode_pos += 1; - - if (final_alpha == 0) { - int bitmap_bytes = (int) Math.ceil((double) (block_size + k1 + k2) / (double) 8); - for (int i = 0; i < bitmap_bytes; i++) { - bitmap_outlier.add(bytes2Integer(encoded, decode_pos, 1)); - decode_pos += 1; - } - int bitmap_outlier_i = 0; - int remaining_bits = 8; - int tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - int i = 0; - while (i < block_size ) { - if (remaining_bits > 1) { - int bit_i = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_i == 1) { - int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_left_right == 1) { - final_left_outlier_index.add(i); - } else { - final_right_outlier_index.add(i); - } - } - if (remaining_bits == 0) { - remaining_bits = 8; - if (bitmap_outlier_i >= bitmap_bytes) break; - tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - } - } else if (remaining_bits == 1) { - int bit_i = tmp & 0x1; - remaining_bits = 8; - if (bitmap_outlier_i >= bitmap_bytes) break; - tmp = bitmap_outlier.get(bitmap_outlier_i); - bitmap_outlier_i++; - if (bit_i == 1) { - int bit_left_right = (tmp >> (remaining_bits - 1)) & 0x1; - remaining_bits -= 1; - if (bit_left_right == 1) { - 
final_left_outlier_index.add(i); - } else { - final_right_outlier_index.add(i); - } - } - } - i++; - } - } else { - ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); - final_left_outlier_index = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(block_size-1), k1, decode_pos_result_left); - decode_pos = (decode_pos_result_left.get(0)); - ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); - final_right_outlier_index = decodeOutlier2Bytes(encoded, decode_pos, getBitWith(block_size-1), k2, decode_pos_result_right); - decode_pos = (decode_pos_result_right.get(0)); - } - - - - - -// ArrayList<Integer> decode_pos_normal = new ArrayList<>(); -// final_normal = decodeOutlier2Bytes(encoded, decode_pos, bit_width_final, block_size - k1 - k2, decode_pos_normal); -// -// decode_pos = decode_pos_normal.get(0); -// if (k1 != 0) { -// ArrayList<Integer> decode_pos_result_left = new ArrayList<>(); -// final_left_outlier = decodeOutlier2Bytes(encoded, decode_pos, left_bit_width, k1, decode_pos_result_left); -// decode_pos = decode_pos_result_left.get(0); -// } -// if (k2 != 0) { -// ArrayList<Integer> decode_pos_result_right = new ArrayList<>(); -// final_right_outlier = decodeOutlier2Bytes(encoded, decode_pos, right_bit_width, k2, decode_pos_result_right); -// decode_pos = decode_pos_result_right.get(0); -// } - int left_outlier_i = 0; - int right_outlier_i = 0; - int normal_i = 0; - int pre_v = value0; -// int final_k_end_value = (int) (final_k_start_value + pow(2, bit_width_final)); - -// Precompute constants - int normalOffset = min_delta + final_k_start_value; - int rightOutlierOffset = min_delta + final_k_end_value; - -// Initialize indices and pre-fetch next outlier positions - int leftOutlierNextIndex = (left_outlier_i < k1) ? final_left_outlier_index.get(left_outlier_i) : Integer.MAX_VALUE; - int rightOutlierNextIndex = (right_outlier_i < k2) ? 
final_right_outlier_index.get(right_outlier_i) : Integer.MAX_VALUE; - decode_list[0]= decode_pos; -// decode_list[1]= 8; - // Use a local variable for the position - for (int i = 0; i < block_size; i++) { - int currentDelta; - if (i == leftOutlierNextIndex) { - // Process left outlier - currentDelta = min_delta + DecodeBits(encoded, left_bit_width, decode_list); // final_left_outlier.get(left_outlier_i); - left_outlier_i++; - leftOutlierNextIndex = (left_outlier_i < k1) ? final_left_outlier_index.get(left_outlier_i) : Integer.MAX_VALUE; - } else if (i == rightOutlierNextIndex) { - // Process right outlier - currentDelta = rightOutlierOffset + DecodeBits(encoded, right_bit_width, decode_list);// final_right_outlier.get(right_outlier_i); - right_outlier_i++; - rightOutlierNextIndex = (right_outlier_i < k2) ? final_right_outlier_index.get(right_outlier_i) : Integer.MAX_VALUE; - } else { - // Process normal value - currentDelta = normalOffset + DecodeBits(encoded, bit_width_final, decode_list); - normal_i++; - } - - // Update the cumulative value and store it - pre_v += currentDelta; - value_list[valuePos++] = pre_v; - } - value_pos_arr[0] = valuePos; - if(decode_list[1]!=8){ - return decode_list[0]+1; - }else { - return decode_list[0]; - } -// decode_pos = decode_list[0]; -// Update the position in the array - - -// return decode_pos; - } - - public static void BOSDecoder(byte[] encoded) { - - int decode_pos = 0; - int length_all = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - int block_size = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - - - - int block_num = length_all / block_size; - int remain_length = length_all - block_num * block_size; - - - int[] value_list = new int[length_all+block_size]; - block_size--; - - int[] value_pos_arr = new int[1]; - for (int k = 0; k < block_num; k++) { - - - decode_pos = BOSBlockDecoder(encoded, decode_pos, value_list, block_size,value_pos_arr); - - } - - if (remain_length <= 3) { - for (int i = 0; i < 
remain_length; i++) { - int value_end = bytes2Integer(encoded, decode_pos, 4); - decode_pos += 4; - value_list[value_pos_arr[0]] = value_end; - value_pos_arr[0]++; - } - } else { - remain_length --; - BOSBlockDecoder(encoded, decode_pos, value_list, remain_length, value_pos_arr); - } - } - - @Test - public void BOSOptimalTest() throws IOException { - String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/"; // your data path -// String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String output_parent_dir = parent_dir + "icde0802/supply_experiment/R1O4_decode_time/compression_ratio/bos_v"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } - - output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + 
"/GW-Magnetic_ratio.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 -// dataset_block_size.add(1024); - - int repeatTime2 = 100; -// for (int file_i = 1; file_i < 2; file_i++) { - - for (int file_i = 0; file_i < input_path_list.size(); file_i++) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { -// f=tempList[2]; - System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - - loader.readHeaders(); - while (loader.readRecord()) { -// String value = loader.getValues()[index]; - data1.add(Integer.valueOf(loader.getValues()[0])); - 
data2.add(Integer.valueOf(loader.getValues()[1])); -// data.add(Integer.valueOf(value)); - } - inputStream.close(); - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; - - - int length = 0; - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - length = BOSEncoder(data2_arr, dataset_block_size.get(file_i), encoded_result); - } - - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - compressed_size += length; - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoder(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - - String[] record = { - f.toString(), - "TS_2DIFF+BOS-V", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); -// break; - } - writer.close(); - } - } - - @Test - public void BOSVaryBlockSize() throws IOException, InterruptedException { -// String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/"; // your data path -// String output_parent_dir = parent_dir + "icde0802/compression_ratio/block_size_bos_v"; - String output_parent_dir = parent_dir + "icde0802/compression_ratio/block_size_bos_v"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - 
dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - } - - output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 - - output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 - - output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 - - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 - - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 - - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 - - output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 - - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 - - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 - - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 - - int repeatTime2 = 20; - int[] file_i_list = {0,9,10}; -// for (int file_i = 9; file_i < 10; file_i++) { -// for(int file_i :file_i_list){ - for (int file_i = 0; file_i < input_path_list.size(); file_i++) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", - "Encoding Time", - 
"Decoding Time", - "Points", - "Compressed Size", - "Block Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { - System.out.println(f); - Thread.sleep(10000); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - - loader.readHeaders(); - - while (loader.readRecord()) { - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); - } - inputStream.close(); - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - -// for (int block_size_i = 5; block_size_i <14; block_size_i++) { - for (int block_size_i = 13; block_size_i > 4; block_size_i--) { - int block_size = (int) Math.pow(2, block_size_i); - System.out.println(block_size); - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; - - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - compressed_size = BOSEncoder(data2_arr, block_size, encoded_result); - } - - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoder(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - String[] record = { - f.toString(), - "TS_2DIFF+BOS-V", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(block_size_i), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); - 
- } - - } - writer.close(); - - } - } - - private static void addToArchiveCompression(SevenZOutputFile out, File file, String dir) { - String name = dir + File.separator + file.getName(); - if(dir.equals(".")) { - name = file.getName(); - } - if (file.isFile()){ - SevenZArchiveEntry entry = null; - FileInputStream in = null; - try { - entry = out.createArchiveEntry(file, name); - out.putArchiveEntry(entry); - in = new FileInputStream(file); - byte[] b = new byte[1024]; - int count = 0; - while ((count = in.read(b)) > 0) { - out.write(b, 0, count); - } - } catch (IOException e) { - e.printStackTrace(); - } finally { - try { - out.closeArchiveEntry(); - in.close(); - } catch (IOException e) { - e.printStackTrace(); - } - - } - } else if (file.isDirectory()) { - File[] children = file.listFiles(); - if (children != null){ - for (File child : children){ - addToArchiveCompression(out, child, name); - } - } - } else { - System.out.println(file.getName() + " is not supported"); - } - } - - @Test - public void compressBOSTest() throws IOException { - String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/";// your data path -// String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String output_parent_dir = parent_dir + "icde0802/supply_experiment/R3O2_compare_compression/compression_ratio/bos_v_comp"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - 
dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(1024); - } - - output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 -// dataset_block_size.add(256); - output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 -// dataset_block_size.add(1024); - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 -// dataset_block_size.add(2048); - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 -// dataset_block_size.add(1024); - - int repeatTime2 = 100; -// for (int file_i = 8; file_i < 9; file_i++) { - CompressionType[] compressList = { - CompressionType.LZ4, - CompressionType.LZMA2, - }; - - for (int file_i = input_path_list.size()-1; file_i >=0 ; file_i--) { - - String inputPath = input_path_list.get(file_i); - System.out.println(inputPath); - String Output = output_path_list.get(file_i); - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter 
writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", -// "Compress Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - for (File f : tempList) { - System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Integer> data1 = new ArrayList<>(); - ArrayList<Integer> data2 = new ArrayList<>(); - - loader.readHeaders(); - while (loader.readRecord()) { - data1.add(Integer.valueOf(loader.getValues()[0])); - data2.add(Integer.valueOf(loader.getValues()[1])); - } - inputStream.close(); - - int[] data2_arr = new int[data1.size()]; - for(int i = 0;i<data2.size();i++){ - data2_arr[i] = data2.get(i); - } - byte[] encoded_result = new byte[data2_arr.length*4]; - long encodeTime = 0; - long decodeTime = 0; - int length = 0; - - long s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) { - length = BOSEncoder(data2_arr, dataset_block_size.get(file_i), encoded_result); - } - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - - for (CompressionType comp : compressList) { - double ratio = 0; - double compressed_size = 0; - ICompressor compressor = ICompressor.getCompressor(comp); - byte[] compressed = compressor.compress(encoded_result); - - // test compression ratio and compressed size - compressed_size += compressed.length; - double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - BOSDecoder(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - - String[] record = { - f.toString(), - "BOS+"+ comp, - String.valueOf(encodeTime), - 
String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); - } - double ratio = 0; - double compressed_size = 0; - File outfile = new File(parent_dir + "icde0802/example.bin"); - - // 使用FileOutputStream将byte数组写入文件 - try (FileOutputStream fos = new FileOutputStream(outfile)) { - fos.write(encoded_result); - } catch (IOException e2) { - // 处理可能的I/O异常 - e2.printStackTrace(); - } - - File input = new File(parent_dir + "icde0802/example.bin"); - File output = new File(parent_dir + "icde0802/example.7z"); - SevenZOutputFile out = new SevenZOutputFile(output); - - addToArchiveCompression(out, input, "."); - out.closeArchiveEntry(); - - long compressed = output.length(); - - - // test compression ratio and compressed size - compressed_size += compressed; - double ratioTmp = - (double) compressed / (double) (double) (data1.size() * Integer.BYTES); - ratio += ratioTmp; - - - String[] record = { - f.toString(), - "BOS+7-Zip", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data1.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); - } - writer.close(); - } - } - - @Test - public void compressionBPTest() throws IOException { - - String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-outlier/"; -// String parent_dir = "/Users/zihanguo/Downloads/R/outlier/outliier_code/encoding-outlier/"; - String output_parent_dir = parent_dir + "icde0802/supply_experiment/R3O2_compare_compression/compression_ratio/bp_comp/"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); 
- dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - - - for (String item : dataset_name) { - input_path_list.add(input_parent_dir + item); - } - - output_path_list.add(output_parent_dir + "CS-Sensors_ratio.csv"); // 0 - - output_path_list.add(output_parent_dir + "Metro-Traffic_ratio.csv");// 1 - - output_path_list.add(output_parent_dir + "USGS-Earthquakes_ratio.csv");// 2 - - output_path_list.add(output_parent_dir + "YZ-Electricity_ratio.csv"); // 3 - - output_path_list.add(output_parent_dir + "GW-Magnetic_ratio.csv"); //4 - - output_path_list.add(output_parent_dir + "TY-Fuel_ratio.csv");//5 - - output_path_list.add(output_parent_dir + "Cyber-Vehicle_ratio.csv"); //6 - - output_path_list.add(output_parent_dir + "Vehicle-Charge_ratio.csv");//7 - - output_path_list.add(output_parent_dir + "Nifty-Stocks_ratio.csv");//8 - - output_path_list.add(output_parent_dir + "TH-Climate_ratio.csv");//9 - - output_path_list.add(output_parent_dir + "TY-Transport_ratio.csv");//10 - - output_path_list.add(output_parent_dir + "EPM-Education_ratio.csv");//11 - -// for (int file_i = 7; file_i < 8; file_i++) { - for (int file_i = 0; file_i < input_path_list.size(); file_i++) { - String inputPath = input_path_list.get(file_i); - String Output = output_path_list.get(file_i); - - // speed - int repeatTime = 100; // set repeat time - String dataTypeName = "int"; // set dataType - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - // select encoding algorithms - TSEncoding[] encodingList = { - TSEncoding.PLAIN , -// TSEncoding.TS_2DIFF, -// TSEncoding.RLE, -// TSEncoding.SPRINTZ, -// TSEncoding.GORILLA, -// TSEncoding.RLBE, -// TSEncoding.CHIMP, -// TSEncoding.BUFF - }; - - CompressionType[] 
compressList = { -// CompressionType.UNCOMPRESSED, - CompressionType.LZ4, - CompressionType.LZMA2, - - }; - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", -// "Compress Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Compression Ratio" -// "Input Direction", -// "Column Index", -// "Encoding Algorithm", -// "Compress Algorithm", -// "Encoding Time", -// "Decoding Time", -// "Compress Time", -// "Uncompress Time", -// "Points", -// "Compressed Size", -// "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - ArrayList<Integer> columnIndexes = new ArrayList<>(); // set the column indexes of compressed - for (int i = 0; i < 2; i++) { - columnIndexes.add(i, i); - } - for (File f : tempList) { - System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<String> data = new ArrayList<>(); - - for (int index : columnIndexes) { - if (index == 0){ - continue; - } - int max_precision = 2; - if (file_i == 3){ - max_precision = 4; - } else if (file_i == 4 ||file_i == 5) { - max_precision = 3; - } - loader.readHeaders(); - data.clear(); - while (loader.readRecord()) { - String v = loader.getValues()[index]; - - data.add(v); - if (!v.matches("-?\\d+")){ - dataTypeName = "float"; - } - } - inputStream.close(); - - switch (dataTypeName) { - case "int": - { - TSDataType dataType = TSDataType.INT32; // set TSDataType - ArrayList<Integer> tmp = new ArrayList<>(); - for (String value : data) { - tmp.add(Integer.valueOf(value)); - } - // Iterate over each encoding algorithm - for (TSEncoding encoding : encodingList) { - Encoder encoder = - TSEncodingBuilder.getEncodingBuilder(encoding).getEncoder(dataType); - Decoder decoder = Decoder.getDecoderByType(encoding, dataType); - long encodeTime = 
0; - long decodeTime = 0; - - // Iterate over each compression algorithm - for (CompressionType comp : compressList) { - ICompressor compressor = ICompressor.getCompressor(comp); - IUnCompressor unCompressor = IUnCompressor.getUnCompressor(comp); - - double ratio = 0; - double compressed_size = 0; - - long compressTime = 0; - long uncompressTime = 0; - - // repeat many times to test time - for (int i = 0; i < repeatTime; i++) { - ByteArrayOutputStream buffer = new ByteArrayOutputStream(); - - // test encode time - long s = System.nanoTime(); - for (int val : tmp) { - encoder.encode(val, buffer); - } - - encoder.flush(buffer); - long e = System.nanoTime(); - encodeTime += (e - s); - - // test compress time - byte[] elems = buffer.toByteArray(); - s = System.nanoTime(); - byte[] compressed = compressor.compress(elems); - e = System.nanoTime(); - compressTime += (e - s); - - // test compression ratio and compressed size - compressed_size += compressed.length; - double ratioTmp = - (double) compressed.length / (double) (tmp.size() * Integer.BYTES); - ratio += ratioTmp; - - // test uncompress time - s = System.nanoTime(); - unCompressor.uncompress(compressed); - e = System.nanoTime(); - uncompressTime += (e - s); - - // test decode time - ByteBuffer ebuffer = ByteBuffer.wrap(buffer.toByteArray()); - s = System.nanoTime(); - while (decoder.hasNext(ebuffer)) { - decoder.readInt(ebuffer); - } - e = System.nanoTime(); - decodeTime += (e - s); - - buffer.close(); - } - - ratio /= repeatTime; - compressed_size /= repeatTime; - encodeTime /= repeatTime; - decodeTime /= repeatTime; - - String[] record = { - f.toString(), - comp.toString(), - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) -// f.toString(), -// String.valueOf(index), -// encoding.toString(), -// comp.toString(), -// String.valueOf(encodeTime), -// String.valueOf(decodeTime), -// String.valueOf(compressTime), -// 
String.valueOf(uncompressTime), -// String.valueOf(data.size()), -// String.valueOf(compressed_size), -// String.valueOf(ratio) - }; - System.out.println(ratio); - writer.writeRecord(record); - } - } - tmp.clear(); - break; - } - - // write info to file - case "float": - { - System.out.println("get float"); - TSDataType dataType = TSDataType.FLOAT; - ArrayList<Float> tmp = new ArrayList<>(); - data.removeIf(String::isEmpty); - for (String datum : data) { - tmp.add(Float.valueOf(datum)); - } - - // Iterate over each encoding algorithm - for (TSEncoding encoding : encodingList) { - Encoder encoder; - encoder = TSEncodingBuilder.getEncodingBuilder(encoding).getEncoder(dataType); - if (encoding == TSEncoding.TS_2DIFF){ - encoder = new FloatEncoder(TSEncoding.TS_2DIFF, dataType, max_precision); - } else if (encoding == TSEncoding.RLE){ - encoder = new FloatEncoder(TSEncoding.RLE, dataType, max_precision); - } - - Decoder decoder = Decoder.getDecoderByType(encoding, dataType); - - long encodeTime = 0; - long decodeTime = 0; - // Iterate over each compression algorithm - for (CompressionType comp : compressList) { - ICompressor compressor = ICompressor.getCompressor(comp); - IUnCompressor unCompressor = IUnCompressor.getUnCompressor(comp); - long compressTime = 0; - long uncompressTime = 0; - double ratio = 0; - double compressed_size = 0; - - // repeat many times to test time - for (int i = 0; i < repeatTime; i++) { - ByteArrayOutputStream buffer = new ByteArrayOutputStream(); - - // test encode time - long s = System.nanoTime(); - for (float val : tmp) { - encoder.encode(val, buffer); - } - encoder.flush(buffer); - long e = System.nanoTime(); - encodeTime += (e - s); - - // test compress time - byte[] elems = buffer.toByteArray(); - s = System.nanoTime(); - byte[] compressed = compressor.compress(elems); - e = System.nanoTime(); - compressTime += (e - s); - - // test compression ratio and compressed size - compressed_size += compressed.length; - double ratioTmp = - 
(double) compressed.length / (double) (tmp.size() * Float.BYTES); - ratio += ratioTmp; - - // test uncompress time - s = System.nanoTime(); - unCompressor.uncompress(compressed); - e = System.nanoTime(); - uncompressTime += (e - s); - - // test decode time - ByteBuffer ebuffer = ByteBuffer.wrap(buffer.toByteArray()); - while (decoder.hasNext(ebuffer)) { - decoder.readFloat(ebuffer); - } - e = System.nanoTime(); - decodeTime += (e - s); - - buffer.close(); - } - ratio /= repeatTime; - compressed_size /= repeatTime; - encodeTime /= repeatTime; - decodeTime /= repeatTime; - - // write info to file - String[] record = { - f.toString(), - comp.toString(), - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data.size()), - String.valueOf(compressed_size), - String.valueOf(ratio) -// f.toString(), -// String.valueOf(index), -// encoding.toString(), -// comp.toString(), -// String.valueOf(encodeTime), -// String.valueOf(decodeTime), -// String.valueOf(compressTime), -// String.valueOf(uncompressTime), -// String.valueOf(data.size()), -// String.valueOf(compressed_size), -// String.valueOf(ratio) - }; - System.out.println(ratio); - writer.writeRecord(record); - } - } - break; - } - } - inputStream = Files.newInputStream(f.toPath()); - loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - } - } - writer.close(); - } - } - -}
diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/TSDIFFSubcolumnTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/TSDIFFSubcolumnTest.java new file mode 100644 index 0000000..11092b8 --- /dev/null +++ b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/TSDIFFSubcolumnTest.java
@@ -0,0 +1,495 @@ +package org.apache.iotdb.tsfile.encoding; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Stream; + +import org.junit.Test; + +import com.csvreader.CsvReader; +import com.csvreader.CsvWriter; + +public class TSDIFFSubcolumnTest { + + public static int Encoder(int[] data, int block_size, byte[] encoded_result) { + int data_length = data.length; + int encode_pos = 0; + + encoded_result[0] = (byte) (data_length >> 24); + encoded_result[1] = (byte) (data_length >> 16); + encoded_result[2] = (byte) (data_length >> 8); + encoded_result[3] = (byte) data_length; + encode_pos += 4; + + encoded_result[4] = (byte) (block_size >> 24); + encoded_result[5] = (byte) (block_size >> 16); + encoded_result[6] = (byte) (block_size >> 8); + encoded_result[7] = (byte) block_size; + encode_pos += 4; + + int num_blocks = data_length / block_size; + + int remainder = data_length % block_size; + + int[] beta = new int[1]; + beta[0] = 3; + + for (int i = 0; i < num_blocks; i++) { + encode_pos = BlockEncoder(data, i, block_size, block_size, encode_pos, encoded_result, beta); + } + + if (remainder <= 3) { + for (int i = 0; i < remainder; i++) { + int value = data[num_blocks * block_size + i]; + encoded_result[encode_pos] = (byte) (value >> 24); + encoded_result[encode_pos + 1] = (byte) (value >> 16); + encoded_result[encode_pos + 2] = (byte) (value >> 8); + encoded_result[encode_pos + 3] = (byte) value; + encode_pos += 4; + } + } else { + encode_pos = BlockEncoder(data, num_blocks, block_size, remainder, encode_pos, + encoded_result, beta); + } + + // System.out.println("beta: " + beta[0]); + + return encode_pos; + } + + public static int[] Decoder(byte[] encoded_result) { + int encode_pos = 0; + + int data_length = 
((encoded_result[encode_pos] & 0xFF) << 24) | ((encoded_result[encode_pos + 1] & 0xFF) << 16) + | + ((encoded_result[encode_pos + 2] & 0xFF) << 8) | (encoded_result[encode_pos + 3] & 0xFF); + encode_pos += 4; + + int block_size = ((encoded_result[encode_pos] & 0xFF) << 24) | ((encoded_result[encode_pos + 1] & 0xFF) << 16) | + ((encoded_result[encode_pos + 2] & 0xFF) << 8) | (encoded_result[encode_pos + 3] & 0xFF); + encode_pos += 4; + + int num_blocks = data_length / block_size; + + int[] data = new int[data_length]; + + for (int i = 0; i < num_blocks; i++) { + encode_pos = BlockDecoder(encoded_result, i, block_size, block_size, encode_pos, data); + } + + int remainder = data_length % block_size; + + if (remainder <= 3) { + for (int i = 0; i < remainder; i++) { + data[num_blocks * block_size + i] = ((encoded_result[encode_pos] & 0xFF) << 24) | + ((encoded_result[encode_pos + 1] & 0xFF) << 16) | + ((encoded_result[encode_pos + 2] & 0xFF) << 8) | (encoded_result[encode_pos + 3] & 0xFF); + encode_pos += 4; + } + } else { + encode_pos = BlockDecoder(encoded_result, num_blocks, block_size, remainder, + encode_pos, data); + } + + return data; + } + + public static int[] getAbsDeltaTsBlock( + int[] ts_block, + int i, + int block_size, + int remaining, + int[] min_delta) { + int[] ts_block_delta = new int[remaining - 1]; + + int value_delta_min = Integer.MAX_VALUE; + int value_delta_max = Integer.MIN_VALUE; + int base = i * block_size + 1; + int end = i * block_size + remaining; + + int tmp_j_1 = ts_block[base - 1]; + min_delta[0] = tmp_j_1; + int j = base; + int tmp_j; + + while (j < end) { + tmp_j = ts_block[j]; + int epsilon_v = tmp_j - tmp_j_1; + ts_block_delta[j - base] = epsilon_v; + if (epsilon_v < value_delta_min) { + value_delta_min = epsilon_v; + } + if (epsilon_v > value_delta_max) { + value_delta_max = epsilon_v; + } + tmp_j_1 = tmp_j; + j++; + } + j = 0; + end = remaining - 1; + while (j < end) { + ts_block_delta[j] = ts_block_delta[j] - value_delta_min; + 
j++; + } + + min_delta[1] = value_delta_min; + min_delta[2] = (value_delta_max - value_delta_min); + + return ts_block_delta; + } + + public static int BlockEncoder(int[] data, int block_index, int block_size, int remainder, + int encode_pos, byte[] encoded_result, int[] beta) { + int[] min_delta = new int[3]; + + // data_delta 的长度为 remainder - 1 + int[] data_delta = getAbsDeltaTsBlock(data, block_index, block_size, remainder, min_delta); + + encoded_result[encode_pos] = (byte) (min_delta[0] >> 24); + encoded_result[encode_pos + 1] = (byte) (min_delta[0] >> 16); + encoded_result[encode_pos + 2] = (byte) (min_delta[0] >> 8); + encoded_result[encode_pos + 3] = (byte) min_delta[0]; + encode_pos += 4; + + encoded_result[encode_pos] = (byte) (min_delta[1] >> 24); + encoded_result[encode_pos + 1] = (byte) (min_delta[1] >> 16); + encoded_result[encode_pos + 2] = (byte) (min_delta[1] >> 8); + encoded_result[encode_pos + 3] = (byte) min_delta[1]; + encode_pos += 4; + + if (block_index == 0) { + int maxValue = 0; + for (int j = 0; j < remainder - 1; j++) { + if (data_delta[j] > maxValue) { + maxValue = data_delta[j]; + } + } + int m = SubcolumnTest.bitWidth(maxValue); + + beta[0] = SubcolumnTest.Subcolumn(data_delta, remainder - 1, m, block_size); + } + + encode_pos = SubcolumnTest.SubcolumnEncoder(data_delta, encode_pos, encoded_result, beta, block_size); + + return encode_pos; + } + + public static int BlockDecoder(byte[] encoded_result, int block_index, int block_size, int remainder, + int encode_pos, int[] data) { + int[] min_delta = new int[3]; + + min_delta[0] = ((encoded_result[encode_pos] & 0xFF) << 24) | ((encoded_result[encode_pos + 1] & 0xFF) << 16) | + ((encoded_result[encode_pos + 2] & 0xFF) << 8) | (encoded_result[encode_pos + 3] & 0xFF); + encode_pos += 4; + + min_delta[1] = ((encoded_result[encode_pos] & 0xFF) << 24) | ((encoded_result[encode_pos + 1] & 0xFF) << 16) | + ((encoded_result[encode_pos + 2] & 0xFF) << 8) | (encoded_result[encode_pos + 3] & 0xFF); 
+ encode_pos += 4; + + int[] data_delta = new int[remainder - 1]; + + encode_pos = SubcolumnTest.SubcolumnDecoder(encoded_result, encode_pos, data_delta, block_size); + + for (int i = 0; i < remainder - 1; i++) { + data_delta[i] = data_delta[i] + min_delta[1]; + } + + data[block_index * block_size] = min_delta[0]; + + for (int i = 0; i < remainder - 1; i++) { + data[block_index * block_size + i + 1] = data[block_index * block_size + i] + data_delta[i]; + } + + return encode_pos; + } + + public static int getDecimalPrecision(String str) { + // 查找小数点的位置 + int decimalIndex = str.indexOf("."); + + // 如果没有小数点,精度为0 + if (decimalIndex == -1) { + return 0; + } + + // 获取小数点后的部分并返回其长度 + return str.substring(decimalIndex + 1).length(); + } + + public static String extractFileName(String path) { + if (path == null || path.isEmpty()) { + return ""; + } + + File file = new File(path); + String fileName = file.getName(); + + int dotIndex = fileName.lastIndexOf('.'); + + if (dotIndex == -1 || dotIndex == 0) { + return fileName; + } + + return fileName.substring(0, dotIndex); + } + + @Test + public void testSubcolumn() throws IOException { + String parent_dir = "D:/github/xjz17/subcolumn/"; + + String input_parent_dir = parent_dir + "dataset/"; + + String output_parent_dir = "D:/encoding-subcolumn/result/"; + // String output_parent_dir = parent_dir + "result/"; + + String outputPath = output_parent_dir + "ts2diff_subcolumn.csv"; + + int block_size = 512; + + int repeatTime = 200; + + // repeatTime = 1; + + List<String> integerDatasets = new ArrayList<>(); + integerDatasets.add("Wine-Tasting"); + + CsvWriter writer = new CsvWriter(outputPath, ',', StandardCharsets.UTF_8); + writer.setRecordDelimiter('\n'); + + String[] head = { + "Dataset", + "Encoding Algorithm", + "Encoding Time", + "Decoding Time", + "Points", + "Compressed Size", + "Compression Ratio" + }; + writer.writeRecord(head); + + File directory = new File(input_parent_dir); + // File[] csvFiles = directory.listFiles(); 
+ File[] csvFiles = directory.listFiles((dir, name) -> name.endsWith(".csv")); + + for (File file : csvFiles) { + String datasetName = extractFileName(file.toString()); + System.out.println(datasetName); + + InputStream inputStream = Files.newInputStream(file.toPath()); + + CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); + ArrayList<Float> data1 = new ArrayList<>(); + + int max_decimal = 0; + while (loader.readRecord()) { + String f_str = loader.getValues()[0]; + if (f_str.isEmpty()) { + continue; + } + int cur_decimal = getDecimalPrecision(f_str); + if (cur_decimal > max_decimal) { + max_decimal = cur_decimal; + } + data1.add(Float.valueOf(f_str)); + } + inputStream.close(); + int[] data2_arr = new int[data1.size()]; + + int max_mul = (int) Math.pow(10, max_decimal); + for (int i = 0; i < data1.size(); i++) { + data2_arr[i] = (int) (data1.get(i) * max_mul); + } + + System.out.println(max_decimal); + byte[] encoded_result = new byte[data2_arr.length * 4]; + + long encodeTime = 0; + long decodeTime = 0; + double ratio = 0; + double compressed_size = 0; + + int length = 0; + + long s = System.nanoTime(); + for (int repeat = 0; repeat < repeatTime; repeat++) { + length = Encoder(data2_arr, block_size, encoded_result); + } + + long e = System.nanoTime(); + encodeTime += ((e - s) / repeatTime); + compressed_size += length; + + double ratioTmp; + + if (integerDatasets.contains(datasetName)) { + ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); + } else { + ratioTmp = compressed_size / (double) (data1.size() * Long.BYTES); + } + + ratio += ratioTmp; + + int[] data2_arr_decoded = new int[data2_arr.length]; + + s = System.nanoTime(); + + for (int repeat = 0; repeat < repeatTime; repeat++) { + data2_arr_decoded = Decoder(encoded_result); + } + + e = System.nanoTime(); + decodeTime += ((e - s) / repeatTime); + + String[] record = { + datasetName, + "TS2DIFF+Sub-columns", + String.valueOf(encodeTime), + String.valueOf(decodeTime), + 
String.valueOf(data1.size()), + String.valueOf(compressed_size), + String.valueOf(ratio) + }; + writer.writeRecord(record); + System.out.println(ratio); + } + + writer.close(); + } + + @Test + public void testTransData() throws IOException { + String parent_dir = "D:/github/xjz17/subcolumn/"; + + String output_parent_dir = "D:/encoding-subcolumn/trans_data_result/"; + // String output_parent_dir = parent_dir + "trans_data_result/"; + + String input_parent_dir = parent_dir + "trans_data/"; + + ArrayList<String> input_path_list = new ArrayList<>(); + ArrayList<String> output_path_list = new ArrayList<>(); + ArrayList<String> dataset_name = new ArrayList<>(); + ArrayList<Integer> dataset_block_size = new ArrayList<>(); + + try (Stream<Path> paths = Files.walk(Paths.get(input_parent_dir))) { + paths.filter(Files::isDirectory) + .filter(path -> !path.equals(Paths.get(input_parent_dir))) + .forEach(dir -> { + String name = dir.getFileName().toString(); + dataset_name.add(name); + input_path_list.add(dir.toString()); + dataset_block_size.add(1024); + }); + } + + String outputPath = output_parent_dir + "ts2diff_subcolumn.csv"; + CsvWriter writer = new CsvWriter(outputPath, ',', StandardCharsets.UTF_8); + writer.setRecordDelimiter('\n'); + + String[] head = { + "Dataset", + "Encoding Algorithm", + "Encoding Time", + "Decoding Time", + "Points", + "Compressed Size", + "Compression Ratio" + }; + writer.writeRecord(head); + + int repeatTime = 100; + + for (int file_i = 0; file_i < input_path_list.size(); file_i++) { + + String inputPath = input_path_list.get(file_i); + System.out.println(inputPath); + + File file = new File(inputPath); + File[] tempList = file.listFiles(); + + long totalEncodeTime = 0; + long totalDecodeTime = 0; + double totalCompressedSize = 0; + int totalPoints = 0; + + for (File f : tempList) { + String datasetName = extractFileName(f.toString()); + InputStream inputStream = Files.newInputStream(f.toPath()); + + CsvReader loader = new 
CsvReader(inputStream, StandardCharsets.UTF_8); + ArrayList<Integer> data1 = new ArrayList<>(); + ArrayList<Integer> data2 = new ArrayList<>(); + + loader.readHeaders(); + while (loader.readRecord()) { + // String value = loader.getValues()[index]; + data1.add(Integer.valueOf(loader.getValues()[0])); + data2.add(Integer.valueOf(loader.getValues()[1])); + // data.add(Integer.valueOf(value)); + } + inputStream.close(); + int[] data2_arr = new int[data1.size()]; + for (int i = 0; i < data2.size(); i++) { + data2_arr[i] = data2.get(i); + } + byte[] encoded_result = new byte[data2_arr.length * 4]; + long encodeTime = 0; + long decodeTime = 0; + double ratio = 0; + double compressed_size = 0; + + int length = 0; + + long s = System.nanoTime(); + for (int repeat = 0; repeat < repeatTime; repeat++) { + length = Encoder(data2_arr, dataset_block_size.get(file_i), encoded_result); + } + + long e = System.nanoTime(); + encodeTime += ((e - s) / repeatTime); + compressed_size += length; + double ratioTmp = compressed_size / (double) (data1.size() * Integer.BYTES); + ratio += ratioTmp; + s = System.nanoTime(); + + int[] data2_arr_decoded = new int[data1.size()]; + + for (int repeat = 0; repeat < repeatTime; repeat++) { + data2_arr_decoded = Decoder(encoded_result); + } + + e = System.nanoTime(); + decodeTime += ((e - s) / repeatTime); + + totalEncodeTime += encodeTime; + totalDecodeTime += decodeTime; + totalCompressedSize += compressed_size; + totalPoints += data1.size(); + + } + + double compressionRatio = totalCompressedSize / (totalPoints * Integer.BYTES); + + String[] record = { + dataset_name.get(file_i), + "TS2DIFF+Sub-columns", + String.valueOf(totalEncodeTime), + String.valueOf(totalDecodeTime), + String.valueOf(totalPoints), + String.valueOf(totalCompressedSize), + String.valueOf(compressionRatio) + }; + + writer.writeRecord(record); + System.out.println(compressionRatio); + } + writer.close(); + } + +}
diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/TSDIFFTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/TSDIFFTest.java new file mode 100644 index 0000000..cd25f03 --- /dev/null +++ b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/TSDIFFTest.java
@@ -0,0 +1,989 @@ +package org.apache.iotdb.tsfile.encoding; + +import com.csvreader.CsvReader; +import com.csvreader.CsvWriter; +import org.apache.commons.compress.archivers.sevenz.SevenZArchiveEntry; +import org.apache.commons.compress.archivers.sevenz.SevenZOutputFile; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; +import org.apache.iotdb.tsfile.compress.ICompressor; +import org.apache.iotdb.tsfile.file.metadata.enums.CompressionType; +import org.junit.Test; + +import java.io.*; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Stream; + +import static java.lang.Math.pow; + +public class TSDIFFTest { + + public static long combine2Int(int int1, int int2) { + return ((long) int1 << 32) | (int2 & 0xFFFFFFFFL); + } + + public static int getTime(long long1) { + return ((int) (long1 >> 32)); + } + + public static int getValue(long long1) { + return ((int) (long1)); + } + + public static int getCount(long long1, int mask) { + return ((int) (long1 & mask)); + } + + public static int getUniqueValue(long long1, int left_shift) { + return ((int) ((long1) >> left_shift)); + } + + public static int getBitWith(int num) { + if (num == 0) + return 1; + else + return 32 - Integer.numberOfLeadingZeros(num); + } + + public static void int2Bytes(int integer, int encode_pos, byte[] cur_byte) { + cur_byte[encode_pos] = (byte) (integer >> 24); + cur_byte[encode_pos + 1] = (byte) (integer >> 16); + cur_byte[encode_pos + 2] = (byte) (integer >> 8); + cur_byte[encode_pos + 3] = (byte) (integer); + } + + public static void intByte2Bytes(int integer, int encode_pos, byte[] cur_byte) { + cur_byte[encode_pos] = (byte) (integer); + } + + private static void long2intBytes(long integer, int encode_pos, byte[] cur_byte) { + cur_byte[encode_pos] = (byte) (integer >> 24); + 
cur_byte[encode_pos + 1] = (byte) (integer >> 16); + cur_byte[encode_pos + 2] = (byte) (integer >> 8); + cur_byte[encode_pos + 3] = (byte) (integer); + } + + public static int bytes2Integer(byte[] encoded, int start, int num) { + int value = 0; + if (num > 4) { + System.out.println("bytes2Integer error"); + return 0; + } + for (int i = 0; i < num; i++) { + value <<= 8; + int b = encoded[i + start] & 0xFF; + value |= b; + } + return value; + } + + private static long bytesLong2Integer(byte[] encoded, int decode_pos) { + long value = 0; + for (int i = 0; i < 4; i++) { + value <<= 8; + int b = encoded[i + decode_pos] & 0xFF; + value |= b; + } + return value; + } + + public static void pack8Values(ArrayList<Integer> values, int offset, int width, int encode_pos, + byte[] encoded_result) { + int bufIdx = 0; + int valueIdx = offset; + // remaining bits for the current unfinished Integer + int leftBit = 0; + + while (valueIdx < 8 + offset) { + // buffer is used for saving 32 bits as a part of result + int buffer = 0; + // remaining size of bits in the 'buffer' + int leftSize = 32; + + // encode the left bits of current Integer to 'buffer' + if (leftBit > 0) { + buffer |= (values.get(valueIdx) << (32 - leftBit)); + leftSize -= leftBit; + leftBit = 0; + valueIdx++; + } + + while (leftSize >= width && valueIdx < 8 + offset) { + // encode one Integer to the 'buffer' + buffer |= (values.get(valueIdx) << (leftSize - width)); + leftSize -= width; + valueIdx++; + } + // If the remaining space of the buffer can not save the bits for one Integer, + if (leftSize > 0 && valueIdx < 8 + offset) { + // put the first 'leftSize' bits of the Integer into remaining space of the + // buffer + buffer |= (values.get(valueIdx) >>> (width - leftSize)); + leftBit = width - leftSize; + } + + // put the buffer into the final result + for (int j = 0; j < 4; j++) { + encoded_result[encode_pos] = (byte) ((buffer >>> ((3 - j) * 8)) & 0xFF); + encode_pos++; + bufIdx++; + if (bufIdx >= width) { + return; 
+ } + } + } + // return encode_pos; + } + + public static void unpack8Values(byte[] encoded, int offset, int width, ArrayList<Integer> result_list) { + int byteIdx = offset; + long buffer = 0; + // total bits which have read from 'buf' to 'buffer'. i.e., + // number of available bits to be decoded. + int totalBits = 0; + int valueIdx = 0; + + while (valueIdx < 8) { + // If current available bits are not enough to decode one Integer, + // then add next byte from buf to 'buffer' until totalBits >= width + while (totalBits < width) { + buffer = (buffer << 8) | (encoded[byteIdx] & 0xFF); + byteIdx++; + totalBits += 8; + } + + // If current available bits are enough to decode one Integer, + // then decode one Integer one by one until left bits in 'buffer' is + // not enough to decode one Integer. + while (totalBits >= width && valueIdx < 8) { + result_list.add((int) (buffer >>> (totalBits - width))); + valueIdx++; + totalBits -= width; + buffer = buffer & ((1L << totalBits) - 1); + } + } + } + + public static int bitPacking(ArrayList<Integer> numbers, int start, int bit_width, int encode_pos, + byte[] encoded_result) { + int block_num = (numbers.size() - start) / 8; + for (int i = 0; i < block_num; i++) { + pack8Values(numbers, start + i * 8, bit_width, encode_pos, encoded_result); + encode_pos += bit_width; + } + + return encode_pos; + + } + + public static ArrayList<Integer> decodeBitPacking( + byte[] encoded, int decode_pos, int bit_width, int block_size) { + ArrayList<Integer> result_list = new ArrayList<>(); + int block_num = (block_size - 1) / 8; + + for (int i = 0; i < block_num; i++) { // bitpacking + unpack8Values(encoded, decode_pos, bit_width, result_list); + decode_pos += bit_width; + + } + return result_list; + } + + public static int[] getAbsDeltaTsBlock( + int[] ts_block, + int i, + int block_size, + int remaining, + int[] min_delta) { + int[] ts_block_delta = new int[remaining - 1]; + + int value_delta_min = Integer.MAX_VALUE; + int value_delta_max = 
Integer.MIN_VALUE; + int base = i * block_size + 1; + int end = i * block_size + remaining; + + int tmp_j_1 = ts_block[base - 1]; + min_delta[0] = tmp_j_1; + int j = base; + int tmp_j; + + while (j < end) { + tmp_j = ts_block[j]; + int epsilon_v = tmp_j - tmp_j_1; + ts_block_delta[j - base] = epsilon_v; + if (epsilon_v < value_delta_min) { + value_delta_min = epsilon_v; + } + if (epsilon_v > value_delta_max) { + value_delta_max = epsilon_v; + } + tmp_j_1 = tmp_j; + j++; + } + j = 0; + end = remaining - 1; + while (j < end) { + ts_block_delta[j] = ts_block_delta[j] - value_delta_min; + j++; + } + + min_delta[1] = value_delta_min; + min_delta[2] = (value_delta_max - value_delta_min); + + return ts_block_delta; + } + + public static int encodeOutlier2Bytes( + ArrayList<Integer> ts_block_delta, + int bit_width, + int encode_pos, byte[] encoded_result) { + + encode_pos = bitPacking(ts_block_delta, 0, bit_width, encode_pos, encoded_result); + + int n_k = ts_block_delta.size(); + int n_k_b = n_k / 8; + long cur_remaining = 0; // encoded int + int cur_number_bits = 0; // the bit width used of encoded int + for (int i = n_k_b * 8; i < n_k; i++) { + long cur_value = ts_block_delta.get(i); + int cur_bit_width = bit_width; // remaining bit width of current value + + if (cur_number_bits + bit_width >= 32) { + cur_remaining <<= (32 - cur_number_bits); + cur_bit_width = bit_width - 32 + cur_number_bits; + cur_remaining += ((cur_value >> cur_bit_width)); + long2intBytes(cur_remaining, encode_pos, encoded_result); + encode_pos += 4; + cur_remaining = 0; + cur_number_bits = 0; + } + + cur_remaining <<= cur_bit_width; + cur_number_bits += cur_bit_width; + cur_remaining += (((cur_value << (32 - cur_bit_width)) & 0xFFFFFFFFL) >> (32 - cur_bit_width)); // + } + cur_remaining <<= (32 - cur_number_bits); + long2intBytes(cur_remaining, encode_pos, encoded_result); + encode_pos += 4; + return encode_pos; + + } + + public static ArrayList<Integer> decodeOutlier2Bytes( + byte[] encoded, + int 
decode_pos, + int bit_width, + int length, + ArrayList<Integer> encoded_pos_result) { + + int n_k_b = length / 8; + int remaining = length - n_k_b * 8; + ArrayList<Integer> result_list = new ArrayList<>( + decodeBitPacking(encoded, decode_pos, bit_width, n_k_b * 8 + 1)); + decode_pos += n_k_b * bit_width; + + ArrayList<Long> int_remaining = new ArrayList<>(); + int int_remaining_size = remaining * bit_width / 32 + 1; + for (int j = 0; j < int_remaining_size; j++) { + int_remaining.add(bytesLong2Integer(encoded, decode_pos)); + decode_pos += 4; + } + + int cur_remaining_bits = 32; // remaining bit width of current value + long cur_number = int_remaining.get(0); + int cur_number_i = 1; + for (int i = n_k_b * 8; i < length; i++) { + if (bit_width < cur_remaining_bits) { + int tmp = (int) (cur_number >> (32 - bit_width)); + result_list.add(tmp); + cur_number <<= bit_width; + cur_number &= 0xFFFFFFFFL; + cur_remaining_bits -= bit_width; + } else { + int tmp = (int) (cur_number >> (32 - cur_remaining_bits)); + int remain_bits = bit_width - cur_remaining_bits; + tmp <<= remain_bits; + + cur_number = int_remaining.get(cur_number_i); + cur_number_i++; + tmp += (int) (cur_number >> (32 - remain_bits)); + result_list.add(tmp); + cur_number <<= remain_bits; + cur_number &= 0xFFFFFFFFL; + cur_remaining_bits = 32 - remain_bits; + } + } + encoded_pos_result.add(decode_pos); + return result_list; + } + + private static int BOSBlockEncoder(int[] ts_block, int block_i, int block_size, int remaining, int encode_pos, + byte[] cur_byte) { + + int[] min_delta = new int[3]; + int[] ts_block_delta = getAbsDeltaTsBlock(ts_block, block_i, block_size, remaining, min_delta); + + int2Bytes(min_delta[0], encode_pos, cur_byte); + encode_pos += 4; + int2Bytes(min_delta[1], encode_pos, cur_byte); + encode_pos += 4; + int bit_width_final = getBitWith(min_delta[2]); + intByte2Bytes(bit_width_final, encode_pos, cur_byte); + encode_pos += 1; + ArrayList<Integer> final_normal = new ArrayList<>(); + for 
(int value : ts_block_delta) { + final_normal.add(value); + } + encode_pos = encodeOutlier2Bytes(final_normal, bit_width_final, encode_pos, cur_byte); + return encode_pos; + } + + public static int BOSEncoder( + int[] data, int block_size, byte[] encoded_result) { + block_size++; + + int length_all = data.length; + + int encode_pos = 0; + int2Bytes(length_all, encode_pos, encoded_result); + encode_pos += 4; + + int block_num = length_all / block_size; + int2Bytes(block_size, encode_pos, encoded_result); + encode_pos += 4; + + for (int i = 0; i < block_num; i++) { + encode_pos = BOSBlockEncoder(data, i, block_size, block_size, encode_pos, encoded_result); + } + + int remaining_length = length_all - block_num * block_size; + if (remaining_length <= 3) { + for (int i = remaining_length; i > 0; i--) { + int2Bytes(data[data.length - i], encode_pos, encoded_result); + encode_pos += 4; + } + + } else { + + int start = block_num * block_size; + int remaining = length_all - start; + encode_pos = BOSBlockEncoder(data, block_num, block_size, remaining, encode_pos, encoded_result); + + } + + return encode_pos; + } + + public static int EncodeBits(int num, + int bit_width, + int encode_pos, + byte[] cur_byte, + int[] bit_index_list) { + // 找到要插入的位的索引 + int bit_index = bit_index_list[0];// cur_byte[encode_pos + 1]; + + // 计算数值的起始位位置 + int remaining_bits = bit_width; + + while (remaining_bits > 0) { + // 计算在当前字节中可以使用的位数 + int available_bits = bit_index; + int bits_to_write = Math.min(available_bits, remaining_bits); + + // 更新 bit_index + bit_index = available_bits - bits_to_write; + + // 计算要写入的位的掩码和数值 + int mask = (1 << bits_to_write) - 1; + int bits = (num >> (remaining_bits - bits_to_write)) & mask; + + // 写入到当前位置 + cur_byte[encode_pos] &= (byte) ~(mask << bit_index); // 清除对应位置的位 + cur_byte[encode_pos] |= (byte) (bits << bit_index); + + // 更新位宽和数值 + remaining_bits -= bits_to_write; + if (bit_index == 0) { + bit_index = 8; + encode_pos++; + } + } + bit_index_list[0] = bit_index; 
+ // cur_byte[encode_pos + 1] = (byte) bit_index; + return encode_pos; + } + + private static int BOSBlockEncoderImprove(int[] ts_block, int block_i, int block_size, int remaining, + int encode_pos, byte[] cur_byte) { + + int[] min_delta = new int[3]; + int[] ts_block_delta = getAbsDeltaTsBlock(ts_block, block_i, block_size, remaining, min_delta); + + int2Bytes(min_delta[0], encode_pos, cur_byte); + encode_pos += 4; + int2Bytes(min_delta[1], encode_pos, cur_byte); + encode_pos += 4; + int bit_width_final = getBitWith(min_delta[2]); + intByte2Bytes(bit_width_final, encode_pos, cur_byte); + encode_pos += 1; + // ArrayList<Integer> final_normal = new ArrayList<>(); + int[] bit_index_list = new int[1]; + bit_index_list[0] = 8; + for (int value : ts_block_delta) { + encode_pos = EncodeBits(value, bit_width_final, encode_pos, cur_byte, bit_index_list); + } + if (bit_index_list[0] != 8) { + encode_pos++; + } + // encode_pos = encodeOutlier2Bytes(final_normal, + // bit_width_final,encode_pos,cur_byte); + return encode_pos; + } + + public static int BOSEncoderImprove( + int[] data, int block_size, byte[] encoded_result) { + block_size++; + + int length_all = data.length; + + int encode_pos = 0; + int2Bytes(length_all, encode_pos, encoded_result); + encode_pos += 4; + + int block_num = length_all / block_size; + int2Bytes(block_size, encode_pos, encoded_result); + encode_pos += 4; + + for (int i = 0; i < block_num; i++) { + encode_pos = BOSBlockEncoderImprove(data, i, block_size, block_size, encode_pos, encoded_result); + } + + int remaining_length = length_all - block_num * block_size; + if (remaining_length <= 3) { + for (int i = remaining_length; i > 0; i--) { + int2Bytes(data[data.length - i], encode_pos, encoded_result); + encode_pos += 4; + } + + } else { + + int start = block_num * block_size; + int remaining = length_all - start; + encode_pos = BOSBlockEncoderImprove(data, block_num, block_size, remaining, encode_pos, encoded_result); + + } + + return encode_pos; + } 
// ------------------------------ decoding and benchmark drivers ------------------------------

    /**
     * Decodes one block and appends the reconstructed values to {@code value_list}.
     *
     * <p>Block layout read here: 4-byte first value, 4-byte minimum delta, 1-byte bit width, then
     * the packed deltas, which are unpacked by {@code decodeOutlier2Bytes}.
     *
     * @param encoded       encoded stream
     * @param decode_pos    byte offset of the block inside {@code encoded}
     * @param value_list    destination array for decoded values
     * @param block_size    number of deltas stored in the block (first value not counted)
     * @param value_pos_arr single-element array holding the next write index in {@code value_list};
     *                      advanced by this method
     * @return the byte offset reported by {@code decodeOutlier2Bytes} after the packed deltas
     */
    public static int BOSBlockDecoder(byte[] encoded, int decode_pos, int[] value_list, int block_size,
            int[] value_pos_arr) {

        int value0 = bytes2Integer(encoded, decode_pos, 4);
        decode_pos += 4;
        value_list[value_pos_arr[0]++] = value0;

        int min_delta = bytes2Integer(encoded, decode_pos, 4);
        decode_pos += 4;

        int bit_width_final = bytes2Integer(encoded, decode_pos, 1);
        decode_pos += 1;

        ArrayList<Integer> decode_pos_normal = new ArrayList<>();
        ArrayList<Integer> final_normal = decodeOutlier2Bytes(encoded, decode_pos, bit_width_final, block_size,
                decode_pos_normal);
        decode_pos = decode_pos_normal.get(0);

        int pre_v = value0;
        for (int i = 0; i < block_size; i++) {
            // Each value needs its own delta. The previous code read index 0 on every
            // iteration because the delta index was never advanced.
            // NOTE(review): assumes decodeOutlier2Bytes returns block_size entries -- confirm.
            pre_v += min_delta + final_normal.get(i);
            value_list[value_pos_arr[0]++] = pre_v;
        }

        return decode_pos;
    }

    /**
     * Decodes a complete stream whose blocks are read with {@link #BOSBlockDecoder}.
     *
     * <p>The decoded values are written to a local array and discarded; the method is only used to
     * measure decoding time.
     *
     * @param encoded encoded stream, starting with the 4-byte value count and 4-byte block size
     */
    public static void BOSDecoder(byte[] encoded) {

        int decode_pos = 0;
        int length_all = bytes2Integer(encoded, decode_pos, 4);
        decode_pos += 4;
        int block_size = bytes2Integer(encoded, decode_pos, 4);
        decode_pos += 4;

        int block_num = length_all / block_size;
        int remain_length = length_all - block_num * block_size;

        int[] value_list = new int[length_all + block_size];
        // The stored block size counts the leading value; the block decoder wants the delta count.
        block_size--;

        int[] value_pos_arr = new int[1];
        for (int k = 0; k < block_num; k++) {
            decode_pos = BOSBlockDecoder(encoded, decode_pos, value_list, block_size, value_pos_arr);
        }

        if (remain_length <= 3) {
            // Short tails are stored as raw 4-byte integers.
            for (int i = 0; i < remain_length; i++) {
                int value_end = bytes2Integer(encoded, decode_pos, 4);
                decode_pos += 4;
                value_list[value_pos_arr[0]++] = value_end;
            }
        } else {
            remain_length--;
            BOSBlockDecoder(encoded, decode_pos, value_list, remain_length, value_pos_arr);
        }
    }

    /**
     * Reads {@code bit_width} bits, most significant first, from {@code cur_byte}.
     *
     * @param cur_byte        buffer to read from
     * @param bit_width       number of bits to read
     * @param decode_pos_list two-element cursor, updated in place: {@code [0]} is the current byte
     *                        index and {@code [1]} the number of still-unread bits in that byte
     *                        (8 means the byte is untouched)
     * @return the bits read, as a non-negative integer for widths below 32
     */
    public static int DecodeBits(byte[] cur_byte, int bit_width, int[] decode_pos_list) {
        int decode_pos = decode_pos_list[0];
        int bit_index = decode_pos_list[1];
        int remaining_bits = bit_width;
        int num = 0;

        while (remaining_bits > 0) {
            int available_bits = bit_index;
            int bits_to_read = Math.min(available_bits, remaining_bits);

            // Take the highest unread bits of the current byte; the mask also removes the sign
            // extension introduced by promoting the byte to int.
            int mask = (1 << bits_to_read) - 1;
            int bits = (cur_byte[decode_pos] >> (available_bits - bits_to_read)) & mask;

            // Append the bits just read to the low end of the result.
            num = (num << bits_to_read) | bits;

            remaining_bits -= bits_to_read;
            bit_index = available_bits - bits_to_read;

            if (bit_index == 0) {
                // Current byte exhausted: continue with the next one.
                bit_index = 8;
                decode_pos++;
            }
        }
        decode_pos_list[0] = decode_pos;
        decode_pos_list[1] = bit_index;

        return num;
    }

    /**
     * Decodes one block whose deltas were bit-packed one by one, reading them with
     * {@link #DecodeBits}.
     *
     * @param encoded       encoded stream
     * @param decode_pos    byte offset of the block inside {@code encoded}
     * @param value_list    destination array for decoded values
     * @param block_size    number of deltas stored in the block (first value not counted)
     * @param value_pos_arr single-element array holding the next write index in {@code value_list};
     *                      advanced by this method
     * @return the byte offset immediately after the block, including a partially used last byte
     */
    public static int BOSBlockDecoderImprove(byte[] encoded, int decode_pos, int[] value_list, int block_size,
            int[] value_pos_arr) {

        int value0 = bytes2Integer(encoded, decode_pos, 4);
        decode_pos += 4;
        value_list[value_pos_arr[0]++] = value0;

        int min_delta = bytes2Integer(encoded, decode_pos, 4);
        decode_pos += 4;

        int bit_width_final = bytes2Integer(encoded, decode_pos, 1);
        decode_pos += 1;

        // Cursor for DecodeBits: {byte index, unread bits in that byte}.
        int[] decode_list = {decode_pos, 8};
        int pre_v = value0;
        for (int i = 0; i < block_size; i++) {
            pre_v += min_delta + DecodeBits(encoded, bit_width_final, decode_list);
            value_list[value_pos_arr[0]++] = pre_v;
        }

        // A partially consumed byte belongs to this block; the next block starts after it.
        return decode_list[1] != 8 ? decode_list[0] + 1 : decode_list[0];
    }

    /**
     * Decodes a complete stream whose blocks are read with {@link #BOSBlockDecoderImprove}.
     *
     * <p>The decoded values are written to a local array and discarded; the method is only used to
     * measure decoding time.
     *
     * @param encoded encoded stream, starting with the 4-byte value count and 4-byte block size
     */
    public static void BOSDecoderImprove(byte[] encoded) {

        int decode_pos = 0;
        int length_all = bytes2Integer(encoded, decode_pos, 4);
        decode_pos += 4;
        int block_size = bytes2Integer(encoded, decode_pos, 4);
        decode_pos += 4;

        int block_num = length_all / block_size;
        int remain_length = length_all - block_num * block_size;

        int[] value_list = new int[length_all + block_size];
        // The stored block size counts the leading value; the block decoder wants the delta count.
        block_size--;

        int[] value_pos_arr = new int[1];
        for (int k = 0; k < block_num; k++) {
            decode_pos = BOSBlockDecoderImprove(encoded, decode_pos, value_list, block_size, value_pos_arr);
        }

        if (remain_length <= 3) {
            // Short tails are stored as raw 4-byte integers.
            for (int i = 0; i < remain_length; i++) {
                int value_end = bytes2Integer(encoded, decode_pos, 4);
                decode_pos += 4;
                value_list[value_pos_arr[0]++] = value_end;
            }
        } else {
            remain_length--;
            BOSBlockDecoderImprove(encoded, decode_pos, value_list, remain_length, value_pos_arr);
        }
    }

    /**
     * Adds {@code file} to the 7z archive {@code out}, recursing into directories.
     *
     * <p>I/O failures are printed and otherwise ignored, so one unreadable file does not abort the
     * whole archive.
     *
     * @param out  archive being written
     * @param file file or directory to add
     * @param dir  archive-internal parent path; {@code "."} means the archive root
     */
    private static void addToArchiveCompression(SevenZOutputFile out, File file, String dir) {
        String name = dir.equals(".") ? file.getName() : dir + File.separator + file.getName();

        if (file.isFile()) {
            try {
                SevenZArchiveEntry entry = out.createArchiveEntry(file, name);
                out.putArchiveEntry(entry);
                // try-with-resources: the stream is closed on every path and is never
                // dereferenced when opening it failed (the old finally block called close()
                // on a possibly null stream).
                try (InputStream in = Files.newInputStream(file.toPath())) {
                    byte[] buffer = new byte[1024];
                    int count;
                    while ((count = in.read(buffer)) > 0) {
                        out.write(buffer, 0, count);
                    }
                }
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                try {
                    out.closeArchiveEntry();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        } else if (file.isDirectory()) {
            File[] children = file.listFiles();
            if (children != null) {
                for (File child : children) {
                    addToArchiveCompression(out, child, name);
                }
            }
        } else {
            System.out.println(file.getName() + " is not supported");
        }
    }

    /**
     * Returns the number of characters after the first {@code '.'} in {@code str}.
     *
     * @param str textual number, e.g. {@code "3.140"}
     * @return count of characters following the first dot, or 0 when there is no dot
     */
    public static int getDecimalPrecision(String str) {
        int decimalIndex = str.indexOf(".");

        // No decimal point: precision is 0.
        if (decimalIndex == -1) {
            return 0;
        }

        // Everything after the decimal point counts towards the precision.
        return str.length() - decimalIndex - 1;
    }

    /**
     * Returns the last path component of {@code path} without its extension.
     *
     * @param path file path; may be {@code null} or empty
     * @return the base name; the full file name when it has no dot or only a leading dot; the empty
     *         string for a {@code null} or empty path
     */
    public static String extractFileName(String path) {
        if (path == null || path.isEmpty()) {
            return "";
        }

        String fileName = new File(path).getName();
        int dotIndex = fileName.lastIndexOf('.');

        // dotIndex == 0 is a dot-file such as ".gitignore": keep the whole name.
        if (dotIndex == -1 || dotIndex == 0) {
            return fileName;
        }

        return fileName.substring(0, dotIndex);
    }

    /**
     * Benchmarks {@code BOSEncoder} / {@code BOSDecoder} on every CSV file of the dataset directory
     * and writes one result row per file.
     *
     * <p>The first column of each file is read as a decimal number and scaled to an integer using
     * the largest number of decimal places found in that file.
     */
    @Test
    public void testSubcolumn() throws IOException {
        String parent_dir = "D:/github/xjz17/subcolumn/";

        String input_parent_dir = parent_dir + "dataset/";

        String output_parent_dir = "D:/encoding-subcolumn/result/";

        String outputPath = output_parent_dir + "ts2diff.csv";

        int block_size = 1024;

        int repeatTime = 100;

        List<String> integerDatasets = new ArrayList<>();
        integerDatasets.add("Wine-Tasting");

        CsvWriter writer = new CsvWriter(outputPath, ',', StandardCharsets.UTF_8);
        try {
            writer.setRecordDelimiter('\n');

            String[] head = {
                "Dataset",
                "Encoding Algorithm",
                "Encoding Time",
                "Decoding Time",
                "Points",
                "Compressed Size",
                "Compression Ratio"
            };
            writer.writeRecord(head); // write header to output file

            File directory = new File(input_parent_dir);
            File[] csvFiles = directory.listFiles((dir, name) -> name.endsWith(".csv"));
            if (csvFiles == null) {
                // listFiles returns null for a missing or unreadable directory.
                throw new IOException("Cannot list CSV files in " + input_parent_dir);
            }

            for (File file : csvFiles) {
                String datasetName = extractFileName(file.toString());
                System.out.println(datasetName);

                ArrayList<Float> data1 = new ArrayList<>();
                int max_decimal = 0;
                try (InputStream inputStream = Files.newInputStream(file.toPath())) {
                    CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8);
                    while (loader.readRecord()) {
                        String f_str = loader.getValues()[0];
                        if (f_str.isEmpty()) {
                            continue;
                        }
                        max_decimal = Math.max(max_decimal, getDecimalPrecision(f_str));
                        data1.add(Float.valueOf(f_str));
                    }
                }

                if (data1.isEmpty()) {
                    // Nothing to benchmark, and the compression ratio would be undefined.
                    continue;
                }

                int[] data2_arr = new int[data1.size()];
                int max_mul = (int) Math.pow(10, max_decimal);
                for (int i = 0; i < data1.size(); i++) {
                    // Round instead of truncating: 0.29f * 100 is 28.999998 and must become 29.
                    data2_arr[i] = Math.round(data1.get(i) * max_mul);
                }

                System.out.println(max_decimal);
                byte[] encoded_result = new byte[data2_arr.length * 4];

                int length = 0;

                long s = System.nanoTime();
                for (int repeat = 0; repeat < repeatTime; repeat++) {
                    length = BOSEncoder(data2_arr, block_size, encoded_result);
                }
                long e = System.nanoTime();
                long encodeTime = (e - s) / repeatTime;

                double compressed_size = length;

                // The raw size is computed with 4-byte values for the integer datasets and with
                // 8-byte values for all others.
                int rawValueBytes = integerDatasets.contains(datasetName) ? Integer.BYTES : Long.BYTES;
                double ratio = compressed_size / (double) (data1.size() * rawValueBytes);

                s = System.nanoTime();
                for (int repeat = 0; repeat < repeatTime; repeat++) {
                    BOSDecoder(encoded_result);
                }
                e = System.nanoTime();
                long decodeTime = (e - s) / repeatTime;

                String[] record = {
                    datasetName,
                    "TS2DIFF",
                    String.valueOf(encodeTime),
                    String.valueOf(decodeTime),
                    String.valueOf(data1.size()),
                    String.valueOf(compressed_size),
                    String.valueOf(ratio)
                };
                writer.writeRecord(record);
                System.out.println(ratio);
            }
        } finally {
            // Flush and release the output file even when a dataset fails.
            writer.close();
        }
    }

    /**
     * Benchmarks {@code BOSEncoder} / {@code BOSDecoder} on every sub-directory of the
     * {@code trans_data} directory and writes one aggregated result row per sub-directory.
     *
     * <p>Each file in a sub-directory is a CSV with a header row; the second column is encoded.
     */
    @Test
    public void testTransData() throws IOException {
        String parent_dir = "D:/github/xjz17/subcolumn/";

        String output_parent_dir = "D:/encoding-subcolumn/trans_data_result/";

        String input_parent_dir = parent_dir + "trans_data/";

        ArrayList<String> input_path_list = new ArrayList<>();
        ArrayList<String> dataset_name = new ArrayList<>();
        ArrayList<Integer> dataset_block_size = new ArrayList<>();

        try (Stream<Path> paths = Files.walk(Paths.get(input_parent_dir))) {
            paths.filter(Files::isDirectory)
                    .filter(path -> !path.equals(Paths.get(input_parent_dir)))
                    .forEach(dir -> {
                        String name = dir.getFileName().toString();
                        dataset_name.add(name);
                        input_path_list.add(dir.toString());
                        dataset_block_size.add(1024);
                    });
        }

        String outputPath = output_parent_dir + "ts2diff.csv";
        CsvWriter writer = new CsvWriter(outputPath, ',', StandardCharsets.UTF_8);
        try {
            writer.setRecordDelimiter('\n');

            String[] head = {
                "Dataset",
                "Encoding Algorithm",
                "Encoding Time",
                "Decoding Time",
                "Points",
                "Compressed Size",
                "Compression Ratio"
            };
            writer.writeRecord(head);

            int repeatTime = 100;

            for (int file_i = 0; file_i < input_path_list.size(); file_i++) {

                String inputPath = input_path_list.get(file_i);
                System.out.println(inputPath);

                File[] tempList = new File(inputPath).listFiles();
                if (tempList == null) {
                    // listFiles returns null when the directory cannot be read.
                    throw new IOException("Cannot list files in " + inputPath);
                }

                long totalEncodeTime = 0;
                long totalDecodeTime = 0;
                double totalCompressedSize = 0;
                int totalPoints = 0;

                for (File f : tempList) {
                    ArrayList<Integer> data1 = new ArrayList<>();
                    ArrayList<Integer> data2 = new ArrayList<>();

                    try (InputStream inputStream = Files.newInputStream(f.toPath())) {
                        CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8);
                        loader.readHeaders();
                        while (loader.readRecord()) {
                            data1.add(Integer.valueOf(loader.getValues()[0]));
                            data2.add(Integer.valueOf(loader.getValues()[1]));
                        }
                    }

                    if (data2.isEmpty()) {
                        // Nothing to benchmark in this file.
                        continue;
                    }

                    int[] data2_arr = new int[data1.size()];
                    for (int i = 0; i < data2.size(); i++) {
                        data2_arr[i] = data2.get(i);
                    }
                    byte[] encoded_result = new byte[data2_arr.length * 4];

                    int length = 0;

                    long s = System.nanoTime();
                    for (int repeat = 0; repeat < repeatTime; repeat++) {
                        length = BOSEncoder(data2_arr, dataset_block_size.get(file_i), encoded_result);
                    }
                    long e = System.nanoTime();
                    totalEncodeTime += (e - s) / repeatTime;

                    s = System.nanoTime();
                    for (int repeat = 0; repeat < repeatTime; repeat++) {
                        BOSDecoder(encoded_result);
                    }
                    e = System.nanoTime();
                    totalDecodeTime += (e - s) / repeatTime;

                    totalCompressedSize += length;
                    totalPoints += data1.size();
                }

                double compressionRatio = totalCompressedSize / (totalPoints * Integer.BYTES);

                String[] record = {
                    dataset_name.get(file_i),
                    "TS2DIFF",
                    String.valueOf(totalEncodeTime),
                    String.valueOf(totalDecodeTime),
                    String.valueOf(totalPoints),
                    String.valueOf(totalCompressedSize),
                    String.valueOf(compressionRatio)
                };

                writer.writeRecord(record);
                System.out.println(compressionRatio);
            }
        } finally {
            // Flush and release the output file even when a dataset fails.
            writer.close();
        }
    }

}