blob: 3a643777cdd31b680737c49c00e27dcf0b5674b2 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.carbondata.sdk.file;
import junit.framework.TestCase;
import org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException;
import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.metadata.datatype.StructField;
import org.apache.carbondata.core.scan.expression.ColumnExpression;
import org.apache.carbondata.core.scan.expression.LiteralExpression;
import org.apache.carbondata.core.scan.expression.conditional.EqualToExpression;
import org.apache.carbondata.core.util.CarbonProperties;
import org.apache.carbondata.core.util.path.CarbonTablePath;
import org.apache.carbondata.util.BinaryUtil;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.RandomStringUtils;
import org.apache.hadoop.mapreduce.InputSplit;
import org.junit.Assert;
import org.junit.Test;
import javax.imageio.ImageIO;
import javax.imageio.ImageReadParam;
import javax.imageio.ImageReader;
import javax.imageio.ImageTypeSpecifier;
import javax.imageio.stream.FileImageInputStream;
import javax.imageio.stream.ImageInputStream;
import java.awt.color.ColorSpace;
import java.awt.image.BufferedImage;
import java.io.*;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import static org.apache.carbondata.sdk.file.utils.SDKUtil.listFiles;
public class ImageTest extends TestCase {
/**
 * Writes ten rows that carry the same image in three BINARY columns plus a base64 string and a
 * raw byte[] value, with the "binary_decoder" load option set to "base64", then reads the rows
 * back and verifies the image bytes and the two small binary values.
 */
@Test
public void testWriteWithByteArrayDataType() throws IOException, InvalidLoadOptionException, InterruptedException {
  String imagePath = "./src/test/resources/image/carbondatalogo.jpg";
  int num = 1;
  int rows = 10;
  String path = "./target/binary";
  try {
    FileUtils.deleteDirectory(new File(path));
  } catch (IOException e) {
    // Best-effort cleanup of a previous run; the test continues either way.
    e.printStackTrace();
  }
  Field[] fields = new Field[7];
  fields[0] = new Field("name", DataTypes.STRING);
  fields[1] = new Field("age", DataTypes.INT);
  fields[2] = new Field("image1", DataTypes.BINARY);
  fields[3] = new Field("image2", DataTypes.BINARY);
  fields[4] = new Field("image3", DataTypes.BINARY);
  fields[5] = new Field("decodeString", DataTypes.BINARY);
  fields[6] = new Field("decodeByte", DataTypes.BINARY);
  String[] projection = new String[]{"name", "age", "image1",
      "image2", "image3", "decodeString", "decodeByte"};
  byte[] originBinary = null;
  // read and write image data
  for (int j = 0; j < num; j++) {
    CarbonWriter writer = CarbonWriter
        .builder()
        .outputPath(path)
        .withCsvInput(new Schema(fields))
        .writtenBy("SDKS3Example")
        .withPageSizeInMb(1)
        .withLoadOption("binary_decoder", "base64")
        .build();
    for (int i = 0; i < rows; i++) {
      // Load the whole image. readFully either fills the buffer or throws, so an empty file
      // cannot spin forever, and the stream is closed even when reading fails.
      File imageFile = new File(imagePath);
      originBinary = new byte[(int) imageFile.length()];
      DataInputStream imageIn = new DataInputStream(new FileInputStream(imageFile));
      try {
        imageIn.readFully(originBinary);
      } finally {
        imageIn.close();
      }
      // write data
      writer.write(new Object[]{"robot" + (i % 10), i, originBinary,
          originBinary, originBinary, "YWJj", "YWJj".getBytes()});
    }
    writer.close();
  }
  CarbonReader reader = CarbonReader
      .builder(path, "_temp")
      .projection(projection)
      .build();
  try {
    System.out.println("\nData:");
    int i = 0;
    while (i < 20 && reader.hasNext()) {
      Object[] row = (Object[]) reader.readNextRow();
      byte[] outputBinary = (byte[]) row[2];
      byte[] outputBinary2 = (byte[]) row[3];
      byte[] outputBinary3 = (byte[]) row[4];
      String stringValue = new String((byte[]) row[5]);
      String byteValue = new String((byte[]) row[6]);
      // when input is string, it will be decoded by base64.
      Assert.assertEquals("abc", stringValue);
      // when input is byte[], it will be not decoded by base64.
      Assert.assertEquals("YWJj", byteValue);
      System.out.println(row[0] + " " + row[1] + " image1 size:" + outputBinary.length
          + " image2 size:" + outputBinary2.length + " image3 size:" + outputBinary3.length
          + "\t" + stringValue + "\t" + byteValue);
      for (int k = 0; k < 3; k++) {
        byte[] storedImage = (byte[]) row[2 + k];
        // JUnit assertions are used instead of the `assert` keyword so the comparison still
        // runs when the JVM is started without -ea.
        Assert.assertArrayEquals(originBinary, storedImage);
        // save image, user can compare the save image and original image
        String destString = "./target/binary/image" + k + "_" + i + ".jpg";
        BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(destString));
        try {
          bos.write(storedImage);
        } finally {
          bos.close();
        }
      }
      i++;
    }
    System.out.println("\nFinished");
  } finally {
    reader.close();
  }
}
/**
 * Building a writer that names a BINARY column in the "sort_columns" table property is expected
 * to fail with a message listing the data types that cannot be sort columns.
 */
@Test
public void testWriteBinaryWithSort() {
  String path = "./target/binary";
  try {
    FileUtils.deleteDirectory(new File(path));
  } catch (IOException e) {
    // Best-effort cleanup of a previous run.
    e.printStackTrace();
  }
  Field[] fields = new Field[5];
  fields[0] = new Field("name", DataTypes.STRING);
  fields[1] = new Field("age", DataTypes.INT);
  fields[2] = new Field("image1", DataTypes.BINARY);
  fields[3] = new Field("image2", DataTypes.BINARY);
  fields[4] = new Field("image3", DataTypes.BINARY);
  try {
    CarbonWriter
        .builder()
        .outputPath(path)
        .withCsvInput(new Schema(fields))
        .writtenBy("SDKS3Example")
        .withPageSizeInMb(1)
        .withTableProperty("sort_columns", "image1")
        .build();
    // Assert.fail throws AssertionError, which the catch (Exception) below does not intercept.
    // Unlike `assert (false)`, it also fires when the JVM runs without -ea.
    Assert.fail("building a writer with a binary sort column should have thrown");
  } catch (Exception e) {
    Assert.assertTrue(e.getMessage().contains(
        "sort columns not supported for array, struct, map, double, float, decimal, varchar, binary"));
  }
}
/**
 * Building a writer that names a BINARY column in the "long_string_columns" table property is
 * expected to fail with a message naming the column and its data type.
 */
@Test
public void testWriteBinaryWithLong_string_columns() {
  String path = "./target/binary";
  try {
    FileUtils.deleteDirectory(new File(path));
  } catch (IOException e) {
    // Best-effort cleanup of a previous run.
    e.printStackTrace();
  }
  Field[] fields = new Field[5];
  fields[0] = new Field("name", DataTypes.STRING);
  fields[1] = new Field("age", DataTypes.INT);
  fields[2] = new Field("image1", DataTypes.BINARY);
  fields[3] = new Field("image2", DataTypes.BINARY);
  fields[4] = new Field("image3", DataTypes.BINARY);
  try {
    CarbonWriter
        .builder()
        .outputPath(path)
        .withCsvInput(new Schema(fields))
        .writtenBy("SDKS3Example")
        .withPageSizeInMb(1)
        .withTableProperty("long_string_columns", "image1")
        .build();
    // Assert.fail throws AssertionError, which the catch (Exception) below does not intercept.
    // Unlike `assert (false)`, it also fires when the JVM runs without -ea.
    Assert.fail("building a writer with a binary long string column should have thrown");
  } catch (Exception e) {
    Assert.assertTrue(e.getMessage().contains(
        "long string column : image1 is not supported for data type: BINARY"));
  }
}
/**
 * Builds a writer that names a BINARY column in the "inverted_index" table property. The build
 * is currently allowed to succeed (see the TODO); when it does throw, the message must state
 * that the column has to be present in SORT_COLUMNS.
 */
@Test
public void testWriteBinaryWithInverted_index() {
  String path = "./target/binary";
  try {
    FileUtils.deleteDirectory(new File(path));
  } catch (IOException e) {
    // Best-effort cleanup of a previous run.
    e.printStackTrace();
  }
  Field[] fields = new Field[5];
  fields[0] = new Field("name", DataTypes.STRING);
  fields[1] = new Field("age", DataTypes.INT);
  fields[2] = new Field("image1", DataTypes.BINARY);
  fields[3] = new Field("image2", DataTypes.BINARY);
  fields[4] = new Field("image3", DataTypes.BINARY);
  try {
    CarbonWriter
        .builder()
        .outputPath(path)
        .withCsvInput(new Schema(fields))
        .writtenBy("SDKS3Example")
        .withPageSizeInMb(1)
        .withTableProperty("inverted_index", "image1")
        .build();
    // TODO: should throw exception; once it does, fail the test here.
  } catch (Exception e) {
    System.out.println(e.getMessage());
    // JUnit assertion instead of the `assert` keyword, which is skipped without -ea.
    Assert.assertTrue(e.getMessage().contains(
        "INVERTED_INDEX column: image1 should be present in SORT_COLUMNS"));
  }
}
/**
 * Writes ten rows whose third BINARY column receives an Integer instead of a String or byte[],
 * with "bad_records_action" set to "force". If closing the writer throws, the message must say
 * that binary columns only support String and byte[]; a close that succeeds is also accepted.
 */
@Test
public void testWriteWithNull() throws IOException, InvalidLoadOptionException {
  String imagePath = "./src/test/resources/image/carbondatalogo.jpg";
  int num = 1;
  int rows = 10;
  String path = "./target/binary";
  try {
    FileUtils.deleteDirectory(new File(path));
  } catch (IOException e) {
    // Best-effort cleanup of a previous run.
    e.printStackTrace();
  }
  Field[] fields = new Field[5];
  fields[0] = new Field("name", DataTypes.STRING);
  fields[1] = new Field("age", DataTypes.INT);
  fields[2] = new Field("image1", DataTypes.BINARY);
  fields[3] = new Field("image2", DataTypes.BINARY);
  fields[4] = new Field("image3", DataTypes.BINARY);
  // read and write image data
  for (int j = 0; j < num; j++) {
    CarbonWriter writer = CarbonWriter
        .builder()
        .outputPath(path)
        .withCsvInput(new Schema(fields))
        .writtenBy("SDKS3Example")
        .withPageSizeInMb(1)
        .withLoadOption("bad_records_action", "force")
        .build();
    for (int i = 0; i < rows; i++) {
      // Load the raw image bytes. readFully either fills the buffer or throws, and the stream
      // is closed even when reading fails.
      File imageFile = new File(imagePath);
      byte[] originBinary = new byte[(int) imageFile.length()];
      DataInputStream imageIn = new DataInputStream(new FileInputStream(imageFile));
      try {
        imageIn.readFully(originBinary);
      } finally {
        imageIn.close();
      }
      // write data; the last value is deliberately an Integer for a BINARY column
      writer.write(new Object[]{"robot" + (i % 10), i, originBinary, originBinary, 1});
    }
    try {
      writer.close();
    } catch (Exception e) {
      // JUnit assertion instead of the `assert` keyword, which is skipped without -ea.
      Assert.assertTrue(e.getMessage().contains("Binary only support String and byte[] data type"));
    }
  }
}
/**
 * Writes one row whose BINARY column is supplied as a hex string, then reads it back twice,
 * once with an equality filter on "name" and once without. Each time the hex-decoded value
 * must equal the original image bytes. The output folder is deleted at the end.
 */
@Test
public void testBinaryWithOrWithoutFilter() throws IOException, InvalidLoadOptionException, InterruptedException, DecoderException {
  String imagePath = "./src/test/resources/image/carbondatalogo.jpg";
  int num = 1;
  int rows = 1;
  String path = "./target/binary";
  try {
    FileUtils.deleteDirectory(new File(path));
  } catch (IOException e) {
    // Best-effort cleanup of a previous run.
    e.printStackTrace();
  }
  Field[] fields = new Field[3];
  fields[0] = new Field("name", DataTypes.STRING);
  fields[1] = new Field("age", DataTypes.INT);
  fields[2] = new Field("image", DataTypes.BINARY);
  byte[] originBinary = null;
  // read and write image data
  for (int j = 0; j < num; j++) {
    CarbonWriter writer = CarbonWriter
        .builder()
        .outputPath(path)
        .withCsvInput(new Schema(fields))
        .writtenBy("SDKS3Example")
        .withPageSizeInMb(1)
        .build();
    for (int i = 0; i < rows; i++) {
      // Load the whole image, then hex-encode it once. readFully either fills the buffer or
      // throws, and the stream is closed even when reading fails.
      File imageFile = new File(imagePath);
      originBinary = new byte[(int) imageFile.length()];
      DataInputStream imageIn = new DataInputStream(new FileInputStream(imageFile));
      try {
        imageIn.readFully(originBinary);
      } finally {
        imageIn.close();
      }
      String hexValue = String.valueOf(Hex.encodeHex(originBinary));
      // write data
      writer.write(new String[]{"robot" + (i % 10), String.valueOf(i), hexValue});
    }
    writer.close();
  }
  // Read data with filter
  EqualToExpression equalToExpression = new EqualToExpression(
      new ColumnExpression("name", DataTypes.STRING),
      new LiteralExpression("robot0", DataTypes.STRING));
  CarbonReader reader = CarbonReader
      .builder(path, "_temp")
      .filter(equalToExpression)
      .build();
  int i = 0;
  try {
    System.out.println("\nData:");
    while (i < 20 && reader.hasNext()) {
      Object[] row = (Object[]) reader.readNextRow();
      byte[] outputBinary = Hex.decodeHex(new String((byte[]) row[1]).toCharArray());
      System.out.println(row[0] + " " + row[2] + " image size:" + outputBinary.length);
      // validate output binary data and origin binary data
      Assert.assertArrayEquals(originBinary, outputBinary);
      // Check the PNG signature on the raw bytes. Decoding the bytes to a String first would
      // make the outcome depend on the platform default charset.
      Assert.assertTrue(outputBinary.length >= 4
          && outputBinary[0] == (byte) 0x89
          && outputBinary[1] == 'P'
          && outputBinary[2] == 'N'
          && outputBinary[3] == 'G');
      // save image, user can compare the save image and original image
      String destString = "./target/binary/image" + i + ".jpg";
      BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(destString));
      try {
        bos.write(outputBinary);
      } finally {
        bos.close();
      }
      i++;
    }
    System.out.println("\nFinished");
  } finally {
    reader.close();
  }
  // Read data without filter
  CarbonReader reader2 = CarbonReader
      .builder(path, "_temp")
      .build();
  try {
    System.out.println("\nData:");
    i = 0;
    while (i < 20 && reader2.hasNext()) {
      Object[] row = (Object[]) reader2.readNextRow();
      byte[] outputBinary = Hex.decodeHex(new String((byte[]) row[1]).toCharArray());
      System.out.println(row[0] + " " + row[2] + " image size:" + outputBinary.length);
      // validate output binary data and origin binary data
      Assert.assertArrayEquals(originBinary, outputBinary);
      // save image, user can compare the save image and original image
      String destString = "./target/binary/image" + i + ".jpg";
      BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(destString));
      try {
        bos.write(outputBinary);
      } finally {
        bos.close();
      }
      i++;
    }
  } finally {
    reader2.close();
  }
  try {
    FileUtils.deleteDirectory(new File(path));
  } catch (IOException e) {
    // Best-effort cleanup of the output folder.
    e.printStackTrace();
  }
  System.out.println("\nFinished");
}
/**
 * Writes one row per ".jpg" file in the flowers resource folder (id, image path, hex-encoded
 * image, annotation file name, annotation text) and then reads up to 20 rows back, saving a
 * binary column of each row under "./target/flowers".
 */
@Test
public void testBinaryWithManyImages() throws IOException, InvalidLoadOptionException, InterruptedException {
  int num = 1;
  String path = "./target/flowers";
  Field[] fields = new Field[5];
  fields[0] = new Field("binaryId", DataTypes.INT);
  fields[1] = new Field("binaryName", DataTypes.STRING);
  fields[2] = new Field("binary", "Binary");
  fields[3] = new Field("labelName", DataTypes.STRING);
  fields[4] = new Field("labelContent", DataTypes.STRING);
  String imageFolder = "./src/test/resources/image/flowers";
  // read and write image data
  for (int j = 0; j < num; j++) {
    CarbonWriter writer = CarbonWriter
        .builder()
        .outputPath(path)
        .withCsvInput(new Schema(fields))
        .writtenBy("SDKS3Example")
        .withPageSizeInMb(1)
        .build();
    File file = new File(imageFolder);
    File[] files = file.listFiles(new FilenameFilter() {
      @Override
      public boolean accept(File dir, String name) {
        if (name == null) {
          return false;
        }
        return name.endsWith(".jpg");
      }
    });
    if (null != files) {
      for (int i = 0; i < files.length; i++) {
        String imagePath = files[i].getCanonicalPath();
        // Load the whole image. readFully either fills the buffer or throws, and the stream
        // is closed even when reading fails.
        byte[] originBinary = new byte[(int) files[i].length()];
        DataInputStream imageIn = new DataInputStream(new FileInputStream(files[i]));
        try {
          imageIn.readFully(originBinary);
        } finally {
          imageIn.close();
        }
        // Swap the image suffix for ".txt" with a literal search. String.split would treat
        // ".jpg" as a regex in which '.' matches any character.
        int suffixAt = imagePath.lastIndexOf(".jpg");
        String txtFileName = (suffixAt < 0 ? imagePath : imagePath.substring(0, suffixAt)) + ".txt";
        File txtFile = new File(txtFileName);
        byte[] txtBinary = new byte[(int) txtFile.length()];
        DataInputStream txtIn = new DataInputStream(new FileInputStream(txtFile));
        try {
          txtIn.readFully(txtBinary);
        } finally {
          txtIn.close();
        }
        String txtValue = new String(txtBinary, "UTF-8");
        // write data
        System.out.println(imagePath);
        writer.write(new String[]{String.valueOf(i), imagePath,
            String.valueOf(Hex.encodeHex(originBinary)), txtFileName, txtValue});
      }
    }
    writer.close();
  }
  CarbonReader reader = CarbonReader
      .builder(path)
      .build();
  try {
    System.out.println("\nData:");
    int i = 0;
    while (i < 20 && reader.hasNext()) {
      Object[] row = (Object[]) reader.readNextRow();
      byte[] outputBinary = (byte[]) row[1];
      System.out.println(row[0] + " " + row[2] + " image size:" + outputBinary.length);
      // save image, user can compare the save image and original image
      String destString = "./target/flowers/image" + i + ".jpg";
      BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(destString));
      try {
        bos.write(outputBinary);
      } finally {
        bos.close();
      }
      i++;
    }
    System.out.println("\nFinished");
  } finally {
    reader.close();
  }
}
/**
 * Writes one row per ".jpg" file returned by listFiles for the vocForSegmentationClass folder,
 * storing the raw image and the same-named ".png" file in two BINARY columns, then reads up to
 * 20 rows back and saves both binary columns of each row under the output folder.
 */
public void testWriteTwoImageColumn() throws Exception {
  String imagePath = "./src/test/resources/image/vocForSegmentationClass";
  String path = "./target/vocForSegmentationClass";
  int num = 1;
  Field[] fields = new Field[4];
  fields[0] = new Field("name", DataTypes.STRING);
  fields[1] = new Field("age", DataTypes.INT);
  fields[2] = new Field("rawImage", DataTypes.BINARY);
  fields[3] = new Field("segmentationClass", DataTypes.BINARY);
  Object[] files = listFiles(imagePath, ".jpg").toArray();
  // read and write image data
  for (int j = 0; j < num; j++) {
    CarbonWriter writer = CarbonWriter
        .builder()
        .outputPath(path)
        .withCsvInput(new Schema(fields))
        .writtenBy("SDKS3Example")
        .withPageSizeInMb(1)
        .build();
    for (int i = 0; i < files.length; i++) {
      String filePath = (String) files[i];
      // Load both files completely. readFully either fills the buffer or throws, and each
      // stream is closed even when reading fails.
      File rawFile = new File(filePath);
      byte[] originBinary = new byte[(int) rawFile.length()];
      DataInputStream rawIn = new DataInputStream(new FileInputStream(rawFile));
      try {
        rawIn.readFully(originBinary);
      } finally {
        rawIn.close();
      }
      File segmentationFile = new File(filePath.replace(".jpg", ".png"));
      byte[] originBinary2 = new byte[(int) segmentationFile.length()];
      DataInputStream segmentationIn = new DataInputStream(new FileInputStream(segmentationFile));
      try {
        segmentationIn.readFully(originBinary2);
      } finally {
        segmentationIn.close();
      }
      // write data
      writer.write(new Object[]{"robot" + (i % 10), i, originBinary, originBinary2});
    }
    writer.close();
  }
  CarbonReader reader = CarbonReader
      .builder(path, "_temp")
      .build();
  try {
    System.out.println("\nData:");
    int i = 0;
    while (i < 20 && reader.hasNext()) {
      Object[] row = (Object[]) reader.readNextRow();
      byte[] outputBinary = (byte[]) row[1];
      byte[] outputBinary2 = (byte[]) row[2];
      System.out.println(row[0] + " " + row[3] + " image1 size:" + outputBinary.length
          + " image2 size:" + outputBinary2.length);
      for (int k = 0; k < 2; k++) {
        byte[] storedImage = (byte[]) row[1 + k];
        // save image, user can compare the save image and original image
        String extension = (k == 0) ? ".jpg" : ".png";
        String destString = "./target/vocForSegmentationClass/image" + k + "_" + i + extension;
        BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(destString));
        try {
          bos.write(storedImage);
        } finally {
          bos.close();
        }
      }
      i++;
    }
    System.out.println("\nFinished");
  } finally {
    reader.close();
  }
}
/**
 * Runs BinaryUtil.binaryToCarbon on the flowers folder (".txt" annotations, ".jpg" images)
 * followed by BinaryUtil.carbonToBinary on the result, and prints the elapsed seconds.
 */
@Test
public void testWriteWithByteArrayDataTypeAndManyImagesTxt()
    throws Exception {
  final long startedAt = System.nanoTime();
  BinaryUtil.binaryToCarbon(
      "./src/test/resources/image/flowers", "./target/flowers", ".txt", ".jpg");
  BinaryUtil.carbonToBinary("./target/flowers", "./target/flowers/image");
  final long finishedAt = System.nanoTime();
  final double elapsedSeconds = (finishedAt - startedAt) / 1000000000.0;
  System.out.println("write time is " + elapsedSeconds + "s");
}
/**
 * Runs BinaryUtil.binaryToCarbon on the voc folder (".xml" annotations, ".jpg" images) followed
 * by BinaryUtil.carbonToBinary on the result, prints the elapsed seconds, and finally calls
 * ReadPerformance().
 */
@Test
public void testWriteWithByteArrayDataTypeAndManyImagesXml()
    throws Exception {
  final long startedAt = System.nanoTime();
  BinaryUtil.binaryToCarbon("./src/test/resources/image/voc", "./target/voc", ".xml", ".jpg");
  BinaryUtil.carbonToBinary("./target/voc", "./target/voc/image");
  final long finishedAt = System.nanoTime();
  final double elapsedSeconds = (finishedAt - startedAt) / 1000000000.0;
  System.out.println("write time is " + elapsedSeconds + "s");
  ReadPerformance();
}
/**
 * Sets the unsafe working memory property to "2048", then reads "./target/voc" in batches of
 * 1000, printing the running row count at every 1000th row, and finally the total row count
 * and the elapsed seconds.
 */
public void ReadPerformance() throws Exception {
  CarbonProperties.getInstance()
      .addProperty(CarbonCommonConstants.UNSAFE_WORKING_MEMORY_IN_MB, "2048");
  final long begin = System.nanoTime();
  CarbonReader batchReader = CarbonReader
      .builder("./target/voc")
      .withBatch(1000)
      .build();
  System.out.println("\nData2:");
  int rowCount = 0;
  while (batchReader.hasNext()) {
    Object[] batch = batchReader.readNextBatchRow();
    for (Object rawRow : batch) {
      Object[] row = (Object[]) rawRow;
      rowCount++;
      if (rowCount % 1000 == 0) {
        System.out.println(rowCount);
      }
      // Visit every column of the row; the values themselves are not used.
      for (Object column : row) {
      }
    }
  }
  System.out.println(rowCount);
  batchReader.close();
  final long finish = System.nanoTime();
  System.out.println("all time is " + (finish - begin) / 1000000000.0);
  System.out.println("\nFinished");
}
/**
 * Clears "./target/flowers2", runs binaryToCarbonWithHWD on the flowers folder with a writer
 * batch size of 2000, and clears the output folder again.
 */
@Test
public void testWriteWithByteArrayDataTypeAndManyImagesTxt3()
    throws Exception {
  final File outputFolder = new File("./target/flowers2");
  try {
    FileUtils.deleteDirectory(outputFolder);
  } catch (IOException cleanupFailure) {
    cleanupFailure.printStackTrace();
  }
  binaryToCarbonWithHWD("./src/test/resources/image/flowers", "./target/flowers2",
      "./target/flowers2/image", ".txt", ".jpg", 2000);
  try {
    FileUtils.deleteDirectory(outputFolder);
  } catch (IOException cleanupFailure) {
    cleanupFailure.printStackTrace();
  }
}
/**
 * listFiles on the flowers resource folder with the ".jpg" suffix must return three entries.
 */
@Test
public void testNumberOfFiles() throws Exception {
  final int jpgCount = listFiles("./src/test/resources/image/flowers", ".jpg").size();
  assertEquals(3, jpgCount);
}
/**
 * Writes rows with the "binary_decoder" load option set to "base64" while supplying the string
 * "^YWJj" for a BINARY column. Closing the writer is expected to throw with a message stating
 * that the data is not base64.
 */
@Test
public void testWriteNonBase64WithBase64Decoder() throws IOException, InvalidLoadOptionException, InterruptedException {
  String imagePath = "./src/test/resources/image/carbondatalogo.jpg";
  int num = 1;
  int rows = 10;
  String path = "./target/binary";
  try {
    FileUtils.deleteDirectory(new File(path));
  } catch (IOException e) {
    // Best-effort cleanup of a previous run.
    e.printStackTrace();
  }
  Field[] fields = new Field[7];
  fields[0] = new Field("name", DataTypes.STRING);
  fields[1] = new Field("age", DataTypes.INT);
  fields[2] = new Field("image1", DataTypes.BINARY);
  fields[3] = new Field("image2", DataTypes.BINARY);
  fields[4] = new Field("image3", DataTypes.BINARY);
  fields[5] = new Field("decodeString", DataTypes.BINARY);
  fields[6] = new Field("decodeByte", DataTypes.BINARY);
  // read and write image data
  for (int j = 0; j < num; j++) {
    CarbonWriter writer = CarbonWriter
        .builder()
        .outputPath(path)
        .withCsvInput(new Schema(fields))
        .writtenBy("SDKS3Example")
        .withPageSizeInMb(1)
        .withLoadOption("binary_decoder", "base64")
        .build();
    for (int i = 0; i < rows; i++) {
      // Load the raw image bytes. readFully either fills the buffer or throws, and the stream
      // is closed even when reading fails.
      File imageFile = new File(imagePath);
      byte[] originBinary = new byte[(int) imageFile.length()];
      DataInputStream imageIn = new DataInputStream(new FileInputStream(imageFile));
      try {
        imageIn.readFully(originBinary);
      } finally {
        imageIn.close();
      }
      // write data; "^YWJj" is not valid base64
      writer.write(new Object[]{"robot" + (i % 10), i, originBinary,
          originBinary, originBinary, "^YWJj", "^YWJj".getBytes()});
    }
    try {
      writer.close();
      // AssertionError is not an Exception, so the catch below does not swallow this failure.
      Assert.fail("closing the writer should have rejected the non-base64 value");
    } catch (Exception e) {
      Assert.assertTrue(e.getMessage().contains("Binary decoder is base64, but data is not base64"));
    }
  }
}
/**
 * Building a writer with the "binary_decoder" load option set to "base" must throw an
 * IllegalArgumentException whose message lists the supported decoders.
 */
public void testInvalidValueForBinaryDecoder() throws IOException, InvalidLoadOptionException {
  final Field[] fields = {
      new Field("name", DataTypes.STRING),
      new Field("age", DataTypes.INT),
      new Field("image1", DataTypes.BINARY),
      new Field("image2", DataTypes.BINARY),
      new Field("image3", DataTypes.BINARY),
      new Field("decodeString", DataTypes.BINARY),
      new Field("decodeByte", DataTypes.BINARY)
  };
  try {
    CarbonWriter
        .builder()
        .outputPath("./target/binary")
        .withCsvInput(new Schema(fields))
        .writtenBy("SDKS3Example")
        .withPageSizeInMb(1)
        .withLoadOption("binary_decoder", "base")
        .build();
    // Reaching this line means the invalid decoder value was accepted.
    Assert.assertTrue(false);
  } catch (IllegalArgumentException rejected) {
    final String message = rejected.getMessage();
    Assert.assertTrue(message.contains(
        "Binary decoder only support Base64, Hex or no decode for string, don't support base"));
  }
}
/**
 * Writes one row per file returned by {@code listFiles(sourceImageFolder, sufImage)} into
 * carbon files at {@code outputPath}. Each row holds the image height, width and depth
 * (1 for a gray color space, 4 when the color model has alpha, otherwise 3), the canonical
 * image path, the raw image bytes, the annotation file name and the annotation text with line
 * feeds removed. Afterwards up to 20 rows are read back and {@code row[1]} of each is saved to
 * {@code preDestPath + index + sufImage}.
 *
 * @param sourceImageFolder folder handed to listFiles
 * @param outputPath        carbon output folder, also read back at the end
 * @param preDestPath       path prefix of the files saved while reading back
 * @param sufAnnotation     suffix that replaces {@code sufImage} to locate the annotation file
 * @param sufImage          image suffix handed to listFiles
 * @param numToWrite        number of rows after which a new writer is started
 * @throws Exception if an image cannot be decoded by any ImageIO reader, or on any read or
 *                   write failure
 */
public void binaryToCarbonWithHWD(String sourceImageFolder, String outputPath, String preDestPath,
    String sufAnnotation, final String sufImage, int numToWrite)
    throws Exception {
  int num = 1;
  Field[] fields = new Field[7];
  fields[0] = new Field("height", DataTypes.INT);
  fields[1] = new Field("width", DataTypes.INT);
  fields[2] = new Field("depth", DataTypes.INT);
  fields[3] = new Field("binaryName", DataTypes.STRING);
  fields[4] = new Field("binary", DataTypes.BINARY);
  fields[5] = new Field("labelName", DataTypes.STRING);
  fields[6] = new Field("labelContent", DataTypes.STRING);
  // read and write image data
  for (int j = 0; j < num; j++) {
    Object[] files = listFiles(sourceImageFolder, sufImage).toArray();
    CarbonWriter writer = CarbonWriter
        .builder()
        .outputPath(outputPath)
        .withCsvInput(new Schema(fields))
        .withBlockSize(256)
        .writtenBy("SDKS3Example")
        .withPageSizeInMb(1)
        .build();
    for (int i = 0; i < files.length; i++) {
      // Roll over to a fresh writer after every numToWrite rows. The writer built above
      // serves the first batch, so it is not closed and rebuilt before any row is written.
      if (0 == i % numToWrite && i > 0) {
        writer.close();
        writer = CarbonWriter
            .builder()
            .outputPath(outputPath)
            .withCsvInput(new Schema(fields))
            .withBlockSize(256)
            .writtenBy("SDKS3Example")
            .withPageSizeInMb(1)
            .build();
      }
      File file = new File((String) files[i]);
      String imagePath = file.getCanonicalPath();
      System.out.println(imagePath);

      // First attempt: the default ImageIO decoding. It may throw or return null.
      BufferedImage bufferedImage = null;
      Exception lastException = null;
      try {
        bufferedImage = ImageIO.read(file);
      } catch (Exception e) {
        e.printStackTrace();
        System.out.println(i);
      }
      if (null == bufferedImage) {
        // Fallback: try each registered reader in turn, asking for a byte-gray destination
        // when the reader offers one. The image stream is closed once all readers are tried.
        ImageInputStream stream = new FileImageInputStream(new File(imagePath));
        try {
          Iterator<ImageReader> iter = ImageIO.getImageReaders(stream);
          while (null == bufferedImage && iter.hasNext()) {
            ImageReader reader = null;
            try {
              reader = iter.next();
              ImageReadParam param = reader.getDefaultReadParam();
              reader.setInput(stream, true, true);
              Iterator<ImageTypeSpecifier> imageTypes = reader.getImageTypes(0);
              while (imageTypes.hasNext()) {
                ImageTypeSpecifier imageTypeSpecifier = imageTypes.next();
                System.out.println(imageTypeSpecifier.getColorModel().getColorSpace().getType());
                if (imageTypeSpecifier.getBufferedImageType() == BufferedImage.TYPE_BYTE_GRAY) {
                  param.setDestinationType(imageTypeSpecifier);
                  break;
                }
              }
              bufferedImage = reader.read(0, param);
            } catch (Exception e2) {
              lastException = e2;
            } finally {
              if (null != reader) {
                reader.dispose();
              }
            }
          }
        } finally {
          stream.close();
        }
      }
      // Report the decode failure itself rather than a later NullPointerException.
      if (null == bufferedImage) {
        if (null != lastException) {
          throw lastException;
        }
        throw new IOException("No ImageIO reader could decode " + imagePath);
      }

      int depth;
      if (bufferedImage.getColorModel().getColorSpace().getType() == ColorSpace.TYPE_GRAY) {
        depth = 1;
      } else if (bufferedImage.getColorModel().hasAlpha()) {
        depth = 4;
      } else {
        depth = 3;
      }

      // Load the raw image bytes. readFully either fills the buffer or throws, and the stream
      // is closed even when reading fails.
      byte[] originBinary = new byte[(int) file.length()];
      DataInputStream imageIn = new DataInputStream(new FileInputStream(file));
      try {
        imageIn.readFully(originBinary);
      } finally {
        imageIn.close();
      }

      // Swap the image suffix for the annotation suffix with a literal search. String.split
      // would treat the suffix as a regex in which '.' matches any character.
      int suffixAt = imagePath.lastIndexOf(sufImage);
      String txtFileName =
          (suffixAt < 0 ? imagePath : imagePath.substring(0, suffixAt)) + sufAnnotation;
      File txtFile = new File(txtFileName);
      byte[] txtBinary = new byte[(int) txtFile.length()];
      DataInputStream txtIn = new DataInputStream(new FileInputStream(txtFile));
      try {
        txtIn.readFully(txtBinary);
      } finally {
        txtIn.close();
      }
      String txtValue = new String(txtBinary, "UTF-8");

      // write data
      writer.write(new Object[]{bufferedImage.getHeight(), bufferedImage.getWidth(), depth,
          imagePath, originBinary, txtFileName, txtValue.replace("\n", "")});
    }
    writer.close();
  }
  CarbonReader reader = CarbonReader
      .builder(outputPath)
      .build();
  try {
    System.out.println("\nData:");
    int i = 0;
    while (i < 20 && reader.hasNext()) {
      Object[] row = (Object[]) reader.readNextRow();
      byte[] outputBinary = (byte[]) row[1];
      System.out.println(row[2] + " " + row[3] + " " + row[4] + " " + row[5] + " image size:" + outputBinary.length + " " + row[0]);
      // save image, user can compare the save image and original image
      String destString = preDestPath + i + sufImage;
      BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(destString));
      try {
        bos.write(outputBinary);
      } finally {
        bos.close();
      }
      i++;
    }
    System.out.println("\nFinished");
  } finally {
    reader.close();
  }
}
/**
 * Writes the flower images five times into "./target/flowersFolder" and then reads the result
 * in three ways: through an Arrow reader over the first half of the carbon data files with a
 * two-column projection, through a reader on a single file, and through a reader on the whole
 * folder. Finally it requests splits for the same half of the files and builds a reader for
 * each split.
 */
@Test
public void testBinaryWithProjectionAndFileListsAndWithFile() throws Exception {
  int num = 5;
  String path = "./target/flowersFolder";
  try {
    FileUtils.deleteDirectory(new File(path));
  } catch (IOException e) {
    // Best-effort cleanup of a previous run.
    e.printStackTrace();
  }
  Field[] fields = new Field[5];
  fields[0] = new Field("imageId", DataTypes.INT);
  fields[1] = new Field("imageName", DataTypes.STRING);
  fields[2] = new Field("imageBinary", DataTypes.BINARY);
  fields[3] = new Field("txtName", DataTypes.STRING);
  fields[4] = new Field("txtContent", DataTypes.STRING);
  String imageFolder = "./src/test/resources/image/flowers";
  // read and write image data
  for (int j = 0; j < num; j++) {
    CarbonWriter writer = CarbonWriter
        .builder()
        .outputPath(path)
        .withCsvInput(new Schema(fields))
        .writtenBy("SDKS3Example")
        .withPageSizeInMb(1)
        .build();
    ArrayList files = listFiles(imageFolder, ".jpg");
    if (null != files) {
      for (int i = 0; i < files.size(); i++) {
        String imagePath = files.get(i).toString();
        // Load the whole image. readFully either fills the buffer or throws, and the stream
        // is closed even when reading fails.
        File imageFile = new File(imagePath);
        byte[] originBinary = new byte[(int) imageFile.length()];
        DataInputStream imageIn = new DataInputStream(new FileInputStream(imageFile));
        try {
          imageIn.readFully(originBinary);
        } finally {
          imageIn.close();
        }
        // Swap the image suffix for ".txt" with a literal search. String.split would treat
        // ".jpg" as a regex in which '.' matches any character.
        int suffixAt = imagePath.lastIndexOf(".jpg");
        String txtFileName = (suffixAt < 0 ? imagePath : imagePath.substring(0, suffixAt)) + ".txt";
        File txtFile = new File(txtFileName);
        byte[] txtBinary = new byte[(int) txtFile.length()];
        DataInputStream txtIn = new DataInputStream(new FileInputStream(txtFile));
        try {
          txtIn.readFully(txtBinary);
        } finally {
          txtIn.close();
        }
        String txtValue = new String(txtBinary, "UTF-8");
        // write data
        System.out.println(imagePath);
        writer.write(new String[]{String.valueOf(i), imagePath,
            String.valueOf(Hex.encodeHex(originBinary)), txtFileName, txtValue});
      }
    }
    writer.close();
  }
  // 1. read with file list
  List fileLists = listFiles(path, CarbonTablePath.CARBON_DATA_EXT);
  int fileNum = fileLists.size() / 2;
  Schema schema = CarbonSchemaReader.readSchema((String) fileLists.get(0)).asOriginOrder();
  List projectionLists = new ArrayList();
  projectionLists.add((schema.getFields())[1].getFieldName());
  projectionLists.add((schema.getFields())[2].getFieldName());
  CarbonReader reader = ArrowCarbonReader
      .builder()
      .withFileLists(fileLists.subList(0, fileNum))
      .projection(projectionLists)
      .buildArrowReader();
  int i = 0;
  try {
    System.out.println("\nData:");
    while (i < 20 && reader.hasNext()) {
      Object[] row = (Object[]) reader.readNextRow();
      assertEquals(2, row.length);
      byte[] outputBinary = (byte[]) row[1];
      System.out.println(row[0] + " " + row[1] + " image size:" + outputBinary.length);
      i++;
    }
    // JUnit assertion instead of the `assert` keyword, which is skipped without -ea.
    Assert.assertEquals(fileNum * 3, i);
    System.out.println("\nFinished: " + i);
  } finally {
    reader.close();
  }
  // 2. read withFile
  CarbonReader reader2 = CarbonReader
      .builder()
      .withFile(fileLists.get(0).toString())
      .build();
  try {
    System.out.println("\nData2:");
    i = 0;
    while (i < 20 && reader2.hasNext()) {
      Object[] row = (Object[]) reader2.readNextRow();
      assertEquals(5, row.length);
      Assert.assertNotNull(row[0]);
      byte[] outputBinary = (byte[]) row[1];
      String txt = row[2].toString();
      System.out.println(row[0] + " " + row[2] +
          " image size:" + outputBinary.length + " txt size:" + txt.length());
      i++;
    }
    System.out.println("\nFinished: " + i);
  } finally {
    reader2.close();
  }
  // 3. read with folder
  CarbonReader reader3 = CarbonReader
      .builder(path)
      .withFolder(path)
      .build();
  try {
    System.out.println("\nData:");
    i = 0;
    while (i < 20 && reader3.hasNext()) {
      Object[] row = (Object[]) reader3.readNextRow();
      byte[] outputBinary = (byte[]) row[1];
      System.out.println(row[0] + " " + row[2] + " image size:" + outputBinary.length);
      i++;
    }
    System.out.println("\nFinished: " + i);
  } finally {
    reader3.close();
  }
  // 4. one split per file, and a reader can be built for every split
  InputSplit[] splits = ArrowCarbonReader
      .builder()
      .withFileLists(fileLists.subList(0, fileNum))
      .getSplits(true);
  Assert.assertTrue(splits.length == fileNum);
  for (int j = 0; j < splits.length; j++) {
    ArrowCarbonReader.builder(splits[j]).build();
  }
}
/**
 * Writes carbon files into two separate folders (three writer passes and two writer passes),
 * requests splits for the combined list of carbon data files, expects five splits, and builds
 * a reader for each of them.
 */
public void testGetSplitWithFileListsFromDifferentFolder() throws Exception {
  final String firstFolder = "./target/flowersFolder1";
  final String secondFolder = "./target/flowersFolder2";
  writeCarbonFile(firstFolder, 3);
  writeCarbonFile(secondFolder, 2);
  List combinedFiles = listFiles(firstFolder, CarbonTablePath.CARBON_DATA_EXT);
  combinedFiles.addAll(listFiles(secondFolder, CarbonTablePath.CARBON_DATA_EXT));
  InputSplit[] splits = ArrowCarbonReader
      .builder()
      .withFileLists(combinedFiles)
      .getSplits(true);
  Assert.assertTrue(splits.length == 5);
  for (InputSplit split : splits) {
    ArrowCarbonReader.builder(split).build();
  }
}
/**
 * Writes the flower test images and their companion text files to {@code path} as carbon files.
 *
 * <p>An existing directory at {@code path} is deleted first (best effort). The method then runs
 * {@code num} separate writers against {@code path}. Each writer emits one row per {@code .jpg}
 * file listed in {@code ./src/test/resources/image/flowers}: the loop index, the image path,
 * the hex-encoded image bytes, the path of the {@code .txt} file with the same base name and
 * that file's content decoded as UTF-8.
 *
 * @param path output folder for the carbon files
 * @param num  number of writer runs to perform against {@code path}
 * @throws Exception if an input file cannot be read or the writer fails
 */
public void writeCarbonFile(String path, int num) throws Exception {
  try {
    FileUtils.deleteDirectory(new File(path));
  } catch (IOException e) {
    // Best effort: a left-over output folder must not abort the fixture set-up.
    e.printStackTrace();
  }

  Field[] fields = new Field[5];
  fields[0] = new Field("imageId", DataTypes.INT);
  fields[1] = new Field("imageName", DataTypes.STRING);
  fields[2] = new Field("imageBinary", DataTypes.BINARY);
  fields[3] = new Field("txtName", DataTypes.STRING);
  fields[4] = new Field("txtContent", DataTypes.STRING);

  String imageFolder = "./src/test/resources/image/flowers";
  String imageSuffix = ".jpg";
  // The input listing is the same for every writer run, so it is computed once.
  List<?> files = listFiles(imageFolder, imageSuffix);

  for (int j = 0; j < num; j++) {
    CarbonWriter writer = CarbonWriter
        .builder()
        .outputPath(path)
        .withCsvInput(new Schema(fields))
        .writtenBy("SDKS3Example")
        .withPageSizeInMb(1)
        .build();
    if (null != files) {
      for (int i = 0; i < files.size(); i++) {
        String imagePath = files.get(i).toString();

        // Files.readAllBytes reads the complete file and always closes it, also for empty
        // files and on errors. java.nio names are fully qualified so that the file's import
        // list does not have to change.
        byte[] imageBytes =
            java.nio.file.Files.readAllBytes(java.nio.file.Paths.get(imagePath));
        String hexValue = String.valueOf(Hex.encodeHex(imageBytes));

        // Strip the suffix literally; String.split would treat ".jpg" as a regular expression
        // in which '.' matches any character.
        int suffixAt = imagePath.lastIndexOf(imageSuffix);
        String basePath = suffixAt >= 0 ? imagePath.substring(0, suffixAt) : imagePath;
        String txtFileName = basePath + ".txt";
        byte[] txtBytes =
            java.nio.file.Files.readAllBytes(java.nio.file.Paths.get(txtFileName));
        String txtValue = new String(txtBytes, java.nio.charset.StandardCharsets.UTF_8);

        // write data
        System.out.println(imagePath);
        writer.write(new String[]{String.valueOf(i), imagePath, hexValue,
            txtFileName, txtValue});
      }
    }
    writer.close();
  }
}
/**
 * Writes a single row whose array and struct columns carry binary values and checks that the
 * values read back match the written text (compared case-insensitively).
 *
 * @throws IOException                if the output folder cannot be written or read
 * @throws InvalidLoadOptionException if the writer rejects its configuration
 * @throws InterruptedException       if building the reader is interrupted
 */
@Test public void testBinaryWithComplexType()
    throws IOException, InvalidLoadOptionException, InterruptedException {
  int num = 1;
  int rows = 1;
  String path = "./target/binary";
  try {
    FileUtils.deleteDirectory(new File(path));
  } catch (IOException e) {
    // Best effort: a left-over output folder must not abort the test.
    e.printStackTrace();
  }

  Field[] fields = new Field[4];
  fields[0] = new Field("name", DataTypes.STRING);
  fields[1] = new Field("age", DataTypes.INT);
  fields[2] = new Field("arrayField", DataTypes.createArrayType(DataTypes.BINARY));
  ArrayList<StructField> structFields = new ArrayList<>();
  structFields.add(new StructField("b", DataTypes.BINARY));
  fields[3] = new Field("structField", DataTypes.createStructType(structFields));

  // write num * rows rows
  for (int j = 0; j < num; j++) {
    CarbonWriter writer = CarbonWriter.builder().outputPath(path).withCsvInput(new Schema(fields))
        .writtenBy("BinaryExample").withPageSizeInMb(1).build();
    for (int i = 0; i < rows; i++) {
      // write data
      writer.write(new String[] { "robot" + (i % 10), String.valueOf(i), "binary1", "binary2" });
    }
    writer.close();
  }

  CarbonReader reader = CarbonReader.builder(path, "_temp").build();
  int rowCount = 0;
  try {
    while (reader.hasNext()) {
      Object[] row = (Object[]) reader.readNextRow();
      // NOTE(review): the complex columns are read at indices 1 and 2 although they are
      // declared at schema positions 2 and 3 - presumably the reader's output order; confirm.
      Object[] arrayResult = (Object[]) row[1];
      Object[] structResult = (Object[]) row[2];
      // JUnit assertions are used because the 'assert' keyword is skipped without -ea.
      Assert.assertTrue(new String((byte[]) arrayResult[0]).equalsIgnoreCase("binary1"));
      Assert.assertTrue(new String((byte[]) structResult[0]).equalsIgnoreCase("binary2"));
      rowCount++;
    }
  } finally {
    // Release the reader even when one of the checks above fails.
    reader.close();
  }
  // Guards against the loop passing vacuously because no row was read back.
  Assert.assertEquals(num * rows, rowCount);
}
/**
 * Writes a single row whose array and struct columns carry a 33000-character binary value and
 * checks that the value read back matches the written text (compared case-insensitively).
 *
 * @throws IOException                if the output folder cannot be written or read
 * @throws InvalidLoadOptionException if the writer rejects its configuration
 * @throws InterruptedException       if building the reader is interrupted
 */
@Test public void testHugeBinaryWithComplexType()
    throws IOException, InvalidLoadOptionException, InterruptedException {
  int num = 1;
  int rows = 1;
  String path = "./target/binary";
  try {
    FileUtils.deleteDirectory(new File(path));
  } catch (IOException e) {
    // Best effort: a left-over output folder must not abort the test.
    e.printStackTrace();
  }

  Field[] fields = new Field[2];
  fields[0] = new Field("arrayField", DataTypes.createArrayType(DataTypes.BINARY));
  ArrayList<StructField> structFields = new ArrayList<>();
  structFields.add(new StructField("b", DataTypes.BINARY));
  fields[1] = new Field("structField", DataTypes.createStructType(structFields));

  // NOTE(review): 33000 is presumably chosen to exceed Short.MAX_VALUE (32767) - confirm.
  String description = RandomStringUtils.randomAlphabetic(33000);

  // write num * rows rows
  for (int j = 0; j < num; j++) {
    CarbonWriter writer = CarbonWriter.builder().outputPath(path).withCsvInput(new Schema(fields))
        .writtenBy("BinaryExample").withPageSizeInMb(5).build();
    for (int i = 0; i < rows; i++) {
      // write data
      writer.write(new String[] { description, description });
    }
    writer.close();
  }

  CarbonReader reader = CarbonReader.builder(path, "_temp").build();
  int rowCount = 0;
  try {
    while (reader.hasNext()) {
      Object[] row = (Object[]) reader.readNextRow();
      Object[] arrayResult = (Object[]) row[0];
      Object[] structResult = (Object[]) row[1];
      // JUnit assertions are used because the 'assert' keyword is skipped without -ea.
      Assert.assertTrue(new String((byte[]) arrayResult[0]).equalsIgnoreCase(description));
      Assert.assertTrue(new String((byte[]) structResult[0]).equalsIgnoreCase(description));
      rowCount++;
    }
  } finally {
    // Release the reader even when one of the checks above fails.
    reader.close();
  }
  // Guards against the loop passing vacuously because no row was read back.
  Assert.assertEquals(num * rows, rowCount);
}
}