// exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/BaseCsvTest.java - drill - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.drill.exec.store.easy.text.compliant;

 import java.io.File;
 import java.io.FileWriter;
 import java.io.IOException;
 import java.io.PrintWriter;

 import org.apache.drill.exec.ExecConstants;
 import org.apache.drill.exec.store.easy.text.TextFormatConfig;
 import org.apache.drill.test.ClusterFixture;
 import org.apache.drill.test.ClusterTest;

 /**
  * Shared base class for CSV reader tests. Starts a test cluster, registers
  * a CSV format through {@code cluster.makeDataDir("data", "csv", ...)} and
  * offers helpers that write small CSV fixtures beneath {@link #testDir}.
  */
 public class BaseCsvTest extends ClusterTest {

   /** Number of characters written into the wide column by {@link #buildBigColFile(boolean)}. */
   protected final int BIG_COL_SIZE = 70_000;

   protected static final String PART_DIR = "root";
   protected static final String NESTED_DIR = "nested";
   protected static final String ROOT_FILE = "first.csv";
   protected static final String NESTED_FILE = "second.csv";
   protected static final String EMPTY_FILE = "empty.csv";

   /** Name pattern for the files created by {@link #buildTable(String, String[][])}. */
   protected static final String FILE_N_NAME = "file%d.csv";

   /**
    * The scan operator is able to emit an empty, schema-only batch as its
    * first batch. Several operators could not cope with that, so the
    * behavior is switched off for now. Tests that used to verify the empty
    * batch consult this flag to skip that verification.
    */
   protected static boolean SCHEMA_BATCH_ENABLED = false;

   /** Header line plus one data row; content of the root-level file. */
   protected static String[] validHeaders = {
       "a,b,c",
       "10,foo,bar"
   };

   /** Header line plus one data row; content of the nested file. */
   protected static String[] secondFile = {
       "a,b,c",
       "20,fred,wilma"
   };

   /** Directory returned by {@code cluster.makeDataDir}; set by {@code setup}. */
   protected static File testDir;

   /**
    * Starts the cluster with a maximum parallelization of 1.
    *
    * @param skipFirstLine passed through to the CSV format configuration
    * @param extractHeader passed through to the CSV format configuration
    * @throws Exception if the three-argument {@code setup} throws
    */
   protected static void setup(boolean skipFirstLine, boolean extractHeader) throws Exception {
     setup(skipFirstLine, extractHeader, 1);
   }

   /**
    * Starts the cluster and creates the data directory with a CSV format
    * built from the given flags; the result is stored in {@link #testDir}.
    *
    * @param skipFirstLine passed through to the CSV format configuration
    * @param extractHeader passed through to the CSV format configuration
    * @param maxParallelization value handed to the cluster builder
    * @throws Exception if cluster start-up or directory creation fails
    */
   protected static void setup(boolean skipFirstLine, boolean extractHeader,
       int maxParallelization) throws Exception {
     startCluster(
         ClusterFixture.builder(dirTestWatcher)
             .maxParallelization(maxParallelization));

     // CSV format for the test workspace; everything except the two flags
     // is left null.
     TextFormatConfig textConfig = new TextFormatConfig(
         null,
         null,  // line delimiter
         null,  // field delimiter
         null,  // quote
         null,  // escape
         null,  // comment
         skipFirstLine,
         extractHeader);

     testDir = cluster.makeDataDir("data", "csv", textConfig);
   }

   /**
    * Creates a two-level directory tree under {@link #testDir}:
    * {@code root/first.csv} and {@code root/nested/second.csv}.
    *
    * @throws IOException if a file cannot be opened for writing
    */
   protected static void buildNestedTable() throws IOException {
     // Top level: directory plus its file.
     File topDir = new File(testDir, PART_DIR);
     topDir.mkdir();
     buildFile(new File(topDir, ROOT_FILE), validHeaders);

     // Second level: nested directory plus its file.
     File innerDir = new File(topDir, NESTED_DIR);
     innerDir.mkdir();
     buildFile(new File(innerDir, NESTED_FILE), secondFile);
   }

   /**
    * Special test-only feature that forces even small scans to use more
    * than one thread. Requires that the max parallelization option was set
    * when the cluster was started.
    */
   protected void enableMultiScan() {
     client.alterSession(ExecConstants.MIN_READER_WIDTH_KEY, 2);
   }

   /** Resets the session option changed by {@link #enableMultiScan()}. */
   protected void resetMultiScan() {
     client.resetSession(ExecConstants.MIN_READER_WIDTH_KEY);
   }

   /**
    * Sets the {@code STORE_TABLE_USE_SCHEMA_FILE} session option.
    *
    * @param enable value to assign to the option
    */
   protected void enableSchema(boolean enable) {
     client.alterSession(ExecConstants.STORE_TABLE_USE_SCHEMA_FILE, enable);
   }

   /** Resets the {@code STORE_TABLE_USE_SCHEMA_FILE} session option. */
   protected void resetSchema() {
     client.resetSession(ExecConstants.STORE_TABLE_USE_SCHEMA_FILE);
   }

   /**
    * Writes the given lines to a file located directly in {@link #testDir}.
    *
    * @param fileName name of the file relative to {@link #testDir}
    * @param data lines to write, one array element per line
    * @throws IOException if the file cannot be opened for writing
    */
   protected static void buildFile(String fileName, String[] data) throws IOException {
     File target = new File(testDir, fileName);
     buildFile(target, data);
   }

   /**
    * Writes the given lines to {@code file}, each followed by a line break.
    *
    * @param file destination file
    * @param data lines to write, one array element per line
    * @throws IOException if the file cannot be opened for writing
    */
   protected static void buildFile(File file, String[] data) throws IOException {
     try (PrintWriter writer = new PrintWriter(new FileWriter(file))) {
       for (int idx = 0; idx < data.length; idx++) {
         writer.println(data[idx]);
       }
     }
   }

   /**
    * Writes {@code hugeCol.csv} into {@link #testDir}: ten rows of the form
    * {@code id,<BIG_COL_SIZE letters>,id*10}, where the letters cycle
    * through A-Z and the starting letter shifts by one per row.
    *
    * @param withHeader whether to emit the {@code id,big,n} header line first
    * @return the name of the file that was written
    * @throws IOException if the file cannot be opened for writing
    */
   protected String buildBigColFile(boolean withHeader) throws IOException {
     final String fileName = "hugeCol.csv";
     final File target = new File(testDir, fileName);
     try (PrintWriter writer = new PrintWriter(new FileWriter(target))) {
       if (withHeader) {
         writer.println("id,big,n");
       }
       for (int row = 1; row <= 10; row++) {
         // Assemble the whole row in memory, then emit it in one call.
         StringBuilder line = new StringBuilder(BIG_COL_SIZE + 16);
         line.append(row).append(',');
         for (int col = 0; col < BIG_COL_SIZE; col++) {
           line.append((char) ('A' + (col + row - 1) % 26));
         }
         line.append(',').append(row * 10);
         writer.println(line.toString());
       }
     }
     return fileName;
   }

   /**
    * Writes {@code biggish.csv} into {@link #testDir}: an {@code id} header
    * followed by the numbers 1 through 100, one per line.
    *
    * @return the name of the file that was written
    * @throws IOException if the file cannot be opened for writing
    */
   protected String buildBiggishFile() throws IOException {
     final String fileName = "biggish.csv";
     final File target = new File(testDir, fileName);
     try (PrintWriter writer = new PrintWriter(new FileWriter(target))) {
       writer.println("id");
       for (int id = 1; id <= 100; id++) {
         writer.println(id);
       }
     }
     return fileName;
   }

   /**
    * Creates a directory named {@code tableName} under {@link #testDir} and
    * fills it with {@code file0.csv}, {@code file1.csv}, ... - one file per
    * element of {@code fileContents}.
    *
    * @param tableName name of the directory to create
    * @param fileContents lines for each file, in file order
    * @return the table name quoted with back-ticks and prefixed with
    *         {@code `dfs.data`}
    * @throws IOException if a file cannot be opened for writing
    */
   protected static String buildTable(String tableName, String[]...fileContents) throws IOException {
     File tableDir = new File(testDir, tableName);
     tableDir.mkdir();
     int fileIndex = 0;
     for (String[] contents : fileContents) {
       File target = new File(tableDir, String.format(FILE_N_NAME, fileIndex));
       buildFile(target, contents);
       fileIndex++;
     }
     return "`dfs.data`.`" + tableName + "`";
   }

   /** Convenience form of {@code enableSchema(true)}. */
   protected void enableSchemaSupport() {
     enableSchema(true);
   }

   /** Delegates to {@link #resetSchema()}. */
   protected void resetSchemaSupport() {
     resetSchema();
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.drill.exec.store.easy.text.compliant;

	import java.io.File;
	import java.io.FileWriter;
	import java.io.IOException;
	import java.io.PrintWriter;

	import org.apache.drill.exec.ExecConstants;
	import org.apache.drill.exec.store.easy.text.TextFormatConfig;
	import org.apache.drill.test.ClusterFixture;
	import org.apache.drill.test.ClusterTest;

	/**
	 * Shared base class for CSV reader tests. Starts a test cluster, registers
	 * a CSV format through {@code cluster.makeDataDir("data", "csv", ...)} and
	 * offers helpers that write small CSV fixtures beneath {@link #testDir}.
	 */
	public class BaseCsvTest extends ClusterTest {

		/** Number of characters written into the wide column by {@link #buildBigColFile(boolean)}. */
		protected final int BIG_COL_SIZE = 70_000;

		protected static final String PART_DIR = "root";
		protected static final String NESTED_DIR = "nested";
		protected static final String ROOT_FILE = "first.csv";
		protected static final String NESTED_FILE = "second.csv";
		protected static final String EMPTY_FILE = "empty.csv";

		/** Name pattern for the files created by {@link #buildTable(String, String[][])}. */
		protected static final String FILE_N_NAME = "file%d.csv";

		/**
		 * The scan operator is able to emit an empty, schema-only batch as its
		 * first batch. Several operators could not cope with that, so the
		 * behavior is switched off for now. Tests that used to verify the empty
		 * batch consult this flag to skip that verification.
		 */
		protected static boolean SCHEMA_BATCH_ENABLED = false;

		/** Header line plus one data row; content of the root-level file. */
		protected static String[] validHeaders = {
			"a,b,c",
			"10,foo,bar"
		};

		/** Header line plus one data row; content of the nested file. */
		protected static String[] secondFile = {
			"a,b,c",
			"20,fred,wilma"
		};

		/** Directory returned by {@code cluster.makeDataDir}; set by {@code setup}. */
		protected static File testDir;

		/**
		 * Starts the cluster with a maximum parallelization of 1.
		 *
		 * @param skipFirstLine passed through to the CSV format configuration
		 * @param extractHeader passed through to the CSV format configuration
		 * @throws Exception if the three-argument {@code setup} throws
		 */
		protected static void setup(boolean skipFirstLine, boolean extractHeader) throws Exception {
			setup(skipFirstLine, extractHeader, 1);
		}

		/**
		 * Starts the cluster and creates the data directory with a CSV format
		 * built from the given flags; the result is stored in {@link #testDir}.
		 *
		 * @param skipFirstLine passed through to the CSV format configuration
		 * @param extractHeader passed through to the CSV format configuration
		 * @param maxParallelization value handed to the cluster builder
		 * @throws Exception if cluster start-up or directory creation fails
		 */
		protected static void setup(boolean skipFirstLine, boolean extractHeader,
				int maxParallelization) throws Exception {
			startCluster(
				ClusterFixture.builder(dirTestWatcher)
					.maxParallelization(maxParallelization));

			// CSV format for the test workspace; everything except the two flags
			// is left null.
			TextFormatConfig textConfig = new TextFormatConfig(
				null,
				null, // line delimiter
				null, // field delimiter
				null, // quote
				null, // escape
				null, // comment
				skipFirstLine,
				extractHeader);

			testDir = cluster.makeDataDir("data", "csv", textConfig);
		}

		/**
		 * Creates a two-level directory tree under {@link #testDir}:
		 * {@code root/first.csv} and {@code root/nested/second.csv}.
		 *
		 * @throws IOException if a file cannot be opened for writing
		 */
		protected static void buildNestedTable() throws IOException {
			// Top level: directory plus its file.
			File topDir = new File(testDir, PART_DIR);
			topDir.mkdir();
			buildFile(new File(topDir, ROOT_FILE), validHeaders);

			// Second level: nested directory plus its file.
			File innerDir = new File(topDir, NESTED_DIR);
			innerDir.mkdir();
			buildFile(new File(innerDir, NESTED_FILE), secondFile);
		}

		/**
		 * Special test-only feature that forces even small scans to use more
		 * than one thread. Requires that the max parallelization option was set
		 * when the cluster was started.
		 */
		protected void enableMultiScan() {
			client.alterSession(ExecConstants.MIN_READER_WIDTH_KEY, 2);
		}

		/** Resets the session option changed by {@link #enableMultiScan()}. */
		protected void resetMultiScan() {
			client.resetSession(ExecConstants.MIN_READER_WIDTH_KEY);
		}

		/**
		 * Sets the {@code STORE_TABLE_USE_SCHEMA_FILE} session option.
		 *
		 * @param enable value to assign to the option
		 */
		protected void enableSchema(boolean enable) {
			client.alterSession(ExecConstants.STORE_TABLE_USE_SCHEMA_FILE, enable);
		}

		/** Resets the {@code STORE_TABLE_USE_SCHEMA_FILE} session option. */
		protected void resetSchema() {
			client.resetSession(ExecConstants.STORE_TABLE_USE_SCHEMA_FILE);
		}

		/**
		 * Writes the given lines to a file located directly in {@link #testDir}.
		 *
		 * @param fileName name of the file relative to {@link #testDir}
		 * @param data lines to write, one array element per line
		 * @throws IOException if the file cannot be opened for writing
		 */
		protected static void buildFile(String fileName, String[] data) throws IOException {
			File target = new File(testDir, fileName);
			buildFile(target, data);
		}

		/**
		 * Writes the given lines to {@code file}, each followed by a line break.
		 *
		 * @param file destination file
		 * @param data lines to write, one array element per line
		 * @throws IOException if the file cannot be opened for writing
		 */
		protected static void buildFile(File file, String[] data) throws IOException {
			try (PrintWriter writer = new PrintWriter(new FileWriter(file))) {
				for (int idx = 0; idx < data.length; idx++) {
					writer.println(data[idx]);
				}
			}
		}

		/**
		 * Writes {@code hugeCol.csv} into {@link #testDir}: ten rows of the form
		 * {@code id,<BIG_COL_SIZE letters>,id*10}, where the letters cycle
		 * through A-Z and the starting letter shifts by one per row.
		 *
		 * @param withHeader whether to emit the {@code id,big,n} header line first
		 * @return the name of the file that was written
		 * @throws IOException if the file cannot be opened for writing
		 */
		protected String buildBigColFile(boolean withHeader) throws IOException {
			final String fileName = "hugeCol.csv";
			final File target = new File(testDir, fileName);
			try (PrintWriter writer = new PrintWriter(new FileWriter(target))) {
				if (withHeader) {
					writer.println("id,big,n");
				}
				for (int row = 1; row <= 10; row++) {
					// Assemble the whole row in memory, then emit it in one call.
					StringBuilder line = new StringBuilder(BIG_COL_SIZE + 16);
					line.append(row).append(',');
					for (int col = 0; col < BIG_COL_SIZE; col++) {
						line.append((char) ('A' + (col + row - 1) % 26));
					}
					line.append(',').append(row * 10);
					writer.println(line.toString());
				}
			}
			return fileName;
		}

		/**
		 * Writes {@code biggish.csv} into {@link #testDir}: an {@code id} header
		 * followed by the numbers 1 through 100, one per line.
		 *
		 * @return the name of the file that was written
		 * @throws IOException if the file cannot be opened for writing
		 */
		protected String buildBiggishFile() throws IOException {
			final String fileName = "biggish.csv";
			final File target = new File(testDir, fileName);
			try (PrintWriter writer = new PrintWriter(new FileWriter(target))) {
				writer.println("id");
				for (int id = 1; id <= 100; id++) {
					writer.println(id);
				}
			}
			return fileName;
		}

		/**
		 * Creates a directory named {@code tableName} under {@link #testDir} and
		 * fills it with {@code file0.csv}, {@code file1.csv}, ... - one file per
		 * element of {@code fileContents}.
		 *
		 * @param tableName name of the directory to create
		 * @param fileContents lines for each file, in file order
		 * @return the table name quoted with back-ticks and prefixed with
		 *         {@code `dfs.data`}
		 * @throws IOException if a file cannot be opened for writing
		 */
		protected static String buildTable(String tableName, String[]...fileContents) throws IOException {
			File tableDir = new File(testDir, tableName);
			tableDir.mkdir();
			int fileIndex = 0;
			for (String[] contents : fileContents) {
				File target = new File(tableDir, String.format(FILE_N_NAME, fileIndex));
				buildFile(target, contents);
				fileIndex++;
			}
			return "`dfs.data`.`" + tableName + "`";
		}

		/** Convenience form of {@code enableSchema(true)}. */
		protected void enableSchemaSupport() {
			enableSchema(true);
		}

		/** Delegates to {@link #resetSchema()}. */
		protected void resetSchemaSupport() {
			resetSchema();
		}
	}