freemarker-generator-tools/src/main/java/org/apache/freemarker/generator/tools/commonscsv/CommonsCSVTool.java - freemarker-generator - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.freemarker.generator.tools.commonscsv;

 import org.apache.commons.csv.CSVFormat;
 import org.apache.commons.csv.CSVParser;
 import org.apache.commons.csv.CSVRecord;
 import org.apache.commons.io.input.BOMInputStream;
 import org.apache.freemarker.generator.base.datasource.DataSource;
 import org.apache.freemarker.generator.base.util.StringUtils;
 import org.apache.freemarker.generator.tools.commonscsv.impl.CommonsCSVPrinterFacade;

 import java.io.IOException;
 import java.io.InputStream;
 import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.function.Function;
 import java.util.stream.Collectors;

 import static java.nio.charset.StandardCharsets.UTF_8;
 import static java.util.Objects.requireNonNull;
 import static java.util.stream.Collectors.toList;
 import static org.apache.commons.io.IOUtils.toInputStream;

 public class CommonsCSVTool {

     public CSVParser parse(DataSource dataSource) {
         return parse(dataSource, defaultCSVInputFormat());
     }

     public List<CSVParser> parse(Collection<DataSource> dataSources) {
         return dataSources.stream()
                 .map(this::parse)
                 .collect(toList());
     }

     public CSVParser parse(DataSource dataSource, CSVFormat format) {
         if (dataSource == null) {
             throw new IllegalArgumentException("No data source was provided");
         }

         try {
             // As stated in the documentation : "If you do not read all records from the given {@code reader},
             // you should call {@link #close()} on the parser, unless you close the {@code reader}."
             // The underlying input stream is closed by the data source by its "CloseableReaper".
             final InputStream is = new BOMInputStream(dataSource.getInputStream(), false);
             return parse(is, dataSource.getCharset(), format);
         } catch (IOException e) {
             throw new RuntimeException("Failed to parse CSV: " + dataSource, e);
         }
     }

     public CSVParser parse(String csv) {
         return parse(csv, defaultCSVInputFormat());
     }

     public CSVParser parse(String csv, CSVFormat format) {
         if (StringUtils.isEmpty(csv)) {
             throw new IllegalArgumentException("No CSV was provided");
         }

         try {
             // We don't need to close the underlying ByteArrayInputStream
             return parse(toInputStream(csv, UTF_8), UTF_8, format);
         } catch (IOException e) {
             throw new RuntimeException("Failed to parse CSV", e);
         }
     }

     public Map<String, CSVFormat> getFormats() {
         return createCSVFormats();
     }

     public CommonsCSVPrinterFacade printer() throws IOException {
         return printer(defaultCSVOutputFormat());
     }

     public CommonsCSVPrinterFacade printer(CSVFormat csvFormat) throws IOException {
         return new CommonsCSVPrinterFacade(csvFormat);
     }

     /**
      * Extract the list of unique values (keys) of the column "name".
      *
      * @param records records to process
      * @param name    column name to process
      * @return unique keys
      */
     public List<String> toKeys(Collection<CSVRecord> records, String name) {
         return toKeys(records, new ValueResolver(name));
     }

     /**
      * Extract the list of unique values (keys) of the column with the given index..
      *
      * @param records records to process
      * @param index   column index to map
      * @return unique keys
      */
     public List<String> toKeys(Collection<CSVRecord> records, Integer index) {
         return toKeys(records, new ValueResolver(index));
     }

     /**
      * Map the given value of the CVS record into (key to record). If duplicates
      * are encountered return the first occurrence of the CVS record. The map
      * retains the insertion order of they keys.
      *
      * @param records records to process
      * @param name    column name to map
      * @return map of records
      */
     public Map<String, CSVRecord> toMap(Collection<CSVRecord> records, String name) {
         return toMap(records, new ValueResolver(name));
     }

     /**
      * Map the given value of the CVS record into (key to record). If duplicates
      * are encountered return the first occurrence of the CVS record. The map
      * retains the insertion order of they keys.
      *
      * @param records records to process
      * @param index   column index to map
      * @return map of records
      */
     public Map<String, CSVRecord> toMap(Collection<CSVRecord> records, Integer index) {
         return toMap(records, new ValueResolver(index));
     }

     /**
      * Map the given value of the CVS record into a list of records.
      *
      * @param records records to process
      * @param name    column name to map
      * @return map of records
      */
     public Map<String, List<CSVRecord>> toMultiMap(Collection<CSVRecord> records, String name) {
         return toMultiMap(records, new ValueResolver(name));
     }

     /**
      * Map the given value of the CVS record into a list of records.
      *
      * @param records records to process
      * @param index   column index to map
      * @return map of records
      */
     public Map<String, List<CSVRecord>> toMultiMap(Collection<CSVRecord> records, Integer index) {
         return toMultiMap(records, new ValueResolver(index));
     }

     /**
      * Maps the symbolic name of a delimiter to a single character since it
      * is not possible to define commonly used delimiters on the command line.
      *
      * @param name symbolic name of delimiter
      * @return CSV delimiter
      */
     public char toDelimiter(String name) {
         if (name == null || name.isEmpty()) {
             throw new IllegalArgumentException("Now CSV delimiter provided");
         }

         switch (name.toUpperCase().trim()) {
             case "COMMA":
                 return ',';
             case "HASH":
                 return '#';
             case "PIPE":
                 return '|';
             case "RS":
                 return 30;
             case "SEMICOLON":
                 return ';';
             case "SPACE":
                 return ' ';
             case "TAB":
                 return '\t';
             default:
                 if (name.length() == 1) {
                     return name.charAt(0);
                 } else {
                     throw new IllegalArgumentException("Unsupported CSV delimiter: " + name);
                 }
         }
     }

     @Override
     public String toString() {
         return "Process CSV files using Apache Commons CSV (see https://commons.apache.org/proper/commons-csv/)";
     }

     private static CSVParser parse(InputStream is, Charset charset, CSVFormat format) throws IOException {
         if (is == null) {
             throw new IllegalArgumentException("No input stream was provided");
         }

         return CSVParser.parse(is, charset, format);
     }

     private static List<String> toKeys(Collection<CSVRecord> csvRecords, Function<CSVRecord, String> value) {
         return csvRecords.stream()
                 .map(value)
                 .distinct()
                 .collect(toList());
     }

     private static Map<String, CSVRecord> toMap(Collection<CSVRecord> records, Function<CSVRecord, String> value) {
         return records.stream()
                 .collect(Collectors.toMap(
                         value,
                         record -> record,
                         (firstKey, currentKey) -> firstKey,
                         LinkedHashMap::new
                 ));
     }

     private static Map<String, List<CSVRecord>> toMultiMap(Collection<CSVRecord> records, Function<CSVRecord, String> value) {
         final Map<String, List<CSVRecord>> result = new LinkedHashMap<>();
         final List<String> keys = toKeys(records, value);
         keys.forEach(key -> result.put(key, new ArrayList<>()));
         records.forEach(record -> result.get(value.apply(record)).add(record));
         return result;
     }

     private static Map<String, CSVFormat> createCSVFormats() {
         final Map<String, CSVFormat> result = new HashMap<>();
         result.put("DEFAULT", CSVFormat.DEFAULT);
         result.put("DATAFRAME", CSVFormat.RFC4180.withDelimiter(';').withFirstRecordAsHeader());
         result.put("EXCEL", CSVFormat.EXCEL);
         result.put("INFORMIX_UNLOAD", CSVFormat.INFORMIX_UNLOAD);
         result.put("INFORMIX_UNLOAD_CSV", CSVFormat.INFORMIX_UNLOAD_CSV);
         result.put("MONGODB_CSV", CSVFormat.MONGODB_CSV);
         result.put("MONGODB_TSV", CSVFormat.MONGODB_TSV);
         result.put("MYSQL", CSVFormat.MYSQL);
         result.put("RFC4180", CSVFormat.RFC4180);
         result.put("ORACLE", CSVFormat.ORACLE);
         result.put("POSTGRESQL_CSV", CSVFormat.POSTGRESQL_CSV);
         result.put("POSTGRESQL_TEXT", CSVFormat.POSTGRESQL_TEXT);
         result.put("TDF", CSVFormat.TDF);
         return result;
     }

     /**
      * Provides a CSV default input format.
      *
      * @return CSV format
      */
     private static CSVFormat defaultCSVInputFormat() {
         return CSVFormat.DEFAULT;
     }

     /**
      * Provides a CSV default output format.
      *
      * @return CSV format
      */
     private static CSVFormat defaultCSVOutputFormat() {
         return CSVFormat.DEFAULT;
     }

     private static final class ValueResolver implements Function<CSVRecord, String> {

         private final Integer index;
         private final String name;

         ValueResolver(Integer index) {
             this.index = requireNonNull(index);
             this.name = null;
         }

         ValueResolver(String name) {
             this.index = null;
             this.name = requireNonNull(name);
         }

         @Override
         public String apply(CSVRecord record) {
             return index != null ? record.get(index) : record.get(name);
         }
     }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.freemarker.generator.tools.commonscsv;

	import org.apache.commons.csv.CSVFormat;
	import org.apache.commons.csv.CSVParser;
	import org.apache.commons.csv.CSVRecord;
	import org.apache.commons.io.input.BOMInputStream;
	import org.apache.freemarker.generator.base.datasource.DataSource;
	import org.apache.freemarker.generator.base.util.StringUtils;
	import org.apache.freemarker.generator.tools.commonscsv.impl.CommonsCSVPrinterFacade;

	import java.io.IOException;
	import java.io.InputStream;
	import java.nio.charset.Charset;
	import java.util.ArrayList;
	import java.util.Collection;
	import java.util.HashMap;
	import java.util.LinkedHashMap;
	import java.util.List;
	import java.util.Map;
	import java.util.function.Function;
	import java.util.stream.Collectors;

	import static java.nio.charset.StandardCharsets.UTF_8;
	import static java.util.Objects.requireNonNull;
	import static java.util.stream.Collectors.toList;
	import static org.apache.commons.io.IOUtils.toInputStream;

	public class CommonsCSVTool {

	public CSVParser parse(DataSource dataSource) {
	return parse(dataSource, defaultCSVInputFormat());
	}

	public List<CSVParser> parse(Collection<DataSource> dataSources) {
	return dataSources.stream()
	.map(this::parse)
	.collect(toList());
	}

	public CSVParser parse(DataSource dataSource, CSVFormat format) {
	if (dataSource == null) {
	throw new IllegalArgumentException("No data source was provided");
	}

	try {
	// As stated in the documentation : "If you do not read all records from the given {@code reader},
	// you should call {@link #close()} on the parser, unless you close the {@code reader}."
	// The underlying input stream is closed by the data source by its "CloseableReaper".
	final InputStream is = new BOMInputStream(dataSource.getInputStream(), false);
	return parse(is, dataSource.getCharset(), format);
	} catch (IOException e) {
	throw new RuntimeException("Failed to parse CSV: " + dataSource, e);
	}
	}

	public CSVParser parse(String csv) {
	return parse(csv, defaultCSVInputFormat());
	}

	public CSVParser parse(String csv, CSVFormat format) {
	if (StringUtils.isEmpty(csv)) {
	throw new IllegalArgumentException("No CSV was provided");
	}

	try {
	// We don't need to close the underlying ByteArrayInputStream
	return parse(toInputStream(csv, UTF_8), UTF_8, format);
	} catch (IOException e) {
	throw new RuntimeException("Failed to parse CSV", e);
	}
	}

	public Map<String, CSVFormat> getFormats() {
	return createCSVFormats();
	}

	public CommonsCSVPrinterFacade printer() throws IOException {
	return printer(defaultCSVOutputFormat());
	}

	public CommonsCSVPrinterFacade printer(CSVFormat csvFormat) throws IOException {
	return new CommonsCSVPrinterFacade(csvFormat);
	}

	/**
	* Extract the list of unique values (keys) of the column "name".
	*
	* @param records records to process
	* @param name column name to process
	* @return unique keys
	*/
	public List<String> toKeys(Collection<CSVRecord> records, String name) {
	return toKeys(records, new ValueResolver(name));
	}

	/**
	* Extract the list of unique values (keys) of the column with the given index..
	*
	* @param records records to process
	* @param index column index to map
	* @return unique keys
	*/
	public List<String> toKeys(Collection<CSVRecord> records, Integer index) {
	return toKeys(records, new ValueResolver(index));
	}

	/**
	* Map the given value of the CVS record into (key to record). If duplicates
	* are encountered return the first occurrence of the CVS record. The map
	* retains the insertion order of they keys.
	*
	* @param records records to process
	* @param name column name to map
	* @return map of records
	*/
	public Map<String, CSVRecord> toMap(Collection<CSVRecord> records, String name) {
	return toMap(records, new ValueResolver(name));
	}

	/**
	* Map the given value of the CVS record into (key to record). If duplicates
	* are encountered return the first occurrence of the CVS record. The map
	* retains the insertion order of they keys.
	*
	* @param records records to process
	* @param index column index to map
	* @return map of records
	*/
	public Map<String, CSVRecord> toMap(Collection<CSVRecord> records, Integer index) {
	return toMap(records, new ValueResolver(index));
	}

	/**
	* Map the given value of the CVS record into a list of records.
	*
	* @param records records to process
	* @param name column name to map
	* @return map of records
	*/
	public Map<String, List<CSVRecord>> toMultiMap(Collection<CSVRecord> records, String name) {
	return toMultiMap(records, new ValueResolver(name));
	}

	/**
	* Map the given value of the CVS record into a list of records.
	*
	* @param records records to process
	* @param index column index to map
	* @return map of records
	*/
	public Map<String, List<CSVRecord>> toMultiMap(Collection<CSVRecord> records, Integer index) {
	return toMultiMap(records, new ValueResolver(index));
	}

	/**
	* Maps the symbolic name of a delimiter to a single character since it
	* is not possible to define commonly used delimiters on the command line.
	*
	* @param name symbolic name of delimiter
	* @return CSV delimiter
	*/
	public char toDelimiter(String name) {
	if (name == null \|\| name.isEmpty()) {
	throw new IllegalArgumentException("Now CSV delimiter provided");
	}

	switch (name.toUpperCase().trim()) {
	case "COMMA":
	return ',';
	case "HASH":
	return '#';
	case "PIPE":
	return '\|';
	case "RS":
	return 30;
	case "SEMICOLON":
	return ';';
	case "SPACE":
	return ' ';
	case "TAB":
	return '\t';
	default:
	if (name.length() == 1) {
	return name.charAt(0);
	} else {
	throw new IllegalArgumentException("Unsupported CSV delimiter: " + name);
	}
	}
	}

	@Override
	public String toString() {
	return "Process CSV files using Apache Commons CSV (see https://commons.apache.org/proper/commons-csv/)";
	}

	private static CSVParser parse(InputStream is, Charset charset, CSVFormat format) throws IOException {
	if (is == null) {
	throw new IllegalArgumentException("No input stream was provided");
	}

	return CSVParser.parse(is, charset, format);
	}

	private static List<String> toKeys(Collection<CSVRecord> csvRecords, Function<CSVRecord, String> value) {
	return csvRecords.stream()
	.map(value)
	.distinct()
	.collect(toList());
	}

	private static Map<String, CSVRecord> toMap(Collection<CSVRecord> records, Function<CSVRecord, String> value) {
	return records.stream()
	.collect(Collectors.toMap(
	value,
	record -> record,
	(firstKey, currentKey) -> firstKey,
	LinkedHashMap::new
	));
	}

	private static Map<String, List<CSVRecord>> toMultiMap(Collection<CSVRecord> records, Function<CSVRecord, String> value) {
	final Map<String, List<CSVRecord>> result = new LinkedHashMap<>();
	final List<String> keys = toKeys(records, value);
	keys.forEach(key -> result.put(key, new ArrayList<>()));
	records.forEach(record -> result.get(value.apply(record)).add(record));
	return result;
	}

	private static Map<String, CSVFormat> createCSVFormats() {
	final Map<String, CSVFormat> result = new HashMap<>();
	result.put("DEFAULT", CSVFormat.DEFAULT);
	result.put("DATAFRAME", CSVFormat.RFC4180.withDelimiter(';').withFirstRecordAsHeader());
	result.put("EXCEL", CSVFormat.EXCEL);
	result.put("INFORMIX_UNLOAD", CSVFormat.INFORMIX_UNLOAD);
	result.put("INFORMIX_UNLOAD_CSV", CSVFormat.INFORMIX_UNLOAD_CSV);
	result.put("MONGODB_CSV", CSVFormat.MONGODB_CSV);
	result.put("MONGODB_TSV", CSVFormat.MONGODB_TSV);
	result.put("MYSQL", CSVFormat.MYSQL);
	result.put("RFC4180", CSVFormat.RFC4180);
	result.put("ORACLE", CSVFormat.ORACLE);
	result.put("POSTGRESQL_CSV", CSVFormat.POSTGRESQL_CSV);
	result.put("POSTGRESQL_TEXT", CSVFormat.POSTGRESQL_TEXT);
	result.put("TDF", CSVFormat.TDF);
	return result;
	}

	/**
	* Provides a CSV default input format.
	*
	* @return CSV format
	*/
	private static CSVFormat defaultCSVInputFormat() {
	return CSVFormat.DEFAULT;
	}

	/**
	* Provides a CSV default output format.
	*
	* @return CSV format
	*/
	private static CSVFormat defaultCSVOutputFormat() {
	return CSVFormat.DEFAULT;
	}

	private static final class ValueResolver implements Function<CSVRecord, String> {

	private final Integer index;
	private final String name;

	ValueResolver(Integer index) {
	this.index = requireNonNull(index);
	this.name = null;
	}

	ValueResolver(String name) {
	this.index = null;
	this.name = requireNonNull(name);
	}

	@Override
	public String apply(CSVRecord record) {
	return index != null ? record.get(index) : record.get(name);
	}
	}
	}