go/arrow/csv/common.go - arrow - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 // http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 // Package csv reads CSV files and presents the extracted data as records. It also
 // writes records to CSV files.
 package csv

 import (
 	"errors"
 	"fmt"

 	"github.com/apache/arrow/go/v6/arrow"
 	"github.com/apache/arrow/go/v6/arrow/memory"
 )

 // ErrMismatchFields is the package's sentinel error for a count mismatch.
 // NOTE(review): presumably returned when a CSV row does not line up with the
 // schema's fields; confirm against the Reader implementation.
 var ErrMismatchFields = errors.New("arrow/csv: number of records mismatch")

 type (
 	// Option configures a CSV reader/writer. It is a function that receives
 	// the value being configured and mutates it in place.
 	Option func(config)

 	// config is the untyped target handed to an Option; each Option inspects
 	// the dynamic type to decide which settings it may apply.
 	config interface{}
 )

 // WithComma returns an Option that sets the field separator character to c.
 //
 // The option is accepted by both *Reader and *Writer; applying it to any
 // other config type panics.
 func WithComma(c rune) Option {
 	return func(target config) {
 		if rd, ok := target.(*Reader); ok {
 			rd.r.Comma = c
 			return
 		}
 		if wr, ok := target.(*Writer); ok {
 			wr.w.Comma = c
 			return
 		}
 		panic(fmt.Errorf("arrow/csv: unknown config type %T", target))
 	}
 }

 // WithComment returns an Option that sets the comment character used while
 // parsing CSV files to c.
 //
 // Only *Reader is supported; applying the option to anything else panics.
 func WithComment(c rune) Option {
 	return func(target config) {
 		rd, ok := target.(*Reader)
 		if !ok {
 			panic(fmt.Errorf("arrow/csv: unknown config type %T", target))
 		}
 		rd.r.Comment = c
 	}
 }

 // WithAllocator returns an Option that stores mem as the Arrow memory
 // allocator used while building records.
 //
 // Only *Reader is supported; applying the option to anything else panics.
 func WithAllocator(mem memory.Allocator) Option {
 	return func(target config) {
 		rd, ok := target.(*Reader)
 		if !ok {
 			panic(fmt.Errorf("arrow/csv: unknown config type %T", target))
 		}
 		rd.mem = mem
 	}
 }

 // WithChunk returns an Option that sets the chunk size used while parsing
 // CSV files.
 //
 // The meaning of n is:
 //   - n == 0 or n == 1: no chunking; the reader creates one record per row.
 //   - n > 1: rows are read in chunks of n.
 //   - n < 0: the whole CSV file is loaded into memory and a single record
 //     holding all the rows is created.
 //
 // Only *Reader is supported; applying the option to anything else panics.
 func WithChunk(n int) Option {
 	return func(target config) {
 		rd, ok := target.(*Reader)
 		if !ok {
 			panic(fmt.Errorf("arrow/csv: unknown config type %T", target))
 		}
 		rd.chunk = n
 	}
 }

 // WithCRLF returns an Option that selects the line terminator used while
 // writing CSV files: \r\n when useCRLF is true, \n otherwise.
 // The default value is false.
 //
 // Only *Writer is supported; applying the option to anything else panics.
 func WithCRLF(useCRLF bool) Option {
 	return func(target config) {
 		wr, ok := target.(*Writer)
 		if !ok {
 			panic(fmt.Errorf("arrow/csv: unknown config type %T", target))
 		}
 		wr.w.UseCRLF = useCRLF
 	}
 }

 // WithHeader returns an Option that enables or disables CSV-header handling
 // by storing useHeader in the target's header flag.
 //
 // The option is accepted by both *Reader and *Writer; applying it to any
 // other config type panics.
 func WithHeader(useHeader bool) Option {
 	return func(target config) {
 		if rd, ok := target.(*Reader); ok {
 			rd.header = useHeader
 			return
 		}
 		if wr, ok := target.(*Writer); ok {
 			wr.header = useHeader
 			return
 		}
 		panic(fmt.Errorf("arrow/csv: unknown config type %T", target))
 	}
 }

 // DefaultNullValues lists the strings treated as NULL by default when a
 // Reader is configured to handle NULL values: the empty string, "NULL" and
 // "null".
 var DefaultNullValues = []string{
 	"",
 	"NULL",
 	"null",
 }

 // WithNullReader returns an Option that configures NULL handling on a CSV
 // Reader.
 //
 // If stringsCanBeNull is true, a string that matches one of nullValues is
 // interpreted as NULL. Numeric columns are checked for nulls in all cases.
 //
 // The reader receives its own copy of nullValues. When no nullValues are
 // passed in, a copy of DefaultNullValues is used instead.
 //
 // Only *Reader is supported; applying the option to anything else panics.
 func WithNullReader(stringsCanBeNull bool, nullValues ...string) Option {
 	return func(target config) {
 		rd, ok := target.(*Reader)
 		if !ok {
 			panic(fmt.Errorf("arrow/csv: unknown config type %T", target))
 		}
 		rd.stringsCanBeNull = stringsCanBeNull

 		// Fall back to the package-level defaults when the caller gave none.
 		if len(nullValues) == 0 {
 			nullValues = DefaultNullValues
 		}

 		// Copy so later changes to the source slice do not affect the reader.
 		nulls := make([]string, len(nullValues))
 		copy(nulls, nullValues)
 		rd.nulls = nulls
 	}
 }

 // WithNullWriter returns an Option that sets the string written in place of
 // NULL values. The default is set in NewWriter().
 //
 // Only *Writer is supported; applying the option to anything else panics.
 func WithNullWriter(null string) Option {
 	return func(target config) {
 		wr, ok := target.(*Writer)
 		if !ok {
 			panic(fmt.Errorf("arrow/csv: unknown config type %T", target))
 		}
 		wr.nullValue = null
 	}
 }

 // validate panics if any field of schema has a data type outside the
 // accepted set: boolean, signed and unsigned integers of 8 to 64 bits,
 // 32- and 64-bit floats, and strings. The panic value is an error naming the
 // offending field's index, name and type.
 func validate(schema *arrow.Schema) {
 	for idx, field := range schema.Fields() {
 		switch field.Type.(type) {
 		case *arrow.BooleanType,
 			*arrow.Int8Type, *arrow.Int16Type, *arrow.Int32Type, *arrow.Int64Type,
 			*arrow.Uint8Type, *arrow.Uint16Type, *arrow.Uint32Type, *arrow.Uint64Type,
 			*arrow.Float32Type, *arrow.Float64Type,
 			*arrow.StringType:
 			// Accepted type: move on to the next field.
 			continue
 		}
 		panic(fmt.Errorf("arrow/csv: field %d (%s) has invalid data type %T", idx, field.Name, field.Type))
 	}
 }
	// Licensed to the Apache Software Foundation (ASF) under one
	// or more contributor license agreements. See the NOTICE file
	// distributed with this work for additional information
	// regarding copyright ownership. The ASF licenses this file
	// to you under the Apache License, Version 2.0 (the
	// "License"); you may not use this file except in compliance
	// with the License. You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	// Package csv reads CSV files and presents the extracted data as records. It also
	// writes records to CSV files.
	package csv

	import (
	"errors"
	"fmt"

	"github.com/apache/arrow/go/v6/arrow"
	"github.com/apache/arrow/go/v6/arrow/memory"
	)

	// ErrMismatchFields is the package's sentinel error for a count mismatch.
	// NOTE(review): presumably returned when a CSV row does not line up with the
	// schema's fields; confirm against the Reader implementation.
	var ErrMismatchFields = errors.New("arrow/csv: number of records mismatch")

	type (
		// Option configures a CSV reader/writer. It is a function that receives
		// the value being configured and mutates it in place.
		Option func(config)

		// config is the untyped target handed to an Option; each Option inspects
		// the dynamic type to decide which settings it may apply.
		config interface{}
	)

	// WithComma returns an Option that sets the field separator character to c.
	//
	// The option is accepted by both *Reader and *Writer; applying it to any
	// other config type panics.
	func WithComma(c rune) Option {
		return func(target config) {
			if rd, ok := target.(*Reader); ok {
				rd.r.Comma = c
				return
			}
			if wr, ok := target.(*Writer); ok {
				wr.w.Comma = c
				return
			}
			panic(fmt.Errorf("arrow/csv: unknown config type %T", target))
		}
	}

	// WithComment returns an Option that sets the comment character used while
	// parsing CSV files to c.
	//
	// Only *Reader is supported; applying the option to anything else panics.
	func WithComment(c rune) Option {
		return func(target config) {
			rd, ok := target.(*Reader)
			if !ok {
				panic(fmt.Errorf("arrow/csv: unknown config type %T", target))
			}
			rd.r.Comment = c
		}
	}

	// WithAllocator returns an Option that stores mem as the Arrow memory
	// allocator used while building records.
	//
	// Only *Reader is supported; applying the option to anything else panics.
	func WithAllocator(mem memory.Allocator) Option {
		return func(target config) {
			rd, ok := target.(*Reader)
			if !ok {
				panic(fmt.Errorf("arrow/csv: unknown config type %T", target))
			}
			rd.mem = mem
		}
	}

	// WithChunk returns an Option that sets the chunk size used while parsing
	// CSV files.
	//
	// The meaning of n is:
	//   - n == 0 or n == 1: no chunking; the reader creates one record per row.
	//   - n > 1: rows are read in chunks of n.
	//   - n < 0: the whole CSV file is loaded into memory and a single record
	//     holding all the rows is created.
	//
	// Only *Reader is supported; applying the option to anything else panics.
	func WithChunk(n int) Option {
		return func(target config) {
			rd, ok := target.(*Reader)
			if !ok {
				panic(fmt.Errorf("arrow/csv: unknown config type %T", target))
			}
			rd.chunk = n
		}
	}

	// WithCRLF returns an Option that selects the line terminator used while
	// writing CSV files: \r\n when useCRLF is true, \n otherwise.
	// The default value is false.
	//
	// Only *Writer is supported; applying the option to anything else panics.
	func WithCRLF(useCRLF bool) Option {
		return func(target config) {
			wr, ok := target.(*Writer)
			if !ok {
				panic(fmt.Errorf("arrow/csv: unknown config type %T", target))
			}
			wr.w.UseCRLF = useCRLF
		}
	}

	// WithHeader returns an Option that enables or disables CSV-header handling
	// by storing useHeader in the target's header flag.
	//
	// The option is accepted by both *Reader and *Writer; applying it to any
	// other config type panics.
	func WithHeader(useHeader bool) Option {
		return func(target config) {
			if rd, ok := target.(*Reader); ok {
				rd.header = useHeader
				return
			}
			if wr, ok := target.(*Writer); ok {
				wr.header = useHeader
				return
			}
			panic(fmt.Errorf("arrow/csv: unknown config type %T", target))
		}
	}

	// DefaultNullValues lists the strings treated as NULL by default when a
	// Reader is configured to handle NULL values: the empty string, "NULL" and
	// "null".
	var DefaultNullValues = []string{
		"",
		"NULL",
		"null",
	}

	// WithNullReader returns an Option that configures NULL handling on a CSV
	// Reader.
	//
	// If stringsCanBeNull is true, a string that matches one of nullValues is
	// interpreted as NULL. Numeric columns are checked for nulls in all cases.
	//
	// The reader receives its own copy of nullValues. When no nullValues are
	// passed in, a copy of DefaultNullValues is used instead.
	//
	// Only *Reader is supported; applying the option to anything else panics.
	func WithNullReader(stringsCanBeNull bool, nullValues ...string) Option {
		return func(target config) {
			rd, ok := target.(*Reader)
			if !ok {
				panic(fmt.Errorf("arrow/csv: unknown config type %T", target))
			}
			rd.stringsCanBeNull = stringsCanBeNull

			// Fall back to the package-level defaults when the caller gave none.
			if len(nullValues) == 0 {
				nullValues = DefaultNullValues
			}

			// Copy so later changes to the source slice do not affect the reader.
			nulls := make([]string, len(nullValues))
			copy(nulls, nullValues)
			rd.nulls = nulls
		}
	}

	// WithNullWriter returns an Option that sets the string written in place of
	// NULL values. The default is set in NewWriter().
	//
	// Only *Writer is supported; applying the option to anything else panics.
	func WithNullWriter(null string) Option {
		return func(target config) {
			wr, ok := target.(*Writer)
			if !ok {
				panic(fmt.Errorf("arrow/csv: unknown config type %T", target))
			}
			wr.nullValue = null
		}
	}

	// validate panics if any field of schema has a data type outside the
	// accepted set: boolean, signed and unsigned integers of 8 to 64 bits,
	// 32- and 64-bit floats, and strings. The panic value is an error naming the
	// offending field's index, name and type.
	func validate(schema *arrow.Schema) {
		for i, f := range schema.Fields() {
			switch ft := f.Type.(type) {
			// Every case must name the pointer type: Arrow's concrete data types
			// implement arrow.DataType through pointer receivers, so a value type
			// such as arrow.Int8Type is an impossible type-switch case.
			case *arrow.BooleanType:
			case *arrow.Int8Type, *arrow.Int16Type, *arrow.Int32Type, *arrow.Int64Type:
			case *arrow.Uint8Type, *arrow.Uint16Type, *arrow.Uint32Type, *arrow.Uint64Type:
			case *arrow.Float32Type, *arrow.Float64Type:
			case *arrow.StringType:
			default:
				panic(fmt.Errorf("arrow/csv: field %d (%s) has invalid data type %T", i, f.Name, ft))
			}
		}
	}