| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
// Package csv reads CSV files and presents the extracted data as records. It
// also writes records to CSV files.
| package csv |
| |
| import ( |
| "errors" |
| "fmt" |
| |
| "github.com/apache/arrow/go/v6/arrow" |
| "github.com/apache/arrow/go/v6/arrow/memory" |
| ) |
| |
// ErrMismatchFields is a sentinel error value exported by this package. Its
// message reports a mismatch in the number of records.
//
// NOTE(review): presumably returned by the Reader/Writer when a row does not
// line up with the schema; confirm against the callers.
var ErrMismatchFields = errors.New("arrow/csv: number of records mismatch")
| |
type (
	// Option configures a CSV reader/writer. An Option is a function that
	// receives the object being configured and mutates it in place.
	Option func(config)

	// config is the value handed to an Option. It is an empty interface, so
	// each Option inspects the dynamic type to decide what to configure.
	config interface{}
)
| |
| // WithComma specifies the fields separation character used while parsing CSV files. |
| func WithComma(c rune) Option { |
| return func(cfg config) { |
| switch cfg := cfg.(type) { |
| case *Reader: |
| cfg.r.Comma = c |
| case *Writer: |
| cfg.w.Comma = c |
| default: |
| panic(fmt.Errorf("arrow/csv: unknown config type %T", cfg)) |
| } |
| } |
| } |
| |
| // WithComment specifies the comment character used while parsing CSV files. |
| func WithComment(c rune) Option { |
| return func(cfg config) { |
| switch cfg := cfg.(type) { |
| case *Reader: |
| cfg.r.Comment = c |
| default: |
| panic(fmt.Errorf("arrow/csv: unknown config type %T", cfg)) |
| } |
| } |
| } |
| |
| // WithAllocator specifies the Arrow memory allocator used while building records. |
| func WithAllocator(mem memory.Allocator) Option { |
| return func(cfg config) { |
| switch cfg := cfg.(type) { |
| case *Reader: |
| cfg.mem = mem |
| default: |
| panic(fmt.Errorf("arrow/csv: unknown config type %T", cfg)) |
| } |
| } |
| } |
| |
| // WithChunk specifies the chunk size used while parsing CSV files. |
| // |
| // If n is zero or 1, no chunking will take place and the reader will create |
| // one record per row. |
| // If n is greater than 1, chunks of n rows will be read. |
| // If n is negative, the reader will load the whole CSV file into memory and |
| // create one big record with all the rows. |
| func WithChunk(n int) Option { |
| return func(cfg config) { |
| switch cfg := cfg.(type) { |
| case *Reader: |
| cfg.chunk = n |
| default: |
| panic(fmt.Errorf("arrow/csv: unknown config type %T", cfg)) |
| } |
| } |
| } |
| |
| // WithCRLF specifies the line terminator used while writing CSV files. |
| // If useCRLF is true, \r\n is used as the line terminator, otherwise \n is used. |
| // The default value is false. |
| func WithCRLF(useCRLF bool) Option { |
| return func(cfg config) { |
| switch cfg := cfg.(type) { |
| case *Writer: |
| cfg.w.UseCRLF = useCRLF |
| default: |
| panic(fmt.Errorf("arrow/csv: unknown config type %T", cfg)) |
| } |
| } |
| } |
| |
| // WithHeader enables or disables CSV-header handling. |
| func WithHeader(useHeader bool) Option { |
| return func(cfg config) { |
| switch cfg := cfg.(type) { |
| case *Reader: |
| cfg.header = useHeader |
| case *Writer: |
| cfg.header = useHeader |
| default: |
| panic(fmt.Errorf("arrow/csv: unknown config type %T", cfg)) |
| } |
| } |
| } |
| |
// DefaultNullValues is the set of values considered as NULL values by default
// when Reader is configured to handle NULL values: the empty string, "NULL"
// and "null".
var DefaultNullValues = []string{
	"",
	"NULL",
	"null",
}
| |
| // WithNullReader sets options for a CSV Reader pertaining to NULL value |
| // handling. If stringsCanBeNull is true, then a string that matches one of the |
| // nullValues set will be interpreted as NULL. Numeric columns will be checked |
| // for nulls in all cases. If no nullValues arguments are passed in, the |
| // defaults set in NewReader() will be kept. |
| // |
| // When no NULL values is given, the default set is taken from DefaultNullValues. |
| func WithNullReader(stringsCanBeNull bool, nullValues ...string) Option { |
| return func(cfg config) { |
| switch cfg := cfg.(type) { |
| case *Reader: |
| cfg.stringsCanBeNull = stringsCanBeNull |
| |
| if len(nullValues) == 0 { |
| nullValues = DefaultNullValues |
| } |
| cfg.nulls = make([]string, len(nullValues)) |
| copy(cfg.nulls, nullValues) |
| default: |
| panic(fmt.Errorf("arrow/csv: unknown config type %T", cfg)) |
| } |
| } |
| } |
| |
| // WithNullWriter sets the null string written for NULL values. The default is |
| // set in NewWriter(). |
| func WithNullWriter(null string) Option { |
| return func(cfg config) { |
| switch cfg := cfg.(type) { |
| case *Writer: |
| cfg.nullValue = null |
| default: |
| panic(fmt.Errorf("arrow/csv: unknown config type %T", cfg)) |
| } |
| } |
| } |
| |
| func validate(schema *arrow.Schema) { |
| for i, f := range schema.Fields() { |
| switch ft := f.Type.(type) { |
| case *arrow.BooleanType: |
| case *arrow.Int8Type, *arrow.Int16Type, *arrow.Int32Type, *arrow.Int64Type: |
| case *arrow.Uint8Type, *arrow.Uint16Type, *arrow.Uint32Type, *arrow.Uint64Type: |
| case *arrow.Float32Type, *arrow.Float64Type: |
| case *arrow.StringType: |
| default: |
| panic(fmt.Errorf("arrow/csv: field %d (%s) has invalid data type %T", i, f.Name, ft)) |
| } |
| } |
| } |