blob: 97d0f86f8bde2f2472e50a0696218eb449c39610 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import six
from pyarrow.compat import tobytes
cdef class StringBuilder:
"""
Builder class for UTF8 strings. This class exposes facilities for
incrementally adding string values and building the null bitmap
for a pyarrow.Array (type='string').
"""
cdef:
unique_ptr[CStringBuilder] builder
def __cinit__(self, MemoryPool memory_pool=None):
cdef CMemoryPool* pool = maybe_unbox_memory_pool(memory_pool)
self.builder.reset(new CStringBuilder(pool))
def append(self, value):
"""
Append a single value to the builder. The value can either be a
string/bytes object or a null value (np.nan or None).
Parameters
----------
value : string/bytes or np.nan/None
The value to append to the string array builder
"""
if value is None or value is np.nan:
self.builder.get().AppendNull()
elif isinstance(value, (six.string_types, six.binary_type)):
self.builder.get().Append(tobytes(value))
else:
raise TypeError('StringBuilder only accepts string objects')
def append_values(self, values):
"""
Append all the values in an iterable to the string array builder
object.
Parameters
----------
values : iterable of string/bytes or np.nan/None values
The values to append to the string array builder
"""
for value in values:
self.append(value)
def finish(self):
"""
Return result of builder as an Array object; also resets the builder.
Returns
-------
array : pyarrow.Array
"""
cdef shared_ptr[CArray] out
with nogil:
self.builder.get().Finish(&out)
return pyarrow_wrap_array(out)
@property
def null_count(self):
return self.builder.get().null_count()
def __len__(self):
return self.builder.get().length()