blob: 28777ea629e808ebd1dd60a334e556f1a04cf1a7 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# cython: profile=False
# distutils: language = c++
# cython: embedsignature = True
cdef class MemoryPool(_Weakrefable):
    """
    Base class for memory allocation.

    In addition to keeping count of the bytes it has handed out, a memory
    pool is responsible for the 64-byte alignment required by Arrow data.
    """

    def __init__(self):
        # Instances are only ever built through the factory functions,
        # which bypass __init__ via __new__ and then call init().
        message = ("Do not call {}'s constructor directly, "
                   "use pyarrow.*_memory_pool instead.")
        raise TypeError(message.format(self.__class__.__name__))

    cdef void init(self, CMemoryPool* pool):
        # Attach the underlying C++ pool that every method delegates to.
        self.pool = pool

    def bytes_allocated(self):
        """
        Return the number of bytes currently allocated from this
        memory pool.
        """
        return self.pool.bytes_allocated()

    def max_memory(self):
        """
        Return the peak memory allocation in this memory pool.

        The number may be approximate in multi-threaded applications.
        None is returned when the underlying pool reports a negative
        value, i.e. the implementation doesn't know how to compute it.
        """
        peak = self.pool.max_memory()
        if peak < 0:
            return None
        return peak

    @property
    def backend_name(self):
        """
        The name of the backend used by this MemoryPool (e.g. "jemalloc").
        """
        return frombytes(self.pool.backend_name())
cdef CMemoryPool* maybe_unbox_memory_pool(MemoryPool memory_pool):
    # Resolve an optional MemoryPool wrapper to a raw C++ pool pointer.
    # None falls back to whatever c_get_memory_pool() returns.
    if memory_pool is not None:
        return memory_pool.pool
    return c_get_memory_pool()
cdef class LoggingMemoryPool(MemoryPool):
    """
    Memory pool implementation backed by a CLoggingMemoryPool that this
    object owns. Instances are created by pyarrow.logging_memory_pool.
    """
    # Owning pointer to the C++ logging pool; released with this object.
    cdef unique_ptr[CLoggingMemoryPool] logging_pool

    def __init__(self):
        message = ("Do not call {}'s constructor directly, "
                   "use pyarrow.logging_memory_pool instead.")
        raise TypeError(message.format(self.__class__.__name__))
cdef class ProxyMemoryPool(MemoryPool):
    """
    Memory pool implementation that forwards to another memory pool while
    keeping its own count of the bytes and maximum memory allocated
    through its direct calls.
    """
    # Owning pointer to the C++ proxy pool; released with this object.
    cdef unique_ptr[CProxyMemoryPool] proxy_pool

    def __init__(self):
        message = ("Do not call {}'s constructor directly, "
                   "use pyarrow.proxy_memory_pool instead.")
        raise TypeError(message.format(self.__class__.__name__))
def default_memory_pool():
    """
    Return the process-global memory pool.
    """
    # __new__ sidesteps MemoryPool.__init__, which always raises.
    cdef MemoryPool wrapper = MemoryPool.__new__(MemoryPool)
    wrapper.init(c_get_memory_pool())
    return wrapper
def proxy_memory_pool(MemoryPool parent not None):
    """
    Create and return a MemoryPool instance that redirects to the
    *parent*, but with separate allocation statistics.

    Parameters
    ----------
    parent : MemoryPool
        The memory pool that allocations are redirected to. Must not be
        None; a TypeError is raised otherwise.
    """
    # `not None` matters: without it Cython accepts None for a typed
    # argument and `parent.pool` would read a C attribute of None.
    # __new__ sidesteps ProxyMemoryPool.__init__, which always raises.
    cdef ProxyMemoryPool out = ProxyMemoryPool.__new__(ProxyMemoryPool)
    # NOTE(review): only the parent's raw pointer is handed to the proxy;
    # presumably the caller must keep *parent* alive -- confirm.
    out.proxy_pool.reset(new CProxyMemoryPool(parent.pool))
    out.init(out.proxy_pool.get())
    return out
def logging_memory_pool(MemoryPool parent not None):
    """
    Create and return a MemoryPool instance that redirects to the
    *parent*, but also dumps allocation logs on stderr.

    Parameters
    ----------
    parent : MemoryPool
        The memory pool that allocations are redirected to. Must not be
        None; a TypeError is raised otherwise.
    """
    # `not None` matters: without it Cython accepts None for a typed
    # argument and `parent.pool` would read a C attribute of None.
    # __new__ takes only the class, as in the other factory functions;
    # the parent is wired in below through the C++ constructor.
    cdef LoggingMemoryPool out = LoggingMemoryPool.__new__(LoggingMemoryPool)
    # NOTE(review): only the parent's raw pointer is handed to the logging
    # pool; presumably the caller must keep *parent* alive -- confirm.
    out.logging_pool.reset(new CLoggingMemoryPool(parent.pool))
    out.init(out.logging_pool.get())
    return out
def system_memory_pool():
    """
    Return a memory pool based on the C malloc heap.
    """
    # __new__ sidesteps MemoryPool.__init__, which always raises.
    cdef MemoryPool wrapper = MemoryPool.__new__(MemoryPool)
    wrapper.init(c_system_memory_pool())
    return wrapper
def jemalloc_memory_pool():
    """
    Return a memory pool based on the jemalloc heap.

    NotImplementedError is raised if jemalloc support is not enabled.
    """
    cdef CMemoryPool* raw_pool
    cdef MemoryPool wrapper = MemoryPool.__new__(MemoryPool)
    # The C++ call fills in raw_pool; check_status raises on failure.
    check_status(c_jemalloc_memory_pool(&raw_pool))
    wrapper.init(raw_pool)
    return wrapper
def mimalloc_memory_pool():
    """
    Return a memory pool based on the mimalloc heap.

    NotImplementedError is raised if mimalloc support is not enabled.
    """
    cdef CMemoryPool* raw_pool
    cdef MemoryPool wrapper = MemoryPool.__new__(MemoryPool)
    # The C++ call fills in raw_pool; check_status raises on failure.
    check_status(c_mimalloc_memory_pool(&raw_pool))
    wrapper.init(raw_pool)
    return wrapper
def set_memory_pool(MemoryPool pool not None):
    """
    Make *pool* the default memory pool.

    Parameters
    ----------
    pool : MemoryPool
        The memory pool whose underlying C++ pool is installed as the
        default. Must not be None; a TypeError is raised otherwise.
    """
    # `not None` matters: without it Cython accepts None for a typed
    # argument and `pool.pool` would read a C attribute of None.
    # NOTE(review): only the raw pointer is installed; presumably the
    # caller must keep *pool* alive while it is the default -- confirm.
    c_set_default_memory_pool(pool.pool)
# Module-level pools used by log_memory_allocations() below.
# _default_memory_pool wraps whatever c_get_memory_pool() returns at the
# moment this statement runs (module initialisation).
cdef MemoryPool _default_memory_pool = default_memory_pool()
# _logging_memory_pool redirects to that same pool; it is created once here
# and reused each time logging is enabled.
cdef LoggingMemoryPool _logging_memory_pool = logging_memory_pool(
_default_memory_pool)
def log_memory_allocations(enable=True):
    """
    Enable or disable memory allocator logging for debugging purposes.

    Parameters
    ----------
    enable : bool, default True
        Pass False to disable logging
    """
    # Swap the default pool between the module-level logging pool and the
    # module-level pool captured when this module was initialised.
    target = _logging_memory_pool if enable else _default_memory_pool
    set_memory_pool(target)
def total_allocated_bytes():
    """
    Return the number of bytes currently allocated from the default
    memory pool. Allocations made through other memory pools may not be
    included.
    """
    return c_get_memory_pool().bytes_allocated()
def jemalloc_set_decay_ms(decay_ms):
    """
    Set arenas.dirty_decay_ms and arenas.muzzy_decay_ms to the given number
    of milliseconds.

    With a value of 0 (the default), dirty / muzzy memory pages are
    released to the OS right away; a higher value results in a time-based
    decay. See the jemalloc docs for more information.

    It's best to set this at the start of your application.

    Parameters
    ----------
    decay_ms : int
        Number of milliseconds to set for jemalloc decay conf parameters.
        Note that this change will only affect future memory arenas
    """
    # check_status turns a failed C++ status into a Python exception.
    check_status(c_jemalloc_set_decay_ms(decay_ms))