| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| import pathlib |
| import re |
| import subprocess |
| import sys |
| |
| |
| # Generate the nanoarrow_c.pxd file used by the Cython extensions |
class PxdGenerator:
    """Generate a Cython ``.pxd`` declaration file from a C header.

    The header is scanned with regular expressions (this is not a C parser)
    for three kinds of declarations:

    * ``typedef`` statements,
    * ``struct``/``union``/``enum`` definitions whose tag starts with
      ``Arrow``, and
    * declarations of functions whose name starts with ``Arrow``.

    Subclasses tailor the output by overriding ``_preprocess_content``,
    ``_write_defs``, ``_pxd_header`` or any of the ``_find_*`` methods.
    """

    def __init__(self):
        self._define_regexes()

    def generate_pxd(self, file_in, file_out):
        """Write Cython declarations for the header ``file_in`` to ``file_out``.

        Args:
            file_in: Path of the C header to read.
            file_out: Path of the ``.pxd`` file to (over)write.
        """
        file_in_name = pathlib.Path(file_in).name

        # Read explicitly as UTF-8 (the output below is encoded as UTF-8 too)
        # so the result does not depend on the platform's locale encoding.
        with open(file_in, "r", encoding="utf-8") as header_file:
            content = header_file.read()

        # Drop // comments, then give subclasses a chance to rewrite the text
        # before it is scanned.
        content = self._strip_comments(content)
        content = self._preprocess_content(content)

        # Find typedefs, types, and function definitions
        typedefs = self._find_typedefs(content)
        types = self._find_types(content)
        func_defs = self._find_func_defs(content)

        # Make corresponding cython definitions, indented so that they sit
        # inside the "cdef extern" block written below.
        typedefs_cython = [self._typdef_to_cython(t, " ") for t in typedefs]
        types_cython = [self._type_to_cython(t, " ") for t in types]
        func_defs_cython = [self._func_def_to_cython(d, " ") for d in func_defs]

        # The header literal may be indented in this source file; strip any
        # run of spaces that follows a newline.
        header = self.re_newline_plus_indent.sub("\n", self._pxd_header())

        # Binary mode: _write_defs() implementations write bytes directly.
        with open(file_out, "wb") as output:
            output.write(header.encode("UTF-8"))

            output.write(
                f'\ncdef extern from "{file_in_name}" nogil:\n'.encode("UTF-8")
            )

            # A few things we add in manually
            self._write_defs(output)

            for type_decl in types_cython:
                output.write(type_decl.encode("UTF-8"))
                output.write(b"\n\n")

            for typedef_decl in typedefs_cython:
                output.write(typedef_decl.encode("UTF-8"))
                output.write(b"\n")

            output.write(b"\n")

            for func_decl in func_defs_cython:
                output.write(func_decl.encode("UTF-8"))
                output.write(b"\n")

    def _preprocess_content(self, content):
        """Hook: return the header text to scan. The base class changes nothing."""
        return content

    def _write_defs(self, output):
        """Hook: write extra hand-written declarations (as bytes) to ``output``.

        Called right after the ``cdef extern`` line has been written. The base
        class writes nothing.
        """
        pass

    def _define_regexes(self):
        """Compile the regular expressions used by the other methods."""
        # A // comment up to the end of its line, plus preceding whitespace.
        self.re_comment = re.compile(r"\s*//[^\n]*")
        # Everything between the "typedef" keyword and the next semicolon.
        self.re_typedef = re.compile(r"typedef(?P<typedef>[^;]+)")
        # "struct|union|enum ArrowXxx { body }" (body cannot contain "}").
        self.re_type = re.compile(
            r"(?P<type>struct|union|enum) (?P<name>Arrow[^ ]+) {(?P<body>[^}]*)}"
        )
        # A declaration "<return type> ArrowXxx(args);" at the start of a line.
        self.re_func_def = re.compile(
            r"\n(static inline )?(?P<const>const )?(struct |enum )?"
            r"(?P<return_type>[A-Za-z0-9_*]+) "
            r"(?P<name>Arrow[A-Za-z0-9]+)\((?P<args>[^\)]*)\);"
        )
        # A tagged reference such as "struct ArrowXxx"; group 2 is the name.
        self.re_tagged_type = re.compile(
            r"(?P<type>struct|union|enum) (?P<name>Arrow[A-Za-z]+)"
        )
        self.re_struct_delim = re.compile(r";\s*")
        self.re_enum_delim = re.compile(r",\s*")
        self.re_whitespace = re.compile(r"\s+")
        self.re_newline_plus_indent = re.compile(r"\n +")

    def _strip_comments(self, content):
        """Return ``content`` with every ``//`` comment removed."""
        return self.re_comment.sub("", content)

    def _find_typedefs(self, content):
        """Return one ``{"typedef": text}`` dict per typedef found."""
        return [m.groupdict() for m in self.re_typedef.finditer(content)]

    def _find_types(self, content):
        """Return one ``{"type", "name", "body"}`` dict per type definition."""
        return [m.groupdict() for m in self.re_type.finditer(content)]

    def _find_func_defs(self, content):
        """Return one dict per function declaration found.

        Each dict has the keys ``const``, ``return_type``, ``name`` and
        ``args``; ``const`` is ``None`` when the declaration has no leading
        ``const``.
        """
        return [m.groupdict() for m in self.re_func_def.finditer(content)]

    def _typdef_to_cython(self, t, indent=""):
        """Render a typedef match ``t`` as a Cython ``ctypedef`` line."""
        typedef = t["typedef"]
        # Cython refers to tagged types by bare name: "struct ArrowX" -> "ArrowX"
        typedef = self.re_tagged_type.sub(r"\2", typedef)
        return f"{indent}ctypedef {typedef}"

    def _type_to_cython(self, t, indent=""):
        """Render a struct/union/enum match ``t`` as a Cython block.

        The first line is ``<indent><kind> <name>:`` and every member follows
        on its own, deeper-indented line.
        """
        kind = t["type"]
        name = t["name"]
        body = self.re_tagged_type.sub(r"\2", t["body"].strip())

        # Enum members are comma separated; struct/union members end in ";".
        delim = self.re_enum_delim if kind == "enum" else self.re_struct_delim
        items = [item for item in delim.split(body) if item]

        # The leading "" makes join() emit the separator before the first item.
        cython_body = f"\n{indent} ".join([""] + items)
        return f"{indent}{kind} {name}:{cython_body}"

    def _func_def_to_cython(self, d, indent=""):
        """Render a function declaration match ``d`` as a Cython declaration."""
        return_type = d["return_type"].strip()
        if d["const"]:
            return_type = "const " + return_type
        name = d["name"]

        # Collapse multi-line argument lists onto one line, then drop the
        # struct/union/enum keyword in front of Arrow type names.
        args = self.re_whitespace.sub(" ", d["args"].strip())
        args = self.re_tagged_type.sub(r"\2", args)

        # Cython doesn't do (void)
        if args == "void":
            args = ""

        return f"{indent}{return_type} {name}({args})"

    def _pxd_header(self):
        """Return the text placed at the top of the generated file.

        The literal is indented to match this method; generate_pxd() strips
        the spaces that follow each newline before writing it.
        """
        return """
        # Licensed to the Apache Software Foundation (ASF) under one
        # or more contributor license agreements. See the NOTICE file
        # distributed with this work for additional information
        # regarding copyright ownership. The ASF licenses this file
        # to you under the Apache License, Version 2.0 (the
        # "License"); you may not use this file except in compliance
        # with the License. You may obtain a copy of the License at
        #
        # http://www.apache.org/licenses/LICENSE-2.0
        #
        # Unless required by applicable law or agreed to in writing,
        # software distributed under the License is distributed on an
        # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
        # KIND, either express or implied. See the License for the
        # specific language governing permissions and limitations
        # under the License.

        # cython: language_level = 3

        """
| |
| |
class NanoarrowPxdGenerator(PxdGenerator):
    """PxdGenerator variant used for the vendored ``nanoarrow.h`` header."""

    def _preprocess_content(self, content):
        """Return ``content`` with three size macros replaced by numbers."""
        macro_values = (
            (r"NANOARROW_MAX_FIXED_BUFFERS", "3"),
            (r"NANOARROW_BINARY_VIEW_INLINE_SIZE", "12"),
            (r"NANOARROW_BINARY_VIEW_PREFIX_SIZE", "4"),
        )
        for macro, value in macro_values:
            content = re.sub(macro, value, content)
        return content

    def _pxd_header(self):
        """Return the base header followed by the ``libc.stdint`` cimports."""
        stdint_cimports = (
            "\n"
            "from libc.stdint cimport int8_t, uint8_t, int16_t, uint16_t\n"
            "from libc.stdint cimport int32_t, uint32_t, int64_t, uint64_t\n"
        )
        return super()._pxd_header() + stdint_cimports

    def _write_defs(self, output):
        """Write ``cdef int`` declarations for constants to ``output``.

        ``output`` must accept bytes; the declarations are surrounded by a
        blank line on each side.
        """
        int_constants = (
            b"NANOARROW_OK",
            b"NANOARROW_MAX_FIXED_BUFFERS",
            b"ARROW_FLAG_DICTIONARY_ORDERED",
            b"ARROW_FLAG_NULLABLE",
            b"ARROW_FLAG_MAP_KEYS_SORTED",
        )
        output.write(b"\n")
        for constant in int_constants:
            output.write(b" cdef int " + constant + b"\n")
        output.write(b"\n")
| |
| |
class NanoarrowDevicePxdGenerator(PxdGenerator):
    """PxdGenerator variant used for the vendored ``nanoarrow_device.h``."""

    def _preprocess_content(self, content):
        """Record the ``ARROW_DEVICE_*`` macro names, then defer to the base.

        The names are stored on ``self.device_names`` for ``_write_defs``.
        """
        device_macro_pattern = "#define (ARROW_DEVICE_[A-Z0-9_]+)"
        self.device_names = re.findall(device_macro_pattern, content)
        return super()._preprocess_content(content)

    def _find_typedefs(self, content):
        """Report no typedefs, so none from the header are emitted."""
        no_typedefs = []
        return no_typedefs

    def _pxd_header(self):
        """Return the base header followed by the cimports this file needs."""
        device_cimports = (
            "\n"
            "from libc.stdint cimport int32_t, int64_t\n"
            "from nanoarrow_c cimport *\n"
        )
        return super()._pxd_header() + device_cimports

    def _write_defs(self, output):
        """Write the ``ArrowDeviceType`` typedef and one constant per device.

        ``output`` must accept bytes. Relies on ``self.device_names`` having
        been set by ``_preprocess_content``.
        """
        for preamble_line in (b"\n", b" ctypedef int32_t ArrowDeviceType\n", b"\n"):
            output.write(preamble_line)

        for name in self.device_names:
            declaration = f" cdef ArrowDeviceType {name}\n"
            output.write(declaration.encode())

        output.write(b"\n")
| |
| |
def copy_or_generate_nanoarrow_c():
    """Regenerate the vendored nanoarrow C sources in ``vendor/``.

    Any previously vendored copies of the files listed below are deleted,
    then ``ci/scripts/bundle.py`` from the enclosing checkout is run with the
    current Python interpreter to write fresh sources and headers into the
    ``vendor`` directory next to this file.

    Raises:
        ValueError: If the parent directory of this file's directory does not
            look like a nanoarrow checkout, if the bundle script exits with a
            non-zero status, or if ``nanoarrow.h`` is missing afterwards.
    """
    this_dir = pathlib.Path(__file__).parent.resolve()
    source_dir = this_dir.parent
    vendor_dir = this_dir / "vendor"

    vendored_files = [
        "nanoarrow.h",
        "nanoarrow.c",
        "nanoarrow_ipc.h",
        "nanoarrow_ipc.c",
        "nanoarrow_device.h",
        "nanoarrow_device.c",
    ]
    dst = {name: vendor_dir / name for name in vendored_files}

    # Old copies are removed up front, i.e. even when the checks below fail.
    for vendored_path in dst.values():
        vendored_path.unlink(missing_ok=True)

    is_cmake_dir = (source_dir / "CMakeLists.txt").exists()
    is_in_nanoarrow_repo = (
        is_cmake_dir and (source_dir / "src" / "nanoarrow" / "nanoarrow.h").exists()
    )

    if not is_in_nanoarrow_repo:
        raise ValueError(
            "Attempt to build source distribution outside the nanoarrow repo"
        )

    vendor_dir.mkdir(exist_ok=True)
    result = subprocess.run(
        [
            sys.executable,
            source_dir / "ci" / "scripts" / "bundle.py",
            "--symbol-namespace",
            "PythonPkg",
            "--header-namespace",
            "",
            "--source-output-dir",
            vendor_dir,
            "--include-output-dir",
            vendor_dir,
            "--with-device",
            "--with-ipc",
            "--with-flatcc",
        ],
        check=False,
    )

    # Report a failing bundle script explicitly instead of relying only on
    # the existence check below, which a partially completed run could pass.
    # ValueError keeps the exception type this function already raises.
    if result.returncode != 0:
        raise ValueError(
            "Attempt to vendor nanoarrow.c/h failed "
            f"(bundle.py exited with status {result.returncode})"
        )

    if not dst["nanoarrow.h"].exists():
        raise ValueError("Attempt to vendor nanoarrow.c/h failed")
| |
| |
| # Runs the pxd generator with some information about the file name |
def generate_nanoarrow_pxds():
    """Generate the ``.pxd`` files for the headers in ``vendor/``.

    ``nanoarrow.h`` is turned into ``nanoarrow_c.pxd`` and
    ``nanoarrow_device.h`` into ``nanoarrow_device_c.pxd``; inputs and
    outputs both live in the ``vendor`` directory next to this file.
    """
    vendor_dir = pathlib.Path(__file__).parent.resolve() / "vendor"

    # (generator class, input header, output .pxd), processed in this order
    jobs = (
        (NanoarrowPxdGenerator, "nanoarrow.h", "nanoarrow_c.pxd"),
        (NanoarrowDevicePxdGenerator, "nanoarrow_device.h", "nanoarrow_device_c.pxd"),
    )
    for generator_cls, header_name, pxd_name in jobs:
        generator_cls().generate_pxd(vendor_dir / header_name, vendor_dir / pxd_name)
| |
| |
if __name__ == "__main__":
    # Vendoring runs first: the .pxd generation step reads headers from the
    # vendor/ directory that the vendoring step populates.
    for bootstrap_step in (copy_or_generate_nanoarrow_c, generate_nanoarrow_pxds):
        bootstrap_step()