ci(python): Set unique development version before building wheels (#442)
Closes #437.
This PR uses the Arrow monorepo tag system, where the tag
`apache-arrow-x.x.x.dev` signals where the changelog should start for
the next version (and makes it easier to use various `git` tools to
calculate the development version number for Python).
It also refactors what was previously `changelog.py` (and is now
`release_tools.py`) since there's more than one thing it does now (and
because I refuse to add any more untestable/untested bash to
dev/release).
diff --git a/.github/workflows/python-wheels.yaml b/.github/workflows/python-wheels.yaml
index 4b8b3b5..f17d74d 100644
--- a/.github/workflows/python-wheels.yaml
+++ b/.github/workflows/python-wheels.yaml
@@ -17,7 +17,7 @@
name: Build Python Wheels
-# Build wheels weekly, on commit to main, or when requested
+# Build wheels on commit to main, or when requested
on:
pull_request:
branches:
@@ -32,14 +32,16 @@
branches:
- main
workflow_dispatch:
- schedule:
- - cron: '6 0 * * 0'
jobs:
build_sdist:
runs-on: "ubuntu-20.04"
steps:
- uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+ fetch-tags: true
+
- uses: actions/setup-python@v5
- name: Check that cmake is installed
run: |
@@ -49,6 +51,11 @@
run: |
pip install build twine
+ - name: Set nanoarrow Python dev version
+ if: github.ref == 'refs/heads/main'
+ run: |
+ python dev/release/release_tools.py set_python_dev_version
+
- name: Build sdist
run: |
cd python
@@ -85,6 +92,9 @@
steps:
- uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+ fetch-tags: true
- uses: actions/setup-python@v5
if: matrix.config.label != 'linux-arm64'
@@ -94,6 +104,11 @@
- name: Install cibuildwheel
run: python -m pip install cibuildwheel==2.15.0
+ - name: Set nanoarrow Python dev version
+ if: github.ref == 'refs/heads/main'
+ run: |
+ python dev/release/release_tools.py set_python_dev_version
+
- name: Build wheels
run: |
python -m cibuildwheel --output-dir wheelhouse python
@@ -112,7 +127,7 @@
needs: ["build_sdist", "build_wheels"]
name: Upload nightly packages
runs-on: "ubuntu-20.04"
- # if: github.repository == 'apache/arrow-nanoarrow' && github.ref == 'refs/heads/main'
+ if: github.repository == 'apache/arrow-nanoarrow' && github.ref == 'refs/heads/main'
steps:
- uses: actions/download-artifact@v4
with:
@@ -121,7 +136,6 @@
path: dist
- name: Set up Ruby
- if: runner.arch == 'X64' && runner.os != 'macOS'
uses: ruby/setup-ruby@v1
with:
ruby-version: "ruby"
diff --git a/dev/release/01-prepare.sh b/dev/release/01-prepare.sh
index d208fad..6d6b2ec 100755
--- a/dev/release/01-prepare.sh
+++ b/dev/release/01-prepare.sh
@@ -57,7 +57,7 @@
# Update changelog
CHANGELOG="${SOURCE_DIR}/../../CHANGELOG.md"
mv ${CHANGELOG} ${CHANGELOG}.bak
-python3 ${SOURCE_DIR}/changelog.py ${version} ${CHANGELOG}.bak > ${CHANGELOG}
+python3 ${SOURCE_DIR}/release_tools.py changelog ${version} ${CHANGELOG}.bak > ${CHANGELOG}
rm ${CHANGELOG}.bak
git add ${CHANGELOG}
diff --git a/dev/release/02-sign.sh b/dev/release/02-sign.sh
index 0eccef6..816d878 100755
--- a/dev/release/02-sign.sh
+++ b/dev/release/02-sign.sh
@@ -70,7 +70,7 @@
--skip-existing
header "Adding release notes"
- local -r release_notes=$(python3 ${source_dir}/changelog.py)
+ local -r release_notes=$(python3 ${source_dir}/release_tools.py changelog)
echo "${release_notes}"
gh release edit \
"${tag}" \
diff --git a/dev/release/changelog.py b/dev/release/changelog.py
deleted file mode 100644
index 7e09222..0000000
--- a/dev/release/changelog.py
+++ /dev/null
@@ -1,192 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import re
-import subprocess
-
-"""
-A Python script to update CHANGELOG.md
-
-This is similar to cz changelog except is specific to the nanoarrow/Apache
-release/tag format. The usage is:
-
-mv CHANGELOG.md CHANGELOG.md.bak
-python changelog.py <new version> CHANGELOG.md.bak > CHANGELOG.md
-rm CHANGELOG.md.bak
-
-This can be run more than once (e.g., for multiple release candidates) and will
-overwrite the changelog section for <new version>. It always has one newline
-at the end and does not mangle changelog sections for previous versions. It
-groups commit types (e.g., feat, fix, refactor) and groups top-level components.
-"""
-
-
-def git(*args):
- out = subprocess.run(["git"] + list(args), stdout=subprocess.PIPE)
- return out.stdout.decode("UTF-8").splitlines()
-
-
-def find_last_release_sha():
- """Finds the commit of the last release
-
- For the purposes of the changelog, this is the commit where the versions
- were bumped. This would exclude changes that happened during the release
- process but were not picked into the release branch.
- """
- for commit in git("log", "--pretty=oneline"):
- if re.search(r" chore: Update versions on", commit):
- return commit.split(" ")[0]
-
-
-def find_commits_since(begin_sha, end_sha="HEAD"):
- lines = git("log", "--pretty=oneline", f"{begin_sha}..{end_sha}")
- return lines
-
-
-def parse_commits(lines):
- commit_pattern = (
- r"^(?P<sha>[a-z0-9]{40}) (?P<type>[a-z]+)"
- r"(\((?P<component>[a-zA-Z0-9/_-]+)\))?:\s*"
- r"(?P<message>.*)$"
- )
-
- out = []
- for line in lines:
- parsed = re.search(commit_pattern, line)
- if parsed:
- out.append(parsed.groupdict())
-
- return out
-
-
-def group_commits_by_type(parsed):
- grouped = {}
-
- for item in parsed:
- if item["type"] not in grouped:
- grouped[item["type"]] = []
-
- grouped[item["type"]].append(item)
-
- return grouped
-
-
-def group_commits_by_top_level_component(parsed):
- grouped = {}
-
- for item in parsed:
- component = item["component"]
- top_level_component = component.split("/")[0] if component else ""
- if top_level_component not in grouped:
- grouped[top_level_component] = []
-
- grouped[top_level_component].append(item)
-
- return grouped
-
-
-def render_version_content(parsed):
- grouped = group_commits_by_type(parsed)
- for category in grouped:
- grouped[category] = group_commits_by_top_level_component(grouped[category])
-
- out_lines = []
- for category in sorted(grouped):
- if category in ("chore", "ci"):
- continue
-
- out_lines.append(f"### {category.capitalize()}")
- out_lines.append("")
-
- for component in sorted(grouped[category]):
- for item in grouped[category][component]:
- component = item["component"]
- prefix = f"**{component}**: " if component else ""
- message = item["message"]
- out_lines.append(f"- {prefix}{message}")
-
- out_lines.append("")
-
- if out_lines[-1] == "":
- out_lines.pop(-1)
- return "\n".join(out_lines)
-
-
-def parse_changelog(content):
- header, content = re.split(r"# nanoarrow Changelog", content)
- header += "# nanoarrow Changelog"
- content = content.strip()
-
- version_split = re.split(r"(^|\n)##\s+nanoarrow ([^\n]*)", content)
- version_split.pop(0)
-
- version_content = {}
- for i in range(0, len(version_split), 3):
- version_content[version_split[i + 1]] = version_split[i + 2].strip()
-
- return header, version_content
-
-
-def render_new_changelog(unreleased_version=None, changelog_file=None):
- sha = find_last_release_sha()
- commits = find_commits_since(sha)
- parsed = parse_commits(commits)
-
- latest_version_content = render_version_content(parsed)
-
- if changelog_file is None and unreleased_version is None:
- return latest_version_content
-
- if changelog_file is None:
- return f"## nanoarrow {unreleased_version}\n\n" + latest_version_content
-
- with open(changelog_file) as f:
- changelog_content = f.read()
-
- header, version_content = parse_changelog(changelog_content)
-
- version_content[unreleased_version] = latest_version_content
-
- out_lines = []
- out_lines.append(header)
- out_lines.append("")
-
- for version, content in version_content.items():
- out_lines.append(f"## nanoarrow {version}")
- out_lines.append("")
- out_lines.append(content)
- out_lines.append("")
-
- if out_lines[-1] == "":
- out_lines.pop(-1)
- return "\n".join(out_lines)
-
-
-if __name__ == "__main__":
- import sys
-
- if len(sys.argv) >= 3:
- changelog_file = sys.argv[2]
- unreleased_version = sys.argv[1]
- elif len(sys.argv) >= 2:
- changelog_file = None
- unreleased_version = sys.argv[1]
- else:
- changelog_file = None
- unreleased_version = None
-
- print(render_new_changelog(unreleased_version, changelog_file))
diff --git a/dev/release/post-02-bump-versions.sh b/dev/release/post-02-bump-versions.sh
index 80245fd..378522f 100755
--- a/dev/release/post-02-bump-versions.sh
+++ b/dev/release/post-02-bump-versions.sh
@@ -48,3 +48,12 @@
git checkout apache-arrow-nanoarrow-${version} -- CHANGELOG.md
git commit -m "chore: update changelog for ${version}"
echo "Updated changelog on branch."
+
+############### Remind to tag main with a dev tag after merge ###############
+
+dev_tag=apache-arrow-nanoarrow-${next_version}.dev
+
+echo "After merging this PR, run:"
+echo "git pull upstream main"
+echo "git tag ${dev_tag} main"
+echo "git push upstream ${dev_tag}"
diff --git a/dev/release/release_tools.py b/dev/release/release_tools.py
new file mode 100644
index 0000000..a46156d
--- /dev/null
+++ b/dev/release/release_tools.py
@@ -0,0 +1,254 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Python implementations of various release tasks
+
+Use `python release_tools.py --help` for usage
+"""
+
+import argparse
+import os
+import re
+import subprocess
+
+
+def git(*args):
+    """Run ``git`` with the given arguments and return its output lines.
+
+    Standard output is decoded, stripped of leading/trailing whitespace, and
+    split into a list with one item per line. The exit status of ``git`` is
+    not checked.
+    """
+    command = ["git", *args]
+    completed = subprocess.run(command, stdout=subprocess.PIPE)
+    text = completed.stdout.decode()
+    return text.strip().splitlines()
+
+
+def src_path(*args):
+    """Return the absolute path of a file in the source tree.
+
+    The components in ``args`` are joined onto the directory two levels above
+    the directory that contains this script.
+    """
+    here = os.path.dirname(__file__)
+    return os.path.abspath(os.path.join(here, "..", "..", *args))
+
+
+def file_regex_replace(pattern, replacement, path):
+    """Apply ``re.sub(pattern, replacement, ...)`` to the file at ``path``.
+
+    The file is read in full, every match is replaced, and the result is
+    written back to the same path.
+
+    Raises:
+        ValueError: if ``pattern`` does not match anywhere in the file. In
+            that case the file is not rewritten.
+    """
+    with open(path) as src:
+        original = src.read()
+
+    # Replacing zero items almost always means the pattern is out of date,
+    # so fail loudly instead of rewriting the file unchanged
+    if re.search(pattern, original) is None:
+        raise ValueError(f"file {path} does not contain pattern '{pattern}'")
+
+    # Compute the new text before opening for writing so that a failing
+    # substitution cannot leave a truncated file behind
+    updated = re.sub(pattern, replacement, original)
+    with open(path, "w") as dst:
+        dst.write(updated)
+
+
+def find_last_dev_tag():
+    """Find the most recent development tag reachable from HEAD.
+
+    Development tags are named ``apache-arrow-nanoarrow-<x.y.z>.dev``. The
+    nearest one is located with ``git describe`` and then resolved to a
+    commit with ``git rev-list``.
+
+    Note that this excludes changes that happened during the release
+    process but were not picked into the release branch.
+
+    Returns:
+        A tuple ``(version, sha)`` where ``version`` is the ``x.y.z`` part of
+        the tag name and ``sha`` is the commit the tag points to.
+
+    Raises:
+        ValueError: if no matching tag is found or the tag name does not
+            contain an ``x.y.z`` version.
+    """
+    tag_pattern = "apache-arrow-nanoarrow-*.dev"
+    described = git("describe", "--match", tag_pattern, "--tags", "--abbrev=0")
+
+    # git() does not check the exit status, so a failed describe (e.g., a
+    # checkout without tags) shows up here as empty output
+    if not described:
+        raise ValueError(
+            f"No tag matching '{tag_pattern}' was found from HEAD "
+            "(are tags and full history fetched?)"
+        )
+
+    last_dev_tag = described[0]
+    version_match = re.search(r"[0-9]+\.[0-9]+\.[0-9]+", last_dev_tag)
+    if version_match is None:
+        raise ValueError(f"Tag '{last_dev_tag}' does not contain an x.y.z version")
+
+    sha = git("rev-list", "-n", "1", last_dev_tag)[0]
+    return version_match.group(0), sha
+
+
+def find_commits_since(begin_sha, end_sha="HEAD"):
+    """List the commits in the range ``begin_sha..end_sha``.
+
+    Returns the lines printed by ``git log --pretty=oneline`` for that range
+    (each line starts with the commit sha).
+    """
+    revision_range = f"{begin_sha}..{end_sha}"
+    return git("log", "--pretty=oneline", revision_range)
+
+
+def add_set_python_dev_version_subparser(subparsers):
+    """Register the argument-less ``set_python_dev_version`` subcommand."""
+    description = (
+        "Set the Python package development version based on "
+        "the number of commits since the last version bump"
+    )
+    subparsers.add_parser("set_python_dev_version", description=description)
+
+
+def set_python_dev_version_command(args):
+    """Rewrite the Python package version as ``x.y.z.dev<N>``.
+
+    ``N`` is the number of commits between the last development tag and
+    HEAD. The quoted ``"x.y.z.dev<digits>"`` string in
+    ``python/src/nanoarrow/_static_version.py`` is updated in place. ``args``
+    (the parsed command-line arguments) is not used.
+    """
+    _, last_bump_sha = find_last_dev_tag()
+    commits_since_bump = find_commits_since(last_bump_sha)
+    dev_version_replacement = f'"\\1.dev{len(commits_since_bump)}"'
+
+    file_regex_replace(
+        r'"([0-9]+\.[0-9]+\.[0-9]+)\.dev[0-9]+"',
+        dev_version_replacement,
+        src_path("python", "src", "nanoarrow", "_static_version.py"),
+    )
+
+
+def parse_commits(lines):
+    """Parse ``git log --pretty=oneline`` lines as conventional commits.
+
+    A line is kept when it looks like ``<sha> <type>(<component>): <message>``
+    where the parenthesized component is optional. Each kept line becomes a
+    dict with the keys ``sha``, ``type``, ``component`` (None when absent) and
+    ``message``. Lines that do not match are dropped.
+    """
+    conventional_commit = re.compile(
+        r"^(?P<sha>[a-z0-9]{40}) (?P<type>[a-z]+)"
+        r"(\((?P<component>[a-zA-Z0-9/_-]+)\))?:\s*"
+        r"(?P<message>.*)$"
+    )
+    matches = (conventional_commit.search(line) for line in lines)
+    return [match.groupdict() for match in matches if match]
+
+
+def group_commits_by_type(parsed):
+    """Group parsed commits by their ``type`` key.
+
+    Returns a dict mapping each type to the list of commits of that type.
+    Both the keys and the lists keep the order in which items were first seen.
+    """
+    by_type = {}
+    for commit in parsed:
+        by_type.setdefault(commit["type"], []).append(commit)
+    return by_type
+
+
+def group_commits_by_top_level_component(parsed):
+    """Group parsed commits by the first ``/``-separated part of ``component``.
+
+    Commits whose component is missing or empty are grouped under ``""``.
+    Returns a dict mapping each top-level component to its list of commits,
+    with keys and lists in first-seen order.
+    """
+    by_component = {}
+    for commit in parsed:
+        component = commit["component"]
+        # "r/sub_dir" is filed under "r"; no component is filed under ""
+        key = component.partition("/")[0] if component else ""
+        by_component.setdefault(key, []).append(commit)
+    return by_component
+
+
+def render_version_content(parsed):
+    """Render parsed commits as the markdown body of one changelog section.
+
+    Commits are grouped under a ``### <Type>`` heading per commit type (types
+    in sorted order) and, within a type, ordered by top-level component. Each
+    commit becomes a ``- **component**: message`` bullet, or ``- message``
+    when it has no component. Commits of type ``chore`` and ``ci`` are left
+    out.
+
+    Returns:
+        The rendered markdown without a trailing newline, or an empty string
+        when there is nothing to render.
+    """
+    by_type = group_commits_by_type(parsed)
+
+    out_lines = []
+    for category in sorted(by_type):
+        if category in ("chore", "ci"):
+            continue
+
+        out_lines.append(f"### {category.capitalize()}")
+        out_lines.append("")
+
+        by_component = group_commits_by_top_level_component(by_type[category])
+        for top_level_component in sorted(by_component):
+            for item in by_component[top_level_component]:
+                component = item["component"]
+                prefix = f"**{component}**: " if component else ""
+                out_lines.append(f"- {prefix}{item['message']}")
+
+        out_lines.append("")
+
+    # out_lines is empty when there were no commits or only chore/ci commits,
+    # so check before looking at the last element
+    if out_lines and out_lines[-1] == "":
+        out_lines.pop()
+    return "\n".join(out_lines)
+
+
+def parse_changelog(content):
+    """Split the text of CHANGELOG.md into its header and version sections.
+
+    Everything up to and including the first ``# nanoarrow Changelog`` title
+    is the header. The remainder is split on ``## nanoarrow <version>``
+    headings.
+
+    Returns:
+        A tuple ``(header, version_content)`` where ``version_content`` maps
+        the text after ``## nanoarrow `` on each heading line to the stripped
+        body of that section, in file order.
+
+    Raises:
+        ValueError: if the ``# nanoarrow Changelog`` title is not present.
+    """
+    title = "# nanoarrow Changelog"
+
+    # Only split on the first occurrence so that an entry that happens to
+    # mention the title does not break the parse
+    header, found_title, body = content.partition(title)
+    if not found_title:
+        raise ValueError(f"changelog content does not contain '{title}'")
+    header += title
+
+    # With two capture groups, re.split() yields
+    # [leading text, separator, version, section, separator, version, section, ...]
+    parts = re.split(r"(^|\n)##\s+nanoarrow ([^\n]*)", body.strip())
+    versions = parts[2::3]
+    sections = parts[3::3]
+
+    version_content = {}
+    for version, section in zip(versions, sections):
+        version_content[version] = section.strip()
+
+    return header, version_content
+
+
+def render_new_changelog(unreleased_version=None, changelog_file=None):
+    """Render changelog content for the commits since the last dev tag.
+
+    With no arguments, only the rendered entries are returned. With just
+    ``unreleased_version``, the entries are preceded by a
+    ``## nanoarrow <unreleased_version>`` heading. With ``changelog_file`` as
+    well, the complete text of that changelog is returned with the section for
+    ``unreleased_version`` added or replaced.
+    """
+    _, last_bump_sha = find_last_dev_tag()
+    new_entries = render_version_content(
+        parse_commits(find_commits_since(last_bump_sha))
+    )
+
+    if changelog_file is None:
+        if unreleased_version is None:
+            return new_entries
+        return f"## nanoarrow {unreleased_version}\n\n" + new_entries
+
+    with open(changelog_file) as f:
+        header, sections = parse_changelog(f.read())
+
+    # Assigning by key overwrites a section with the same version that is
+    # already in the file instead of duplicating it
+    sections[unreleased_version] = new_entries
+
+    blocks = [header]
+    for version, content in sections.items():
+        blocks.append(f"## nanoarrow {version}\n\n{content}")
+    return "\n\n".join(blocks)
+
+
+def add_changelog_parser(subparsers):
+    """Register the ``changelog`` subcommand.
+
+    The subcommand takes two optional positional arguments,
+    ``unreleased_version`` and ``changelog_file``, both of which are None
+    when omitted.
+    """
+    parser = subparsers.add_parser(
+        "changelog", description="Generate and/or append new CHANGELOG.md content"
+    )
+    parser.add_argument(
+        "unreleased_version",
+        nargs="?",
+        help=(
+            "Prepend heading text (## nanoarrow [unreleased_version]) "
+            "to the latest entries"
+        ),
+    )
+    parser.add_argument(
+        "changelog_file",
+        nargs="?",
+        help="If specified, append new changelog content to this file",
+    )
+
+
+def changelog_command(args):
+    """Print the changelog rendered from the parsed ``changelog`` arguments."""
+    rendered = render_new_changelog(
+        unreleased_version=args.unreleased_version,
+        changelog_file=args.changelog_file,
+    )
+    print(rendered)
+
+
+if __name__ == "__main__":
+    # Command-line entry point: one subcommand per release task
+    parser = argparse.ArgumentParser(
+        description="Python functions automating various pieces of release tasks",
+    )
+
+    subparsers = parser.add_subparsers(
+        title="subcommands", dest="subcommand", required=True
+    )
+    add_changelog_parser(subparsers)
+    add_set_python_dev_version_subparser(subparsers)
+
+    # Subcommand name -> function that implements it
+    commands = {
+        "changelog": changelog_command,
+        "set_python_dev_version": set_python_dev_version_command,
+    }
+
+    # parse_args() reads sys.argv[1:] when called without arguments
+    args = parser.parse_args()
+    command = commands.get(args.subcommand)
+    if command is not None:
+        command(args)
diff --git a/dev/release/test_changelog.py b/dev/release/test_release_tools.py
similarity index 78%
rename from dev/release/test_changelog.py
rename to dev/release/test_release_tools.py
index 7e0d41f..94423ac 100644
--- a/dev/release/test_changelog.py
+++ b/dev/release/test_release_tools.py
@@ -19,23 +19,42 @@
import re
import tempfile
-import changelog
+import pytest
+import release_tools
def test_git():
- git_version = changelog.git("--version")
+ git_version = release_tools.git("--version")
assert len(git_version) == 1
assert re.match(r"git version", git_version[0]) is not None
def test_find_last_release():
- last_release = changelog.find_last_release_sha()
+ last_version, last_release = release_tools.find_last_dev_tag()
+ assert re.match(r"[0-9]+\.[0-9]+\.[0-9]+", last_version)
assert re.match(r"[0-9a-f]{40}", last_release)
+def test_src_path():
+    # The path to release_tools.py itself must resolve to an existing file
+    path_parts = ("dev", "release", "release_tools.py")
+    assert os.path.exists(release_tools.src_path(*path_parts))
+
+
+def test_file_regex_replace():
+    with tempfile.TemporaryDirectory() as tempdir:
+        path = os.path.join(tempdir, "test.txt")
+        with open(path, "w") as f:
+            f.write("this file contains something that needs to be replaced")
+
+        release_tools.file_regex_replace(r"something\s+", "nothing ", path)
+
+        # Check that the replacement actually made it to disk
+        with open(path) as f:
+            assert f.read() == "this file contains nothing that needs to be replaced"
+
+        # A pattern with zero matches is an error rather than a silent no-op
+        with pytest.raises(ValueError):
+            release_tools.file_regex_replace("text does not exist in file", "", path)
+
+
def test_find_commits_since():
- last_release = changelog.find_last_release_sha()
- commits = changelog.find_commits_since(last_release)
+ _, last_release = release_tools.find_last_dev_tag()
+ commits = release_tools.find_commits_since(last_release)
assert isinstance(commits, list)
assert len(commits) > 0
@@ -43,8 +62,6 @@
assert isinstance(commit, str)
assert re.match(r"[0-9a-f]{40}", commit)
- assert last_release in commits[-1]
-
def test_parse_commits():
commits = [
@@ -53,7 +70,7 @@
"2" * 40 + " fix(r/sub_dir/sub-dir): A conventional commit with a component",
]
- parsed = changelog.parse_commits(commits)
+ parsed = release_tools.parse_commits(commits)
# Non-conventional commits not included (same as cz ch)
assert len(parsed) == 2
@@ -76,7 +93,7 @@
{"type": "chore", "sha": "2"},
]
- grouped = changelog.group_commits_by_type(parsed)
+ grouped = release_tools.group_commits_by_type(parsed)
assert list(grouped.keys()) == ["fix", "chore"]
assert len(grouped["fix"]) == 2
@@ -95,7 +112,7 @@
{"component": "r", "sha": "3"},
]
- grouped = changelog.group_commits_by_top_level_component(parsed)
+ grouped = release_tools.group_commits_by_top_level_component(parsed)
assert list(grouped.keys()) == ["", "r"]
assert len(grouped[""]) == 2
@@ -116,7 +133,7 @@
{"type": "feat", "component": "r", "message": "message 4"},
]
- rendered = changelog.render_version_content(parsed)
+ rendered = release_tools.render_version_content(parsed)
assert rendered.splitlines() == [
"### Feat",
"",
@@ -146,7 +163,7 @@
]
content = "\n".join(changelog_lines)
- header, version_content = changelog.parse_changelog(content)
+ header, version_content = release_tools.parse_changelog(content)
assert header == "<!-- header stuff we want untouched -->\n\n# nanoarrow Changelog"
assert isinstance(version_content, dict)
@@ -163,10 +180,10 @@
def test_render_new_changelog():
with tempfile.TemporaryDirectory() as tempdir:
- changes_no_version = changelog.render_new_changelog()
+ changes_no_version = release_tools.render_new_changelog()
assert re.match(r"^## nanoarrow", changes_no_version) is None
- changes_with_version = changelog.render_new_changelog("some version info")
+ changes_with_version = release_tools.render_new_changelog("some version info")
assert re.match(r"^## nanoarrow some version info", changes_with_version)
changelog_file_name = os.path.join(tempdir, "CHANGELOG.md")
@@ -182,13 +199,13 @@
f.write(changes_with_version)
# Make sure we do not write two version items for the same version
- modified_changelog = changelog.render_new_changelog(
+ modified_changelog = release_tools.render_new_changelog(
"some version info", changelog_file_name
)
assert len(re.findall(r"\n## nanoarrow", modified_changelog)) == 1
# Make sure do write two version items for different versions
- modified_changelog = changelog.render_new_changelog(
+ modified_changelog = release_tools.render_new_changelog(
"other version info", changelog_file_name
)
assert len(re.findall(r"\n## nanoarrow", modified_changelog)) == 2
diff --git a/python/src/nanoarrow/ipc.py b/python/src/nanoarrow/ipc.py
index 5102a60..5125f77 100644
--- a/python/src/nanoarrow/ipc.py
+++ b/python/src/nanoarrow/ipc.py
@@ -99,7 +99,7 @@
<nanoarrow.c_lib.CArrayStream>
- get_schema(): struct<some_col: int32>
"""
- if _obj_is_buffer(obj):
+ if not hasattr(obj, "readinto") and _obj_is_buffer(obj):
close_obj = True
obj = io.BytesIO(obj)
else:
diff --git a/python/tests/test_version.py b/python/tests/test_version.py
index 701019c..8de6e2c 100644
--- a/python/tests/test_version.py
+++ b/python/tests/test_version.py
@@ -21,7 +21,7 @@
def test_version():
- re_py_version = re.compile(r"^[0-9]+\.[0-9]+\.[0-9]+(\.dev[0-9+])?$")
+ re_py_version = re.compile(r"^[0-9]+\.[0-9]+\.[0-9]+(\.dev[0-9]+)?$")
assert re_py_version.match(na.__version__) is not None