ci(python): Set unique development version before building wheels (#442)

Closes #437.

This PR adopts the Arrow monorepo tag convention: a tag of the form
`apache-arrow-nanoarrow-x.x.x.dev` marks where the changelog for the next
version should start (and makes it easier to use various `git` tools to
calculate the development version number for Python).

It also refactors what was previously `changelog.py` (and is now
`release_tools.py`) since there's more than one thing it does now (and
because I refuse to add any more untestable/untested bash to
dev/release).
diff --git a/.github/workflows/python-wheels.yaml b/.github/workflows/python-wheels.yaml
index 4b8b3b5..f17d74d 100644
--- a/.github/workflows/python-wheels.yaml
+++ b/.github/workflows/python-wheels.yaml
@@ -17,7 +17,7 @@
 
 name: Build Python Wheels
 
-# Build wheels weekly, on commit to main, or when requested
+# Build wheels on commit to main, or when requested
 on:
   pull_request:
     branches:
@@ -32,14 +32,16 @@
     branches:
       - main
   workflow_dispatch:
-  schedule:
-    - cron: '6 0 * * 0'
 
 jobs:
   build_sdist:
     runs-on: "ubuntu-20.04"
     steps:
     - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
+        fetch-tags: true
+
     - uses: actions/setup-python@v5
     - name: Check that cmake is installed
       run: |
@@ -49,6 +51,11 @@
       run: |
         pip install build twine
 
+    - name: Set nanoarrow Python dev version
+      if: github.ref == 'refs/heads/main'
+      run: |
+        python dev/release/release_tools.py set_python_dev_version
+
     - name: Build sdist
       run: |
         cd python
@@ -85,6 +92,9 @@
 
     steps:
       - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          fetch-tags: true
 
       - uses: actions/setup-python@v5
         if: matrix.config.label != 'linux-arm64'
@@ -94,6 +104,11 @@
       - name: Install cibuildwheel
         run: python -m pip install cibuildwheel==2.15.0
 
+      - name: Set nanoarrow Python dev version
+        if: github.ref == 'refs/heads/main'
+        run: |
+          python dev/release/release_tools.py set_python_dev_version
+
       - name: Build wheels
         run: |
           python -m cibuildwheel --output-dir wheelhouse python
@@ -112,7 +127,7 @@
     needs: ["build_sdist", "build_wheels"]
     name: Upload nightly packages
     runs-on: "ubuntu-20.04"
-    # if: github.repository == 'apache/arrow-nanoarrow' && github.ref == 'refs/heads/main'
+    if: github.repository == 'apache/arrow-nanoarrow' && github.ref == 'refs/heads/main'
     steps:
       - uses: actions/download-artifact@v4
         with:
@@ -121,7 +136,6 @@
           path: dist
 
       - name: Set up Ruby
-        if: runner.arch == 'X64' && runner.os != 'macOS'
         uses: ruby/setup-ruby@v1
         with:
           ruby-version: "ruby"
diff --git a/dev/release/01-prepare.sh b/dev/release/01-prepare.sh
index d208fad..6d6b2ec 100755
--- a/dev/release/01-prepare.sh
+++ b/dev/release/01-prepare.sh
@@ -57,7 +57,7 @@
 # Update changelog
 CHANGELOG="${SOURCE_DIR}/../../CHANGELOG.md"
 mv ${CHANGELOG} ${CHANGELOG}.bak
-python3 ${SOURCE_DIR}/changelog.py ${version} ${CHANGELOG}.bak > ${CHANGELOG}
+python3 ${SOURCE_DIR}/release_tools.py changelog ${version} ${CHANGELOG}.bak > ${CHANGELOG}
 rm ${CHANGELOG}.bak
 
 git add ${CHANGELOG}
diff --git a/dev/release/02-sign.sh b/dev/release/02-sign.sh
index 0eccef6..816d878 100755
--- a/dev/release/02-sign.sh
+++ b/dev/release/02-sign.sh
@@ -70,7 +70,7 @@
        --skip-existing
 
     header "Adding release notes"
-    local -r release_notes=$(python3 ${source_dir}/changelog.py)
+    local -r release_notes=$(python3 ${source_dir}/release_tools.py changelog)
     echo "${release_notes}"
     gh release edit \
        "${tag}" \
diff --git a/dev/release/changelog.py b/dev/release/changelog.py
deleted file mode 100644
index 7e09222..0000000
--- a/dev/release/changelog.py
+++ /dev/null
@@ -1,192 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import re
-import subprocess
-
-"""
-A Python script to update CHANGELOG.md
-
-This is similar to cz changelog except is specific to the nanoarrow/Apache
-release/tag format. The usage is:
-
-mv CHANGELOG.md CHANGELOG.md.bak
-python changelog.py <new version> CHANGELOG.md.bak > CHANGELOG.md
-rm CHANGELOG.md.bak
-
-This can be run more than once (e.g., for multiple release candidates) and will
-overwrite the changelog section for <new version>. It always has one newline
-at the end and does not mangle changelog sections for previous versions. It
-groups commit types (e.g., feat, fix, refactor) and groups top-level components.
-"""
-
-
-def git(*args):
-    out = subprocess.run(["git"] + list(args), stdout=subprocess.PIPE)
-    return out.stdout.decode("UTF-8").splitlines()
-
-
-def find_last_release_sha():
-    """Finds the commit of the last release
-
-    For the purposes of the changelog, this is the commit where the versions
-    were bumped. This would exclude changes that happened during the release
-    process but were not picked into the release branch.
-    """
-    for commit in git("log", "--pretty=oneline"):
-        if re.search(r" chore: Update versions on", commit):
-            return commit.split(" ")[0]
-
-
-def find_commits_since(begin_sha, end_sha="HEAD"):
-    lines = git("log", "--pretty=oneline", f"{begin_sha}..{end_sha}")
-    return lines
-
-
-def parse_commits(lines):
-    commit_pattern = (
-        r"^(?P<sha>[a-z0-9]{40}) (?P<type>[a-z]+)"
-        r"(\((?P<component>[a-zA-Z0-9/_-]+)\))?:\s*"
-        r"(?P<message>.*)$"
-    )
-
-    out = []
-    for line in lines:
-        parsed = re.search(commit_pattern, line)
-        if parsed:
-            out.append(parsed.groupdict())
-
-    return out
-
-
-def group_commits_by_type(parsed):
-    grouped = {}
-
-    for item in parsed:
-        if item["type"] not in grouped:
-            grouped[item["type"]] = []
-
-        grouped[item["type"]].append(item)
-
-    return grouped
-
-
-def group_commits_by_top_level_component(parsed):
-    grouped = {}
-
-    for item in parsed:
-        component = item["component"]
-        top_level_component = component.split("/")[0] if component else ""
-        if top_level_component not in grouped:
-            grouped[top_level_component] = []
-
-        grouped[top_level_component].append(item)
-
-    return grouped
-
-
-def render_version_content(parsed):
-    grouped = group_commits_by_type(parsed)
-    for category in grouped:
-        grouped[category] = group_commits_by_top_level_component(grouped[category])
-
-    out_lines = []
-    for category in sorted(grouped):
-        if category in ("chore", "ci"):
-            continue
-
-        out_lines.append(f"### {category.capitalize()}")
-        out_lines.append("")
-
-        for component in sorted(grouped[category]):
-            for item in grouped[category][component]:
-                component = item["component"]
-                prefix = f"**{component}**: " if component else ""
-                message = item["message"]
-                out_lines.append(f"- {prefix}{message}")
-
-        out_lines.append("")
-
-    if out_lines[-1] == "":
-        out_lines.pop(-1)
-    return "\n".join(out_lines)
-
-
-def parse_changelog(content):
-    header, content = re.split(r"# nanoarrow Changelog", content)
-    header += "# nanoarrow Changelog"
-    content = content.strip()
-
-    version_split = re.split(r"(^|\n)##\s+nanoarrow ([^\n]*)", content)
-    version_split.pop(0)
-
-    version_content = {}
-    for i in range(0, len(version_split), 3):
-        version_content[version_split[i + 1]] = version_split[i + 2].strip()
-
-    return header, version_content
-
-
-def render_new_changelog(unreleased_version=None, changelog_file=None):
-    sha = find_last_release_sha()
-    commits = find_commits_since(sha)
-    parsed = parse_commits(commits)
-
-    latest_version_content = render_version_content(parsed)
-
-    if changelog_file is None and unreleased_version is None:
-        return latest_version_content
-
-    if changelog_file is None:
-        return f"## nanoarrow {unreleased_version}\n\n" + latest_version_content
-
-    with open(changelog_file) as f:
-        changelog_content = f.read()
-
-    header, version_content = parse_changelog(changelog_content)
-
-    version_content[unreleased_version] = latest_version_content
-
-    out_lines = []
-    out_lines.append(header)
-    out_lines.append("")
-
-    for version, content in version_content.items():
-        out_lines.append(f"## nanoarrow {version}")
-        out_lines.append("")
-        out_lines.append(content)
-        out_lines.append("")
-
-    if out_lines[-1] == "":
-        out_lines.pop(-1)
-    return "\n".join(out_lines)
-
-
-if __name__ == "__main__":
-    import sys
-
-    if len(sys.argv) >= 3:
-        changelog_file = sys.argv[2]
-        unreleased_version = sys.argv[1]
-    elif len(sys.argv) >= 2:
-        changelog_file = None
-        unreleased_version = sys.argv[1]
-    else:
-        changelog_file = None
-        unreleased_version = None
-
-    print(render_new_changelog(unreleased_version, changelog_file))
diff --git a/dev/release/post-02-bump-versions.sh b/dev/release/post-02-bump-versions.sh
index 80245fd..378522f 100755
--- a/dev/release/post-02-bump-versions.sh
+++ b/dev/release/post-02-bump-versions.sh
@@ -48,3 +48,12 @@
 git checkout apache-arrow-nanoarrow-${version} -- CHANGELOG.md
 git commit -m "chore: update changelog for ${version}"
 echo "Updated changelog on branch."
+
+############### Remind to tag main with a dev tag after merge ###############
+
+dev_tag=apache-arrow-nanoarrow-${next_version}.dev
+
+echo "After merging this PR, run:"
+echo "git pull upstream main"
+echo "git tag ${dev_tag} main"
+echo "git push upstream ${dev_tag}"
diff --git a/dev/release/release_tools.py b/dev/release/release_tools.py
new file mode 100644
index 0000000..a46156d
--- /dev/null
+++ b/dev/release/release_tools.py
@@ -0,0 +1,254 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Python implementations of various release tasks
+
+Use `python release_tools.py --help` for usage
+"""
+
+import argparse
+import os
+import re
+import subprocess
+
+
+def git(*args):  # runs `git <args>`; returns stripped stdout as a list of lines
+    stdout = subprocess.run(["git", *args], stdout=subprocess.PIPE).stdout
+    return stdout.decode().strip().splitlines()
+
+
+def src_path(*args):
+    """Return the absolute path of *args joined two levels above this file."""
+    repo_root = os.path.join(os.path.dirname(__file__), "..", "..")
+    return os.path.abspath(os.path.join(repo_root, *args))
+
+
+def file_regex_replace(pattern, replacement, path):
+    """Apply re.sub(pattern, replacement) to the file at path, in place."""
+    with open(path) as src:
+        original = src.read()
+
+    # Refuse to silently do nothing: zero matches is treated as an error
+    if not re.search(pattern, original):
+        raise ValueError(f"file {path} does not contain pattern '{pattern}'")
+    updated = re.sub(pattern, replacement, original)
+    with open(path, "w") as dst:
+        dst.write(updated)
+
+
+def find_last_dev_tag():
+    """Return (version, sha) for the most recent apache-arrow-nanoarrow-*.dev tag
+
+    Excludes changes made during the release process that were not picked
+    into the release branch. Raises ValueError if git finds no such tag.
+    """
+    tag_glob = "apache-arrow-nanoarrow-*.dev"
+    tags = git("describe", "--match", tag_glob, "--tags", "--abbrev=0")
+    if not tags:
+        raise ValueError(f"no tag matching {tag_glob} is reachable from HEAD")
+    last_version = re.search(r"[0-9]+\.[0-9]+\.[0-9]+", tags[0]).group(0)
+    return last_version, git("rev-list", "-n", "1", tags[0])[0]
+
+
+def find_commits_since(begin_sha, end_sha="HEAD"):
+    """Return one-line git log entries for the range begin_sha..end_sha."""
+    return git("log", "--pretty=oneline", f"{begin_sha}..{end_sha}")
+
+
+def add_set_python_dev_version_subparser(subparsers):
+    """Register the set_python_dev_version subcommand on subparsers."""
+    # This subcommand takes no arguments of its own
+    description = (
+        "Set the Python package development version based on "
+        "the number of commits since the last version bump"
+    )
+    subparsers.add_parser("set_python_dev_version", description=description)
+
+
+def set_python_dev_version_command(args):
+    """Set .devN in _static_version.py to the commit count since the dev tag."""
+    _, dev_tag_sha = find_last_dev_tag()
+    n_commits = len(find_commits_since(dev_tag_sha))
+
+    # Keep the x.y.z part (group 1) and swap only the number after ".dev"
+    pattern = r'"([0-9]+\.[0-9]+\.[0-9]+)\.dev[0-9]+"'
+    replacement = f'"\\1.dev{n_commits}"'
+    version_file = src_path("python", "src", "nanoarrow", "_static_version.py")
+    file_regex_replace(pattern, replacement, version_file)
+
+
+def parse_commits(lines):
+    """Parse `git log --pretty=oneline` lines that are conventional commits
+
+    Returns one dict per matching line with keys sha, type, component
+    (None when absent), and message; non-matching lines are dropped.
+    """
+    conventional = re.compile(
+        r"^(?P<sha>[a-z0-9]{40}) (?P<type>[a-z]+)"
+        r"(\((?P<component>[a-zA-Z0-9/_-]+)\))?:\s*"
+        r"(?P<message>.*)$"
+    )
+
+    matches = (conventional.search(line) for line in lines)
+    return [match.groupdict() for match in matches if match]
+
+
+def group_commits_by_type(parsed):
+    """Bucket parsed commits by their "type" value
+
+    Keys appear in first-seen order and each bucket keeps the input order.
+    """
+    by_type = {}
+    for commit in parsed:
+        by_type.setdefault(commit["type"], []).append(commit)
+
+    return by_type
+
+
+def group_commits_by_top_level_component(parsed):
+    """Bucket parsed commits by the first "/"-separated part of "component"
+
+    Commits whose component is None or empty are collected under "".
+    """
+    by_component = {}
+    for commit in parsed:
+        component = commit["component"]
+        top_level = component.split("/")[0] if component else ""
+        by_component.setdefault(top_level, []).append(commit)
+
+    return by_component
+
+
+def render_version_content(parsed):
+    """Render parsed commits as markdown changelog sections
+
+    Emits one "### <Type>" section per commit type (sorted; "chore" and "ci"
+    are omitted) with one bullet per commit, ordered by top-level component.
+    Returns "" when there is nothing to render.
+    """
+    grouped = group_commits_by_type(parsed)
+    for category in grouped:
+        grouped[category] = group_commits_by_top_level_component(grouped[category])
+
+    out_lines = []
+    for category in sorted(grouped):
+        if category in ("chore", "ci"):
+            continue
+        out_lines.append(f"### {category.capitalize()}")
+        out_lines.append("")
+        for top_level in sorted(grouped[category]):
+            for item in grouped[category][top_level]:
+                prefix = f"**{item['component']}**: " if item["component"] else ""
+                out_lines.append(f"- {prefix}{item['message']}")
+        out_lines.append("")
+
+    # Slicing drops the trailing "" and, unlike [-1], tolerates an empty list
+    return "\n".join(out_lines[:-1])
+
+
+def parse_changelog(content):
+    """Split changelog text into (header, {version: section body})
+
+    The header runs through the "# nanoarrow Changelog" title; each
+    "## nanoarrow <version>" heading starts a new section.
+    """
+    header, body = re.split(r"# nanoarrow Changelog", content)
+    header += "# nanoarrow Changelog"
+
+    # Two capture groups: pieces are [preamble, sep, version, text, sep, ...]
+    pieces = re.split(r"(^|\n)##\s+nanoarrow ([^\n]*)", body.strip())
+    sections = zip(pieces[2::3], pieces[3::3])
+    return header, {version: text.strip() for version, text in sections}
+
+
+def render_new_changelog(unreleased_version=None, changelog_file=None):
+    """Render changelog text for the commits since the last dev tag
+
+    With neither argument, returns just the rendered entries. With only
+    unreleased_version, the entries follow a "## nanoarrow <version>"
+    heading. With changelog_file too, returns that file's content with the
+    section for unreleased_version added or replaced by the entries.
+    """
+    _, sha = find_last_dev_tag()
+    commits = find_commits_since(sha)
+    parsed = parse_commits(commits)
+    new_entries = render_version_content(parsed)
+
+    # Without a file there is no existing changelog to merge into
+    if changelog_file is None:
+        if unreleased_version is None:
+            return new_entries
+        return f"## nanoarrow {unreleased_version}\n\n" + new_entries
+
+    with open(changelog_file) as f:
+        header, sections = parse_changelog(f.read())
+
+    # Keyed assignment replaces an existing section instead of duplicating it
+    sections[unreleased_version] = new_entries
+
+    out_lines = [header, ""]
+    for version, body in sections.items():
+        out_lines += [f"## nanoarrow {version}", "", body, ""]
+
+    # End on the last section body rather than on a blank line
+    if out_lines[-1] == "":
+        out_lines.pop()
+    return "\n".join(out_lines)
+
+
+def add_changelog_parser(subparsers):
+    """Register the changelog subcommand and its two optional positionals."""
+    parser = subparsers.add_parser(
+        "changelog", description="Generate and/or append new CHANGELOG.md content"
+    )
+    parser.add_argument(
+        "unreleased_version",
+        nargs="?",
+        help=(
+            "Prepend heading text (## nanoarrow [unreleased_version]) "
+            "to the latest entries"
+        ),
+    )
+
+    # The file is only read; the merged changelog is printed, not written back
+    file_help = "If specified, print this file's content with the new entries added"
+    parser.add_argument("changelog_file", nargs="?", help=file_help)
+
+
+def changelog_command(args):  # prints the rendered changelog to stdout
+    print(render_new_changelog(args.unreleased_version, args.changelog_file))
+
+
+if __name__ == "__main__":
+    import sys
+
+    # Each subcommand name maps to the function that implements it
+    commands = {
+        "changelog": changelog_command,
+        "set_python_dev_version": set_python_dev_version_command,
+    }
+    parser = argparse.ArgumentParser(
+        description="Python functions automating various pieces of release tasks",
+    )
+    subparsers = parser.add_subparsers(
+        title="subcommands", dest="subcommand", required=True
+    )
+    add_changelog_parser(subparsers)
+    add_set_python_dev_version_subparser(subparsers)
+    args = parser.parse_args(sys.argv[1:])
+    commands[args.subcommand](args)
diff --git a/dev/release/test_changelog.py b/dev/release/test_release_tools.py
similarity index 78%
rename from dev/release/test_changelog.py
rename to dev/release/test_release_tools.py
index 7e0d41f..94423ac 100644
--- a/dev/release/test_changelog.py
+++ b/dev/release/test_release_tools.py
@@ -19,23 +19,42 @@
 import re
 import tempfile
 
-import changelog
+import pytest
+import release_tools
 
 
 def test_git():
-    git_version = changelog.git("--version")
+    git_version = release_tools.git("--version")
     assert len(git_version) == 1
     assert re.match(r"git version", git_version[0]) is not None
 
 
 def test_find_last_release():
-    last_release = changelog.find_last_release_sha()
+    last_version, last_release = release_tools.find_last_dev_tag()
+    assert re.match(r"[0-9]+\.[0-9]+\.[0-9]+", last_version)
     assert re.match(r"[0-9a-f]{40}", last_release)
 
 
+def test_src_path():
+    # src_path() must resolve this repository's own release_tools.py
+    assert os.path.exists(release_tools.src_path("dev", "release", "release_tools.py"))
+
+
+def test_file_regex_replace():
+    with tempfile.TemporaryDirectory() as tempdir:
+        path = os.path.join(tempdir, "test.txt")
+        with open(path, "w") as f:
+            f.write("this file contains something that needs to be replaced")
+        release_tools.file_regex_replace(r"something\s+", "nothing ", path)
+        with open(path) as f:
+            assert f.read() == "this file contains nothing that needs to be replaced"
+        with pytest.raises(ValueError):
+            release_tools.file_regex_replace("text does not exist in file", "", path)
+
+
 def test_find_commits_since():
-    last_release = changelog.find_last_release_sha()
-    commits = changelog.find_commits_since(last_release)
+    _, last_release = release_tools.find_last_dev_tag()
+    commits = release_tools.find_commits_since(last_release)
     assert isinstance(commits, list)
     assert len(commits) > 0
 
@@ -43,8 +62,6 @@
         assert isinstance(commit, str)
         assert re.match(r"[0-9a-f]{40}", commit)
 
-    assert last_release in commits[-1]
-
 
 def test_parse_commits():
     commits = [
@@ -53,7 +70,7 @@
         "2" * 40 + " fix(r/sub_dir/sub-dir): A conventional commit with a component",
     ]
 
-    parsed = changelog.parse_commits(commits)
+    parsed = release_tools.parse_commits(commits)
 
     # Non-conventional commits not included (same as cz ch)
     assert len(parsed) == 2
@@ -76,7 +93,7 @@
         {"type": "chore", "sha": "2"},
     ]
 
-    grouped = changelog.group_commits_by_type(parsed)
+    grouped = release_tools.group_commits_by_type(parsed)
     assert list(grouped.keys()) == ["fix", "chore"]
 
     assert len(grouped["fix"]) == 2
@@ -95,7 +112,7 @@
         {"component": "r", "sha": "3"},
     ]
 
-    grouped = changelog.group_commits_by_top_level_component(parsed)
+    grouped = release_tools.group_commits_by_top_level_component(parsed)
 
     assert list(grouped.keys()) == ["", "r"]
     assert len(grouped[""]) == 2
@@ -116,7 +133,7 @@
         {"type": "feat", "component": "r", "message": "message 4"},
     ]
 
-    rendered = changelog.render_version_content(parsed)
+    rendered = release_tools.render_version_content(parsed)
     assert rendered.splitlines() == [
         "### Feat",
         "",
@@ -146,7 +163,7 @@
     ]
 
     content = "\n".join(changelog_lines)
-    header, version_content = changelog.parse_changelog(content)
+    header, version_content = release_tools.parse_changelog(content)
     assert header == "<!-- header stuff we want untouched -->\n\n# nanoarrow Changelog"
 
     assert isinstance(version_content, dict)
@@ -163,10 +180,10 @@
 
 def test_render_new_changelog():
     with tempfile.TemporaryDirectory() as tempdir:
-        changes_no_version = changelog.render_new_changelog()
+        changes_no_version = release_tools.render_new_changelog()
         assert re.match(r"^## nanoarrow", changes_no_version) is None
 
-        changes_with_version = changelog.render_new_changelog("some version info")
+        changes_with_version = release_tools.render_new_changelog("some version info")
         assert re.match(r"^## nanoarrow some version info", changes_with_version)
 
         changelog_file_name = os.path.join(tempdir, "CHANGELOG.md")
@@ -182,13 +199,13 @@
             f.write(changes_with_version)
 
         # Make sure we do not write two version items for the same version
-        modified_changelog = changelog.render_new_changelog(
+        modified_changelog = release_tools.render_new_changelog(
             "some version info", changelog_file_name
         )
         assert len(re.findall(r"\n## nanoarrow", modified_changelog)) == 1
 
         # Make sure do write two version items for different versions
-        modified_changelog = changelog.render_new_changelog(
+        modified_changelog = release_tools.render_new_changelog(
             "other version info", changelog_file_name
         )
         assert len(re.findall(r"\n## nanoarrow", modified_changelog)) == 2
diff --git a/python/src/nanoarrow/ipc.py b/python/src/nanoarrow/ipc.py
index 5102a60..5125f77 100644
--- a/python/src/nanoarrow/ipc.py
+++ b/python/src/nanoarrow/ipc.py
@@ -99,7 +99,7 @@
         <nanoarrow.c_lib.CArrayStream>
         - get_schema(): struct<some_col: int32>
         """
-        if _obj_is_buffer(obj):
+        if not hasattr(obj, "readinto") and _obj_is_buffer(obj):
             close_obj = True
             obj = io.BytesIO(obj)
         else:
diff --git a/python/tests/test_version.py b/python/tests/test_version.py
index 701019c..8de6e2c 100644
--- a/python/tests/test_version.py
+++ b/python/tests/test_version.py
@@ -21,7 +21,7 @@
 
 
 def test_version():
-    re_py_version = re.compile(r"^[0-9]+\.[0-9]+\.[0-9]+(\.dev[0-9+])?$")
+    re_py_version = re.compile(r"^[0-9]+\.[0-9]+\.[0-9]+(\.dev[0-9]+)?$")
     assert re_py_version.match(na.__version__) is not None