)]}'
{
  "log": [
    {
      "commit": "42bf3d0fb25b9002e9e5d64e668b2dd8e05c489d",
      "tree": "2a3d6b1fa444ddd19cc201a7be9928971eead087",
      "parents": [
        "96cd9a0acee6449d901aa9504de4fa8c7c500b4e"
      ],
      "author": {
        "name": "Jingsong Lee",
        "email": "jingsonglee0@gmail.com",
        "time": "Thu Jun 11 20:45:15 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Thu Jun 11 20:45:15 2026 +0800"
      },
      "message": "[vector] Add unified vector index integration (#8174)"
    },
    {
      "commit": "96cd9a0acee6449d901aa9504de4fa8c7c500b4e",
      "tree": "9b15f491e90fcda94d8e97fdb4eea2eb89a04730",
      "parents": [
        "64b0534786c8295eec2c0cfd5406a084fdbde85e"
      ],
      "author": {
        "name": "Jingsong Lee",
        "email": "jingsonglee0@gmail.com",
        "time": "Thu Jun 11 17:19:59 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Thu Jun 11 17:19:59 2026 +0800"
      },
      "message": "[vector] Support vector search options (#8203)\n\nExpose query-time vector search options through the Java and Python\nvector search APIs, and thread them into Flink, Spark, and Lumina search\nexecution. This lets callers configure index-specific query parameters\nsuch as `ivf.nprobe` and `hnsw.ef_search` at search time."
    },
    {
      "commit": "64b0534786c8295eec2c0cfd5406a084fdbde85e",
      "tree": "b2bc6b70b0078cfbb2107802f9d53472b8bedf60",
      "parents": [
        "14ab97b363739ec78af7a9e3c1650dfd09fe61dc"
      ],
      "author": {
        "name": "yugan",
        "email": "gyustorm@gmail.com",
        "time": "Thu Jun 11 16:30:29 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Thu Jun 11 16:30:29 2026 +0800"
      },
      "message": "[common] Fix BinaryRowSerializer reuse buffer never shrinking (#8160)"
    },
    {
      "commit": "14ab97b363739ec78af7a9e3c1650dfd09fe61dc",
      "tree": "d6b4334e9020e369cb4fa0ec25888e88714f91e3",
      "parents": [
        "9e6ffc80a17b1416e9996a994e45a04af29877ff"
      ],
      "author": {
        "name": "XiaoHongbo",
        "email": "xiaohongbo.xhb@alibaba-inc.com",
        "time": "Thu Jun 11 15:58:27 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Thu Jun 11 15:58:27 2026 +0800"
      },
      "message": "[python][ray] Support dynamic_table_options in read_paimon (#8195)"
    },
    {
      "commit": "9e6ffc80a17b1416e9996a994e45a04af29877ff",
      "tree": "1551aca6d6bd8c9dcf9e9ed867f3e0aa06b3052d",
      "parents": [
        "5c54f301f7423699b70529865bf8e85a7c020e02"
      ],
      "author": {
        "name": "Stefanietry",
        "email": "zhou1172026225@gmail.com",
        "time": "Thu Jun 11 13:33:00 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Thu Jun 11 13:33:00 2026 +0800"
      },
      "message": "[spark] Skip empty global index builds (#8202)"
    },
    {
      "commit": "5c54f301f7423699b70529865bf8e85a7c020e02",
      "tree": "5e3a06f79d824dba5fb44883d927e80716f5fa7a",
      "parents": [
        "62d24229169b54c5b41aa2afa943038fd303e0d4"
      ],
      "author": {
        "name": "Stefanietry",
        "email": "zhou1172026225@gmail.com",
        "time": "Thu Jun 11 13:13:28 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Thu Jun 11 13:13:28 2026 +0800"
      },
      "message": "[spark] validate vector search limit range (#8200)"
    },
    {
      "commit": "62d24229169b54c5b41aa2afa943038fd303e0d4",
      "tree": "4a7961f4b1038de985372f192a750ff8c7ffb26a",
      "parents": [
        "5d269282964d02702934cc90c2ddeb5611937c3c"
      ],
      "author": {
        "name": "yugan",
        "email": "gyustorm@gmail.com",
        "time": "Thu Jun 11 13:09:24 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Thu Jun 11 13:09:24 2026 +0800"
      },
      "message": "[common] Fix RowHelper internal buffer never shrinking for large records (#8159)\n\n`RowHelper.reuseWriter` grows its internal `MemorySegment` for large\nrecords (e.g. 100MB+), but `BinaryRowWriter.reset()` only resets the\ncursor without releasing the oversized segment. Additionally,\n`InternalRowSerializer.serialize()` can exit via `EOFException` — a\nnormal signal when the sort buffer is full\n(`SimpleCollectingOutputView.nextSegment()` throws it, caught by\n`BinaryInMemorySortBuffer.write()`) — skipping any cleanup of the\nbloated buffer.\n\nWith many buckets (e.g. 256), each bucket\u0027s writer independently retains\nan inflated buffer: 256 × 100MB+ \u003d tens of GB, causing OOM."
    },
    {
      "commit": "5d269282964d02702934cc90c2ddeb5611937c3c",
      "tree": "6d4e4074d9bd2d26cfca5750c956b5f192113bb3",
      "parents": [
        "6132489ab7e70c3d8121718c8515af13c315699e"
      ],
      "author": {
        "name": "Mao",
        "email": "1684060+mao-liu@users.noreply.github.com",
        "time": "Thu Jun 11 13:13:32 2026 +1000"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Thu Jun 11 11:13:32 2026 +0800"
      },
      "message": "[core][flink] Manifest cache benchmarks + expose more manifest cache options (#8186)"
    },
    {
      "commit": "6132489ab7e70c3d8121718c8515af13c315699e",
      "tree": "e90b3141bdcf7bddae4494370af62978f279b68c",
      "parents": [
        "4ec5c1997e338d28d405c2b47efa05ba7d245ab8"
      ],
      "author": {
        "name": "dependabot[bot]",
        "email": "49699333+dependabot[bot]@users.noreply.github.com",
        "time": "Thu Jun 11 09:35:41 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Thu Jun 11 09:35:41 2026 +0800"
      },
      "message": "Bump shell-quote from 1.8.3 to 1.8.4 in /docs (#8197)"
    },
    {
      "commit": "4ec5c1997e338d28d405c2b47efa05ba7d245ab8",
      "tree": "847a4399fdddc19e26a07973e3df07f304279469",
      "parents": [
        "ded3aada285cd3180d3dab7f5a7111cce250bb26"
      ],
      "author": {
        "name": "Juntao Zhang",
        "email": "juntzhang@foxmail.com",
        "time": "Wed Jun 10 22:58:38 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Wed Jun 10 22:58:38 2026 +0800"
      },
      "message": "[flink] Optimize byte array comparison for sorters (#8192)"
    },
    {
      "commit": "ded3aada285cd3180d3dab7f5a7111cce250bb26",
      "tree": "04db70575c04d4bbc8a702f447b122ac402cbb79",
      "parents": [
        "bbac1b691281daef2e987cd99fd0ddcd5ac8300d"
      ],
      "author": {
        "name": "XiaoHongbo",
        "email": "xiaohongbo.xhb@alibaba-inc.com",
        "time": "Wed Jun 10 22:04:33 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Wed Jun 10 22:04:33 2026 +0800"
      },
      "message": "[doc] Add reading blob columns section to daft documentation (#8190)"
    },
    {
      "commit": "bbac1b691281daef2e987cd99fd0ddcd5ac8300d",
      "tree": "fc177e7b3c6591f49c2a7c7bbf48f99c26ea8f16",
      "parents": [
        "f63c1d8b39eaa8b13e5b3b17b0b0b1f77e466de0"
      ],
      "author": {
        "name": "LsomeYeah",
        "email": "94825748+LsomeYeah@users.noreply.github.com",
        "time": "Wed Jun 10 21:13:32 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Wed Jun 10 21:13:32 2026 +0800"
      },
      "message": "[core] Improve file index option validation message (#8194)"
    },
    {
      "commit": "f63c1d8b39eaa8b13e5b3b17b0b0b1f77e466de0",
      "tree": "13ca7698b331c5157bb29d9e6d0dac091470be4c",
      "parents": [
        "7fe42f55deb7b525a868f49c98896299e7df1174"
      ],
      "author": {
        "name": "yuzelin",
        "email": "33053040+yuzelin@users.noreply.github.com",
        "time": "Wed Jun 10 15:11:01 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Wed Jun 10 15:11:01 2026 +0800"
      },
      "message": "[core] Custom tag callback support to be initialized with table (#8189)"
    },
    {
      "commit": "7fe42f55deb7b525a868f49c98896299e7df1174",
      "tree": "af0417027907147e1f4f127bcbd9c8d56ec625e1",
      "parents": [
        "94b468a2b93276a7aa73646658b39552cc6083a7"
      ],
      "author": {
        "name": "LsomeYeah",
        "email": "94825748+LsomeYeah@users.noreply.github.com",
        "time": "Wed Jun 10 14:57:49 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Wed Jun 10 14:57:49 2026 +0800"
      },
      "message": "[format] Support dynamic parquet read batch size (#8188)"
    },
    {
      "commit": "94b468a2b93276a7aa73646658b39552cc6083a7",
      "tree": "0629cfd95c06de62ebc74bcc6bd1ec3807c7b343",
      "parents": [
        "1384d724c6177e53b947d0d8c908744f41a8f005"
      ],
      "author": {
        "name": "YeJunHao",
        "email": "41894543+leaves12138@users.noreply.github.com",
        "time": "Tue Jun 09 19:37:21 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Tue Jun 09 19:37:21 2026 +0800"
      },
      "message": "[core] Drop unsafe global indexes during row-id reassign (#8166)\n\nThis PR updates metadata-only row-id reassignment to drop individual\nglobal index entries whose row ranges cannot be safely rewritten,\ninstead of failing the whole row-id reassignment.\n\nWhen a global index entry\u0027s row range is not fully covered by the\ndata-file row-id mapping, keeping the old entry would be unsafe because\nthe index file still stores row IDs relative to the old range. Dropping\nonly that entry lets row-id reassignment proceed while allowing the\nmissing global index range to be rebuilt later."
    },
    {
      "commit": "1384d724c6177e53b947d0d8c908744f41a8f005",
      "tree": "cbb27b958cd5b7124a287bab12c1c717ede98856",
      "parents": [
        "81a03758c6c5b586124b95676e7d16be07aa9810"
      ],
      "author": {
        "name": "jianguotian",
        "email": "841604084@qq.com",
        "time": "Tue Jun 09 16:29:45 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Tue Jun 09 16:29:45 2026 +0800"
      },
      "message": "[core][mosaic] Support MOSAIC in FormatTable (#8180)"
    },
    {
      "commit": "81a03758c6c5b586124b95676e7d16be07aa9810",
      "tree": "abe242d36dc21bca34df18a6acb056f5c4826045",
      "parents": [
        "773f942441aa6b4a51975bb9190ddda84c83a448"
      ],
      "author": {
        "name": "Jingsong Lee",
        "email": "jingsonglee0@gmail.com",
        "time": "Tue Jun 09 15:40:55 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Tue Jun 09 15:40:55 2026 +0800"
      },
      "message": "[mosaic] Support ARRAY and MAP type validation (#8178)\n\n- Remove `UnsupportedOperationException` for ARRAY and MAP types in\n`MosaicRowTypeVisitor`, allowing these types to pass schema validation\n- Recursively validate child types (element type for ARRAY, key/value\ntypes for MAP)\n- Once paimon-mosaic upstream releases 0.2.0 (which includes ARRAY/MAP\nsupport in the native library), we only need to bump the mosaic\ndependency version"
    },
    {
      "commit": "773f942441aa6b4a51975bb9190ddda84c83a448",
      "tree": "4d1e163a3e83ce712e112e673c15bea96f32d871",
      "parents": [
        "b4d8b657850521e219d97142d9325cfeec2acf9b"
      ],
      "author": {
        "name": "QuakeWang",
        "email": "45645138+QuakeWang@users.noreply.github.com",
        "time": "Tue Jun 09 13:58:45 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Tue Jun 09 13:58:45 2026 +0800"
      },
      "message": "[flink][cdc] Preserve schema id in source checkpoint (#8163)\n\nCDC source enumerator checkpoint currently stores pending splits and\nper-table next snapshot id, but it does not persist the last observed\nschema id for each table.\n\nAfter restore, the next generated split may use `lastSchemaId \u003d null`\neven though the table had already emitted splits with an earlier schema.\nIn that case schema diff generation treats the current schema as a\ncreate-table event instead of the expected schema evolution event, so\ndownstream schema changes can be lost for existing target tables.\n\nThis PR adds explicit per-table checkpoint progress for CDC source\nenumerator:\n\n- stores both `nextSnapshotId` and `schemaId`\n- restores `TableStatus.schemaId` together with the stream scan position\n- upgrades `CDCCheckpoint.Serializer` to v2\n- keeps v1 checkpoint compatibility with missing `schemaId` restored as\n`null`\n- writes the nested split serializer version in the new checkpoint\nformat"
    },
    {
      "commit": "b4d8b657850521e219d97142d9325cfeec2acf9b",
      "tree": "c9fe103990d9fedccac457e9f020535252f9204f",
      "parents": [
        "4bba18669e3aaa16256ffb535d2cbaa04e6e4134"
      ],
      "author": {
        "name": "YeJunHao",
        "email": "41894543+leaves12138@users.noreply.github.com",
        "time": "Tue Jun 09 13:52:47 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Tue Jun 09 13:52:47 2026 +0800"
      },
      "message": "[core][flink] Detect overwrite conflicts for index commits (#7972)"
    },
    {
      "commit": "4bba18669e3aaa16256ffb535d2cbaa04e6e4134",
      "tree": "67dc1160f89e979c70e3db607244f4a17335b71e",
      "parents": [
        "ade1bb8d87d6a4d8de09e2705b5fa4df1c62c28c"
      ],
      "author": {
        "name": "YeJunHao",
        "email": "41894543+leaves12138@users.noreply.github.com",
        "time": "Tue Jun 09 13:51:44 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Tue Jun 09 13:51:44 2026 +0800"
      },
      "message": "[core] Validate schema after applying schema changes (#8063)"
    },
    {
      "commit": "ade1bb8d87d6a4d8de09e2705b5fa4df1c62c28c",
      "tree": "f67e61ed9d13b7afb8bed094a862ea3e6a408c16",
      "parents": [
        "235aac83a557c476d908331edcbd8fbb60322fde"
      ],
      "author": {
        "name": "Stefanietry",
        "email": "zhou1172026225@gmail.com",
        "time": "Tue Jun 09 13:50:32 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Tue Jun 09 13:50:32 2026 +0800"
      },
      "message": "[core] fix non null predicate for blob \u0026 vector (#8177)"
    },
    {
      "commit": "235aac83a557c476d908331edcbd8fbb60322fde",
      "tree": "08d737dd30911465522804ec3cd5d47ba3f74ea8",
      "parents": [
        "b0809bc35db399f74063b6e98b59917811ecfad6"
      ],
      "author": {
        "name": "YeJunHao",
        "email": "41894543+leaves12138@users.noreply.github.com",
        "time": "Tue Jun 09 13:33:37 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Tue Jun 09 13:33:37 2026 +0800"
      },
      "message": "[core] Fix data evolution compact partition filtering (#8169)"
    },
    {
      "commit": "b0809bc35db399f74063b6e98b59917811ecfad6",
      "tree": "33da8dd5927647f939931636dd0d3e6aa0156472",
      "parents": [
        "76412d0e3e6d3dbc0b7b3d956052f372ca67d6bc"
      ],
      "author": {
        "name": "XiaoHongbo",
        "email": "xiaohongbo.xhb@alibaba-inc.com",
        "time": "Tue Jun 09 12:13:12 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Tue Jun 09 12:13:12 2026 +0800"
      },
      "message": "[python][ray] Add e2e test and docs for ray merge_into feature backfill (#8172)"
    },
    {
      "commit": "76412d0e3e6d3dbc0b7b3d956052f372ca67d6bc",
      "tree": "648df59ccba1790cab581c918686559515a60e88",
      "parents": [
        "9636052e3d424a976bd5a0a7ccd8e176da966671"
      ],
      "author": {
        "name": "XiaoHongbo",
        "email": "xiaohongbo.xhb@alibaba-inc.com",
        "time": "Tue Jun 09 12:10:47 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Tue Jun 09 12:10:47 2026 +0800"
      },
      "message": "[python][daft] Add daft on ray e2e test and documentation (#8173)"
    },
    {
      "commit": "9636052e3d424a976bd5a0a7ccd8e176da966671",
      "tree": "aa175425c565ba251a4ddf58e77a05cb8fbf7319",
      "parents": [
        "3ae1704b0d217a5e9f73722545680efd800feeac"
      ],
      "author": {
        "name": "XiaoHongbo",
        "email": "xiaohongbo.xhb@alibaba-inc.com",
        "time": "Mon Jun 08 20:29:53 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Mon Jun 08 20:29:53 2026 +0800"
      },
      "message": "[python] Fix update-by-row-id for list and map column types (#8162)"
    },
    {
      "commit": "3ae1704b0d217a5e9f73722545680efd800feeac",
      "tree": "68f9c4a55a4d44e4c9976a6cbc16be8cd89a7818",
      "parents": [
        "f3c18a85e9a4ce1bb8fa2944ebefc1ca5a808bc5"
      ],
      "author": {
        "name": "Arnav Balyan",
        "email": "60175178+ArnavBalyan@users.noreply.github.com",
        "time": "Mon Jun 08 17:12:23 2026 +0530"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Mon Jun 08 19:42:23 2026 +0800"
      },
      "message": "[hive] Bug fix for stuck thread on hive prepareCommit failure (#8142)\n\n- Today, any hive job writing to Paimon table failing before precommit,\ncauses job to be permanently stuck and does not exit cleanly.\n- This is due to Paimon throwing another exception in the cleanup\nprocess, (when there are no files to be cleaned up). In such scenarios,\nPaimon throws unchecked runtime exception not caught at caller.\n- Causing the parent thread to be permanently stuck waiting job failure\nto arrive.\n- Ensure that we avoid unprotected file access, and can handle\nexceptions when precommit files are not generated.\n- This also fixes an issue of CI being stuck and timing out in some\ncases when the UT has a failure."
    },
    {
      "commit": "f3c18a85e9a4ce1bb8fa2944ebefc1ca5a808bc5",
      "tree": "4a60250aa2d709eb5e2914e1260ae246957c0a4d",
      "parents": [
        "48f9e55d1d647fc821e9624af6a1af2153260cf6"
      ],
      "author": {
        "name": "zhoulii",
        "email": "zhouli.dev@foxmail.com",
        "time": "Mon Jun 08 19:41:19 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Mon Jun 08 19:41:19 2026 +0800"
      },
      "message": "[python] Introduce ResolvingFileIO for pypaimon mirroring Java implementation (#8165)"
    },
    {
      "commit": "48f9e55d1d647fc821e9624af6a1af2153260cf6",
      "tree": "d336d4c4134817f86d763ea8a3ae60ae23dbcc38",
      "parents": [
        "2281765a7f44c643353266efb1f95053f9a059ea"
      ],
      "author": {
        "name": "umi",
        "email": "55790489+discivigour@users.noreply.github.com",
        "time": "Mon Jun 08 19:40:06 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Mon Jun 08 19:40:06 2026 +0800"
      },
      "message": "[core] Minor refactor for manifest sort code  (#8119)"
    },
    {
      "commit": "2281765a7f44c643353266efb1f95053f9a059ea",
      "tree": "67a42dc43159a12da4dcc58495fc9249c3acfade",
      "parents": [
        "1e1cee60d086c8aeaa12fe1b63ca2f2a17b975c3"
      ],
      "author": {
        "name": "Faiz",
        "email": "wxy407679@antgroup.com",
        "time": "Mon Jun 08 19:14:25 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Mon Jun 08 19:14:25 2026 +0800"
      },
      "message": "[python] support chunk shuffle for planning and 3-layer shuffle for pytorch Dataset (#8064)"
    },
    {
      "commit": "1e1cee60d086c8aeaa12fe1b63ca2f2a17b975c3",
      "tree": "374e01c4fa322ed258d233ce1677f08e9b45ba38",
      "parents": [
        "38022feae0daa1dcd8dd2dd2aa449083dc6a13e0"
      ],
      "author": {
        "name": "Zouxxyy",
        "email": "zouxinyu.zxy@alibaba-inc.com",
        "time": "Mon Jun 08 19:12:02 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Mon Jun 08 19:12:02 2026 +0800"
      },
      "message": "[core] Validate file index data type compatibility at table creation time (#8168)"
    },
    {
      "commit": "38022feae0daa1dcd8dd2dd2aa449083dc6a13e0",
      "tree": "10d0fa9736a63640bdfd4277eeddcef46e1e5c4e",
      "parents": [
        "34050f0b8200b531089001473c6bcc2baefedeb5"
      ],
      "author": {
        "name": "XiaoHongbo",
        "email": "xiaohongbo.xhb@alibaba-inc.com",
        "time": "Mon Jun 08 18:58:46 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Mon Jun 08 18:58:46 2026 +0800"
      },
      "message": "[python] Fix blob column updates with rolling files (#8156)\n\nBefore this PR, all rolled blob update files were assigned the same\n`first_row_id`. As a result, valid BLOB column updates could fail during\ncommit with `Row ID existence conflict`, because the dedicated blob\nfiles no longer described the correct row-id ranges.\n\nThis PR assigns proper contiguous row-id ranges to rolled blob files and\nrelaxes row-id existence checking for dedicated BLOB files to validate\nrange coverage by normal data files."
    },
    {
      "commit": "34050f0b8200b531089001473c6bcc2baefedeb5",
      "tree": "beca96d1724d5051d64a01cfcd5c417c378f84ad",
      "parents": [
        "ff62c57b59d1ea41bd6c31abdaf3c254da745fbe"
      ],
      "author": {
        "name": "sanshi",
        "email": "43472713+lilei1128@users.noreply.github.com",
        "time": "Mon Jun 08 17:47:15 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Mon Jun 08 17:47:15 2026 +0800"
      },
      "message": "[docs] Add clustering.incremental.mode documentation for incremental clustering (#8161)"
    },
    {
      "commit": "ff62c57b59d1ea41bd6c31abdaf3c254da745fbe",
      "tree": "0b2eff47110956ae850631d33047b2ff45cc1775",
      "parents": [
        "85de16f41a202aa2aa12474da7872ac304eda1e5"
      ],
      "author": {
        "name": "QuakeWang",
        "email": "45645138+QuakeWang@users.noreply.github.com",
        "time": "Mon Jun 08 13:27:09 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Mon Jun 08 13:27:09 2026 +0800"
      },
      "message": "[python][ray] Abort worker writes on failure (#8124)\n\nRay write tasks previously closed on the worker-side `TableWrite` when\nwrite, prepare, or close failed. That is unsafe because close can flush\npending data, and `prepare_commit()` can materialize normal, blob, or\nvector files before a later failure prevents the driver commit.\n\nThis PR makes worker-side Ray writes abort on failure and propagates\nabort through `TableWrite` and `FileStoreWrite`. It also keeps dedicated\nblob/vector metadata reachable from the parent writer so abort can\ndelete files produced before the failed commit path."
    },
    {
      "commit": "85de16f41a202aa2aa12474da7872ac304eda1e5",
      "tree": "f442117bfea7efff36d66902b7a51bf5c174d470",
      "parents": [
        "5dda7da972ab0f9c4a6a4c9a8c86438e33430df0"
      ],
      "author": {
        "name": "yugan",
        "email": "gyustorm@gmail.com",
        "time": "Mon Jun 08 13:25:28 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Mon Jun 08 13:25:28 2026 +0800"
      },
      "message": "[common] Fix integer overflow in HeapBytesVector.reserveBytes() (#8158)\n\n`HeapBytesVector.reserveBytes()` computes `newCapacity * 2` using plain\n`int` multiplication. When `newCapacity` exceeds ~1.07 billion bytes,\nthis overflows `Integer.MAX_VALUE`, causing `NegativeArraySizeException`\nor silent data corruption during compaction reads of large records."
    },
    {
      "commit": "5dda7da972ab0f9c4a6a4c9a8c86438e33430df0",
      "tree": "98f7cd25c41bd59473b60dd6c5cc2354df58438b",
      "parents": [
        "8dda186de4612ec98c1dcc27da006a940e58e9ed"
      ],
      "author": {
        "name": "QuakeWang",
        "email": "45645138+QuakeWang@users.noreply.github.com",
        "time": "Mon Jun 08 12:09:10 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Mon Jun 08 12:09:10 2026 +0800"
      },
      "message": "[mosaic] Skip row group read for all-missing projection (#8157)\n\nWhen all projected columns are missing from a Mosaic file due to schema\nevolution, the reader currently does not push any native projection. It\nstill calls `readRowGroup`, which may read physical file columns even\nthough the final result only contains\n  NULL values.\n\nThis PR adds a Mosaic reader fast path for all-missing projections. The\nreader still uses row group metadata for row counts and predicate\npruning, but returns an all-NULL batch directly when the row group\nmatches, avoiding the native row group read."
    },
    {
      "commit": "8dda186de4612ec98c1dcc27da006a940e58e9ed",
      "tree": "50f0ec8cd9e6c300edabe18e2802e920dcb6949d",
      "parents": [
        "6508aba45912c637586313aed678b1b2503253c9"
      ],
      "author": {
        "name": "Jordan Epstein",
        "email": "32082339+jordepic@users.noreply.github.com",
        "time": "Sun Jun 07 20:58:50 2026 -0500"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Mon Jun 08 09:58:50 2026 +0800"
      },
      "message": "[flink] Fix RemoteTableQuery key serializer to use trimmed primary keys (#8145) (#8146)"
    },
    {
      "commit": "6508aba45912c637586313aed678b1b2503253c9",
      "tree": "ee423ff4e0fcb2a593b4ba6aa3880c3a81949af5",
      "parents": [
        "086c47739eefc9c3a95c81abe176b40a5fd8ff6f"
      ],
      "author": {
        "name": "Juntao Zhang",
        "email": "juntzhang@foxmail.com",
        "time": "Mon Jun 08 08:58:46 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Mon Jun 08 08:58:46 2026 +0800"
      },
      "message": "[core] Fix chain table anchor lookup for multi partition keys (#8154)"
    },
    {
      "commit": "086c47739eefc9c3a95c81abe176b40a5fd8ff6f",
      "tree": "73bd60e017167d6dcf15a0fca2bb2529ebdfd9fd",
      "parents": [
        "29561ea43acac5878adc361ded2071a46f994fd1"
      ],
      "author": {
        "name": "QuakeWang",
        "email": "45645138+QuakeWang@users.noreply.github.com",
        "time": "Mon Jun 08 08:35:31 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Mon Jun 08 08:35:31 2026 +0800"
      },
      "message": "[mosaic] Clean up reader resources on close (#8152)"
    },
    {
      "commit": "29561ea43acac5878adc361ded2071a46f994fd1",
      "tree": "b1b9cb487b000fb79e487167213a6dd18bf6c326",
      "parents": [
        "6b887dbd54900132f2c4443aa4b6cdbc7bfb9bf1"
      ],
      "author": {
        "name": "XiaoHongbo",
        "email": "xiaohongbo.xhb@alibaba-inc.com",
        "time": "Sun Jun 07 22:53:08 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Sun Jun 07 22:53:08 2026 +0800"
      },
      "message": "[python] Fix partial update for normal column on blob/vector tables (#8147)"
    },
    {
      "commit": "6b887dbd54900132f2c4443aa4b6cdbc7bfb9bf1",
      "tree": "4f6a4a349a31c6044f1726ec352db2609ddb5cdb",
      "parents": [
        "1c9c55b9a88a8b593113dd66a52294af78e33d72"
      ],
      "author": {
        "name": "Junrui Lee",
        "email": "jrlee.ljr@gmail.com",
        "time": "Sun Jun 07 21:14:24 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Sun Jun 07 21:14:24 2026 +0800"
      },
      "message": "[spark] Support FROM (query) export in COPY INTO location (#8096)\n\nExtend `COPY INTO \u003clocation\u003e` (export) to accept an inline query as the\nsource, not just a table:\n\n```sql\nCOPY INTO \u0027/export/active_users/\u0027\nFROM (SELECT id, name FROM my_db.users WHERE active \u003d TRUE)\nFILE_FORMAT \u003d (TYPE \u003d CSV, HEADER \u003d TRUE);\n```\n\nPreviously only `FROM table_name` was supported. The inline query is\nparsed through the\nsession (Paimon) parser, so it behaves exactly like the same query run\nvia `spark.sql`,\nincluding Paimon parser rules such as the v1 function rewrite."
    },
    {
      "commit": "1c9c55b9a88a8b593113dd66a52294af78e33d72",
      "tree": "5c3085aac80439913427758e744d3c6af6d81880",
      "parents": [
        "b53da94ee277bd5cc7aaf83af8f5f2a82d7786c0"
      ],
      "author": {
        "name": "XiaoHongbo",
        "email": "xiaohongbo.xhb@alibaba-inc.com",
        "time": "Sun Jun 07 21:12:22 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Sun Jun 07 21:12:22 2026 +0800"
      },
      "message": "[python] Fix Python CI failure after Daft 0.7.15 release (#8151)\n\nPython CI installs the latest Daft version. After Daft 0.7.15 was\nreleased, blob tests started failing\nbecause Daft now expects the File range field to be `position` instead\nof `offset`.\n\nPyPaimon still generated `offset` for blob File values, so this PR\nchooses the field name by Daft version\n  and keeps tests compatible with both old and new Daft versions."
    },
    {
      "commit": "b53da94ee277bd5cc7aaf83af8f5f2a82d7786c0",
      "tree": "b41fa36251a86190bee959602915e8725b513be7",
      "parents": [
        "a8699b382a496fc456cb302836def78078eafb98"
      ],
      "author": {
        "name": "lxy",
        "email": "38709059+lxy-9602@users.noreply.github.com",
        "time": "Sun Jun 07 19:52:54 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Sun Jun 07 19:52:54 2026 +0800"
      },
      "message": "[core] Fix BSI reader predicate pruning for Long.MIN_VALUE boundary (#8150)"
    },
    {
      "commit": "a8699b382a496fc456cb302836def78078eafb98",
      "tree": "f426b07086bab260253406a42389c873ad8ba4a8",
      "parents": [
        "2ea97ae948564f4c9bb40e10f364f7423666f46c"
      ],
      "author": {
        "name": "XiaoHongbo",
        "email": "xiaohongbo.xhb@alibaba-inc.com",
        "time": "Sun Jun 07 19:51:41 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Sun Jun 07 19:51:41 2026 +0800"
      },
      "message": "[python][ray] Optimize merge into self-merge updates on data evolution table (#8141)\n\nWhen source and target are the same table in `merge_into` (self-merge),\nskip the join step and read the target table directly with\n`_ROW_ID` projection. This avoids a full table join and significantly\nimproves performance for self-merge update scenarios on\n  data-evolution tables."
    },
    {
      "commit": "2ea97ae948564f4c9bb40e10f364f7423666f46c",
      "tree": "7acddf02aff4926e5ec2828f0be35dbcb68788b7",
      "parents": [
        "cf5db9970aa084302be8be4155be694f54979bc0"
      ],
      "author": {
        "name": "QuakeWang",
        "email": "45645138+QuakeWang@users.noreply.github.com",
        "time": "Sun Jun 07 19:50:25 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Sun Jun 07 19:50:25 2026 +0800"
      },
      "message": "[mosaic] Clean up reader resources on construction failure (#8144)"
    },
    {
      "commit": "cf5db9970aa084302be8be4155be694f54979bc0",
      "tree": "30e2acba20269d43214458e2f786af205430abc4",
      "parents": [
        "f2376a8682ce4fd24694e75c98254c69dc56582b"
      ],
      "author": {
        "name": "chaoyang",
        "email": "chaoyang@apache.org",
        "time": "Sat Jun 06 16:57:03 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Sat Jun 06 16:57:03 2026 +0800"
      },
      "message": "[python] Align read-time schema evolution by field id (#8126)\n\nRead-time schema evolution matched a data file\u0027s columns to the read\nschema by **name** rather than by **field id**. So after a column rename\nthe old file\u0027s column became unreachable and its rows read `NULL`, and\ndropping a column then re-adding the same name revived the dropped\ncolumn\u0027s stale data.\n\nAlign per file by field id instead: read each file under its own field\nnames/types and normalize to the latest read schema by id — a rename\nfollows the id, and a re-added name gets a fresh id and pads `NULL`.\nNon-evolving reads stay zero-copy.\n\nAdds regression tests for rename / drop-then-readd / swap across\nappend-only, primary-key (merge engines) and nested top-level columns."
    },
    {
      "commit": "f2376a8682ce4fd24694e75c98254c69dc56582b",
      "tree": "35fce7c762302c2ba600882d7d41ca8f8a6fbd1a",
      "parents": [
        "625fd3fef7ada4e2dbc40d9824875d2a51f6edc4"
      ],
      "author": {
        "name": "QuakeWang",
        "email": "45645138+QuakeWang@users.noreply.github.com",
        "time": "Sat Jun 06 16:50:23 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Sat Jun 06 16:50:23 2026 +0800"
      },
      "message": "[mosaic] Clean up writer resources on construction failure (#8129)"
    },
    {
      "commit": "625fd3fef7ada4e2dbc40d9824875d2a51f6edc4",
      "tree": "2b871a52e0ddd712c15542b6e033e548e30a3bb6",
      "parents": [
        "eef86147eeae1cb8df09af7f362ecb1190f99f3e"
      ],
      "author": {
        "name": "XiaoHongbo",
        "email": "xiaohongbo.xhb@alibaba-inc.com",
        "time": "Sat Jun 06 16:49:15 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Sat Jun 06 16:49:15 2026 +0800"
      },
      "message": "[python][ray] Fix overly strict rejection of matched updates on partitioned tables (#8127)"
    },
    {
      "commit": "eef86147eeae1cb8df09af7f362ecb1190f99f3e",
      "tree": "9c956490630312394d15edbdf2d59e56ffc2a7f8",
      "parents": [
        "c24830e3a598e1c1ece21a7557face3cfc8155e7"
      ],
      "author": {
        "name": "Mao",
        "email": "1684060+mao-liu@users.noreply.github.com",
        "time": "Sat Jun 06 18:49:05 2026 +1000"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Sat Jun 06 16:49:05 2026 +0800"
      },
      "message": "[flink] Writer coordinator for compaction (#8128)"
    },
    {
      "commit": "c24830e3a598e1c1ece21a7557face3cfc8155e7",
      "tree": "b440bbdf55209a71a838c4d503a51d4947e318cd",
      "parents": [
        "31daab79c74d1dedc6735ea42d9a0f577fbd8428"
      ],
      "author": {
        "name": "Arnav Balyan",
        "email": "60175178+ArnavBalyan@users.noreply.github.com",
        "time": "Sat Jun 06 14:18:07 2026 +0530"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Sat Jun 06 16:48:07 2026 +0800"
      },
      "message": "[core] Don\u0027t allow full compaction with lookup changelog producer (#7999)"
    },
    {
      "commit": "31daab79c74d1dedc6735ea42d9a0f577fbd8428",
      "tree": "ffcf28ffffeef256786d61ad26dcc04a8219b800",
      "parents": [
        "36d5634ec018d331a94e9ca949cede5b6375b709"
      ],
      "author": {
        "name": "Shekhar Prasad Rajak",
        "email": "5774448+Shekharrajak@users.noreply.github.com",
        "time": "Sat Jun 06 14:16:05 2026 +0530"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Sat Jun 06 16:46:05 2026 +0800"
      },
      "message": "feat: partition archive support - initial version (#8121)"
    },
    {
      "commit": "36d5634ec018d331a94e9ca949cede5b6375b709",
      "tree": "ae2f574403ae29eccdf0e61eb023627691a44d09",
      "parents": [
        "219cecd46eb8b60ca8a8c1cf8cbb3c5d983d1fde"
      ],
      "author": {
        "name": "Jordan Epstein",
        "email": "32082339+jordepic@users.noreply.github.com",
        "time": "Sat Jun 06 03:45:02 2026 -0500"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Sat Jun 06 16:45:02 2026 +0800"
      },
      "message": "[core] Download remote lookup files in LocalTableQuery reads (#8123) (#8125)"
    },
    {
      "commit": "219cecd46eb8b60ca8a8c1cf8cbb3c5d983d1fde",
      "tree": "84f992b5adc2481eee9f9d1d7f89d17348871965",
      "parents": [
        "2686d48fc3b97b6ba326a444fcfacf6e419c89f0"
      ],
      "author": {
        "name": "XiaoHongbo",
        "email": "xiaohongbo.xhb@alibaba-inc.com",
        "time": "Sat Jun 06 16:43:07 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Sat Jun 06 16:43:07 2026 +0800"
      },
      "message": "[python][ray] Support multi-clause fall-through in merge_into (#8115)"
    },
    {
      "commit": "2686d48fc3b97b6ba326a444fcfacf6e419c89f0",
      "tree": "41232d2ec876c3ea2016018318d5f406328d5392",
      "parents": [
        "ecc66657383a4d4c16b8fc43884d0a4fb2924b50"
      ],
      "author": {
        "name": "Arnav Balyan",
        "email": "60175178+ArnavBalyan@users.noreply.github.com",
        "time": "Sat Jun 06 14:12:46 2026 +0530"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Sat Jun 06 16:42:46 2026 +0800"
      },
      "message": "[core] Fix data corruption for FieldSumAgg (#7992)"
    },
    {
      "commit": "ecc66657383a4d4c16b8fc43884d0a4fb2924b50",
      "tree": "af4f5db48c2150c1324a39df25cedf853d915e95",
      "parents": [
        "a39601510945c2baf0b75bf9d80a99a50eb0e0f5"
      ],
      "author": {
        "name": "Arnav Balyan",
        "email": "60175178+ArnavBalyan@users.noreply.github.com",
        "time": "Sat Jun 06 14:10:48 2026 +0530"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Sat Jun 06 16:40:48 2026 +0800"
      },
      "message": "[docs] Fix typo and add unsupported example for unsupported config (#8057)"
    },
    {
      "commit": "a39601510945c2baf0b75bf9d80a99a50eb0e0f5",
      "tree": "8b97b8f1a7b9155b8a64900f58e48171c1461f6d",
      "parents": [
        "09696214535da777cb83fafcd734b5fc364f10b2"
      ],
      "author": {
        "name": "yuxia Luo",
        "email": "luoyuxia@alumni.sjtu.edu.cn",
        "time": "Sat Jun 06 16:38:02 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Sat Jun 06 16:38:02 2026 +0800"
      },
      "message": "[arrow] Fix Arrow to Paimon vector conversion casts (#8138)"
    },
    {
      "commit": "09696214535da777cb83fafcd734b5fc364f10b2",
      "tree": "18df5267b0703ac21fb463d2bc118ac5cfd4706d",
      "parents": [
        "0e13c28ce2a6624b3d470416a3b23a7a4661348c"
      ],
      "author": {
        "name": "YeJunHao",
        "email": "41894543+leaves12138@users.noreply.github.com",
        "time": "Sat Jun 06 16:37:43 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Sat Jun 06 16:37:43 2026 +0800"
      },
      "message": "[core] Improve BTree global index option handling (#8137)"
    },
    {
      "commit": "0e13c28ce2a6624b3d470416a3b23a7a4661348c",
      "tree": "a3bcbe6cfb03c54b31b770b2cd4d03b94d70290d",
      "parents": [
        "df4b475ec6cbc7ede22b61c84edc79103c56c584"
      ],
      "author": {
        "name": "YeJunHao",
        "email": "41894543+leaves12138@users.noreply.github.com",
        "time": "Sat Jun 06 16:37:26 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Sat Jun 06 16:37:26 2026 +0800"
      },
      "message": "[core] Fix BTree index meta for empty keys (#8140)\n\nBTreeIndexMeta used zero-length encoded keys to represent null\nboundaries. This collides with valid empty serialized keys, for example\nan empty string, and can deserialize BTree metadata with a null boundary\nkey. Readers or compactors that wrap the boundary key can then hit a\nNullPointerException."
    },
    {
      "commit": "df4b475ec6cbc7ede22b61c84edc79103c56c584",
      "tree": "650e17b4a38ee21d17175f1e74cf0f612de9e500",
      "parents": [
        "4bdaa76c882c8ab6f742fa0d4ff5d2d00fed1d33"
      ],
      "author": {
        "name": "junmuz",
        "email": "4795269+junmuz@users.noreply.github.com",
        "time": "Sat Jun 06 09:36:29 2026 +0100"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Sat Jun 06 16:36:29 2026 +0800"
      },
      "message": "[python] Generate input changelogs from Python writer (#7739)"
    },
    {
      "commit": "4bdaa76c882c8ab6f742fa0d4ff5d2d00fed1d33",
      "tree": "d74e52ebe2445869add4d2eae9888f8f7a5bd6bd",
      "parents": [
        "cc9bb8e165be69c05e615e008d1b9e1a4d26b187"
      ],
      "author": {
        "name": "yunfengzhou-hub",
        "email": "yuri.zhouyunfeng@outlook.com",
        "time": "Sat Jun 06 09:22:51 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Sat Jun 06 09:22:51 2026 +0800"
      },
      "message": "[flink] Support batch read/write for chain table (#8122)"
    },
    {
      "commit": "cc9bb8e165be69c05e615e008d1b9e1a4d26b187",
      "tree": "d7008f2f6b00659b8bc96fb50d8c376a95255ebf",
      "parents": [
        "d13301ccd0ad841e1fb4315e53a20b81f4ac2cec"
      ],
      "author": {
        "name": "XiaoHongbo",
        "email": "xiaohongbo.xhb@alibaba-inc.com",
        "time": "Fri Jun 05 17:39:20 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Fri Jun 05 17:39:20 2026 +0800"
      },
      "message": "[python] Fix manifest read failure when _WRITE_COLS contains system fields (#8131)\n\n### Purpose\nWhen reading a table whose data files have `_WRITE_COLS` containing\nsystem fields (e.g. `_ROW_ID`, `_SEQUENCE_NUMBER`), the read\n  fails with:\n  KeyError: \u0027_ROW_ID\u0027\n\nAligns with the Java-side fix in #7797 — skip metadata fields that are\nnot in the table schema when resolving value stats fields from\n`_WRITE_COLS`.\n\n  ## Test\n\n  - `test_read_write_cols_with_system_field`"
    },
    {
      "commit": "d13301ccd0ad841e1fb4315e53a20b81f4ac2cec",
      "tree": "6df0adfcb5ad7d90474482ca047a86da4587e628",
      "parents": [
        "3b639af55da0d9bbf0bc5e9df1e85253b1a2c9b8"
      ],
      "author": {
        "name": "Stefanietry",
        "email": "zhou1172026225@gmail.com",
        "time": "Fri Jun 05 15:52:09 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Fri Jun 05 15:52:09 2026 +0800"
      },
      "message": "[spark] support distributed execution of vector search on spark (#8108)\n\nPurpose: Currently, vector search operation is executed on a single node\nwithin the driver, which may lead to performance bottlenecks when\ndealing with large amounts of data. This issue aims to implement a\ndistributed execution capability."
    },
    {
      "commit": "3b639af55da0d9bbf0bc5e9df1e85253b1a2c9b8",
      "tree": "8c3f1f643b7591bef7cfee82b640ad7415e42a64",
      "parents": [
        "08ce6b26be8366eb0172643379a15a2c6ff6cf27"
      ],
      "author": {
        "name": "Stefanietry",
        "email": "zhou1172026225@gmail.com",
        "time": "Fri Jun 05 11:42:40 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Fri Jun 05 11:42:40 2026 +0800"
      },
      "message": "[spark] support persist source data to avoid loading data repeatedly (#8081)"
    },
    {
      "commit": "08ce6b26be8366eb0172643379a15a2c6ff6cf27",
      "tree": "15746100588212344f57b7b3f9ff10231669ea58",
      "parents": [
        "66c2b9caeabdb5c5b9c54bbcd5a4d75984d17981"
      ],
      "author": {
        "name": "junmuz",
        "email": "4795269+junmuz@users.noreply.github.com",
        "time": "Fri Jun 05 03:20:03 2026 +0100"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Fri Jun 05 10:20:03 2026 +0800"
      },
      "message": "Add lastCommittedSnapshotId commit metric and document missing metrics (#7589)"
    },
    {
      "commit": "66c2b9caeabdb5c5b9c54bbcd5a4d75984d17981",
      "tree": "4336e48435968e2a2cb483898af64cb03c5fe9f6",
      "parents": [
        "3eb5a4da5f631f4dff661dcae500f32efbd4309f"
      ],
      "author": {
        "name": "Jiajia Li",
        "email": "plusplusjiajia@alibaba-inc.com",
        "time": "Thu Jun 04 16:51:01 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Thu Jun 04 16:51:01 2026 +0800"
      },
      "message": "[arrow] Fix TIMESTAMP_LTZ Arrow timezone to use UTC instead of system default (#7364)\n\nLocalZonedTimestampType stores UTC timestamps by definition. However,\nArrowFieldTypeConversion used ZoneId.systemDefault() as the\nArrowTimestamp timezone."
    },
    {
      "commit": "3eb5a4da5f631f4dff661dcae500f32efbd4309f",
      "tree": "cab2af6fc6fea3cca21945f844365fccef0c7219",
      "parents": [
        "68cf3bca10f2891cee2c1c9769669e0d1cd3765a"
      ],
      "author": {
        "name": "QuakeWang",
        "email": "45645138+QuakeWang@users.noreply.github.com",
        "time": "Thu Jun 04 16:36:30 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Thu Jun 04 16:36:30 2026 +0800"
      },
      "message": "[python][ray] Preserve schema for empty reads (#8118)\n\nThe top-level Ray `read_paimon` API planned reads through\n`RayDatasource`. When a table scan produced no splits,\n`RayDatasource.get_read_tasks()` returned no read tasks, so Ray could\ncreate an empty dataset without the Paimon table schema.\n\nThis was inconsistent with `TableRead.to_ray()`, which already returns\nan empty Arrow-backed Ray dataset with the planned read schema.\n\nThis PR makes `read_paimon` use the planned `read_type` to build an\nempty Arrow table when there are no splits, so empty reads preserve\nschema and projection. It also lazily imports `ray.data` and reports an\nactionable `pypaimon[ray]` install hint when Ray is missing."
    },
    {
      "commit": "68cf3bca10f2891cee2c1c9769669e0d1cd3765a",
      "tree": "e58a067e010a3de559bb9bd6751911ee8b457332",
      "parents": [
        "3721ae0f88c4b739566215f0af7db96238d1a620"
      ],
      "author": {
        "name": "Junrui Lee",
        "email": "jrlee.ljr@gmail.com",
        "time": "Thu Jun 04 16:29:32 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Thu Jun 04 16:29:32 2026 +0800"
      },
      "message": "[core] Support snapshot-based sequence ordering for primary-key tables (#7832)"
    },
    {
      "commit": "3721ae0f88c4b739566215f0af7db96238d1a620",
      "tree": "076d40f15d153399780da80957bfcfbb52600c08",
      "parents": [
        "a1d255bf8da9d1d58da0c4748ca6149107f67f4c"
      ],
      "author": {
        "name": "XiaoHongbo",
        "email": "xiaohongbo.xhb@alibaba-inc.com",
        "time": "Thu Jun 04 14:30:11 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Thu Jun 04 14:30:11 2026 +0800"
      },
      "message": "[python] Fix tantivy full-text index schema mismatch  (#8113)"
    },
    {
      "commit": "a1d255bf8da9d1d58da0c4748ca6149107f67f4c",
      "tree": "475cf0585302552724ab5b4904f2efc27f7d4b6b",
      "parents": [
        "2b4f24ff39432da46e28884b423a74d29e8cf7b5"
      ],
      "author": {
        "name": "Junrui Lee",
        "email": "jrlee.ljr@gmail.com",
        "time": "Thu Jun 04 14:19:36 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Thu Jun 04 14:19:36 2026 +0800"
      },
      "message": "[python] Add StartupMode enum and scan.mode option to CoreOptions (#7900)"
    },
    {
      "commit": "2b4f24ff39432da46e28884b423a74d29e8cf7b5",
      "tree": "7268a7af133dc94262dffbf4a50cd9a4755de1b9",
      "parents": [
        "ba4d76da89cfd4f2015479a3c5436901d9a281de"
      ],
      "author": {
        "name": "Arnav Balyan",
        "email": "60175178+ArnavBalyan@users.noreply.github.com",
        "time": "Thu Jun 04 11:26:16 2026 +0530"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Thu Jun 04 13:56:16 2026 +0800"
      },
      "message": "[hive] Fix insert into static partitions on managed Paimon tables (#7824)"
    },
    {
      "commit": "ba4d76da89cfd4f2015479a3c5436901d9a281de",
      "tree": "7ab0d21e8ab8d1a8fa578fc6dd8a358f16032ba4",
      "parents": [
        "4e54917b06cd603d9e6f3cbb79fec0694b3ae69e"
      ],
      "author": {
        "name": "umi",
        "email": "55790489+discivigour@users.noreply.github.com",
        "time": "Thu Jun 04 13:54:46 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Thu Jun 04 13:54:46 2026 +0800"
      },
      "message": "[core] Support manifest sort feature when commit (#7842)"
    },
    {
      "commit": "4e54917b06cd603d9e6f3cbb79fec0694b3ae69e",
      "tree": "c5e8020ef081aec9b1819759e865b02cf72a32b4",
      "parents": [
        "40eadc2f0133d2859f85cc6b1e0de35433234846"
      ],
      "author": {
        "name": "duanyyyyyyy",
        "email": "139062392+duanyyyyyyy@users.noreply.github.com",
        "time": "Thu Jun 04 13:53:43 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Thu Jun 04 13:53:43 2026 +0800"
      },
      "message": "[core] Fix DataEvolutionFileStoreScan schema-evolution filtering (#8084)"
    },
    {
      "commit": "40eadc2f0133d2859f85cc6b1e0de35433234846",
      "tree": "e82415314c228b9212cf2dbc00c8322845511d7c",
      "parents": [
        "d78babb99fe7d25a72b002624ee0570a5a2c47c9"
      ],
      "author": {
        "name": "umi",
        "email": "55790489+discivigour@users.noreply.github.com",
        "time": "Thu Jun 04 13:52:19 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Thu Jun 04 13:52:19 2026 +0800"
      },
      "message": "[python] Support BlobView feature (#8021)\n\n- Add Python BlobViewStruct / BlobView wire-format support and the\nblob-view-field option.\n- Store descriptor/view BLOB fields inline, validate bad inline field\nconfiguration and payloads, and avoid writing new .blob files for view\nfields.\n- Resolve blob-view fields during reads through catalog-aware lookup,\nreturning bytes by default or upstream BlobDescriptor bytes when\nblob-as-descriptor\u003dtrue."
    },
    {
      "commit": "d78babb99fe7d25a72b002624ee0570a5a2c47c9",
      "tree": "dc030a495d5ebfba4cc54f779429f7c40a4d4bf5",
      "parents": [
        "1d56d2d235b2f63882daf5b6c8c22a29c9ee40a0"
      ],
      "author": {
        "name": "XiaoHongbo",
        "email": "xiaohongbo.xhb@alibaba-inc.com",
        "time": "Thu Jun 04 13:51:13 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Thu Jun 04 13:51:13 2026 +0800"
      },
      "message": "[python][ray] Support partial SET and INSERT in merge_into (#8085)"
    },
    {
      "commit": "1d56d2d235b2f63882daf5b6c8c22a29c9ee40a0",
      "tree": "f36040c60d02f29d17f4c727232fbaab5605f706",
      "parents": [
        "fbab26b1090192d4e467855e90b85ffc576da03a"
      ],
      "author": {
        "name": "QuakeWang",
        "email": "45645138+QuakeWang@users.noreply.github.com",
        "time": "Thu Jun 04 13:51:04 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Thu Jun 04 13:51:04 2026 +0800"
      },
      "message": "[core] Fix flaky duplicate file discard test (#8106)"
    },
    {
      "commit": "fbab26b1090192d4e467855e90b85ffc576da03a",
      "tree": "3728ca5d659dd805a84f748e7104da1d1777f7a1",
      "parents": [
        "0824fe77b16cc50a2988f1322b4a8a33a3691874"
      ],
      "author": {
        "name": "chaoyang",
        "email": "chaoyang@apache.org",
        "time": "Thu Jun 04 13:50:51 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Thu Jun 04 13:50:51 2026 +0800"
      },
      "message": "[python] In-memory merge buffer for primary-key writer (#7759)"
    },
    {
      "commit": "0824fe77b16cc50a2988f1322b4a8a33a3691874",
      "tree": "b8bf095d301220db121d76ac7e6dca95afa9c630",
      "parents": [
        "0eb9011fc824d39b9827f994a561c52707f62161"
      ],
      "author": {
        "name": "Kerwin Zhang",
        "email": "xiyu.zk@alibaba-inc.com",
        "time": "Thu Jun 04 13:44:30 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Thu Jun 04 13:44:30 2026 +0800"
      },
      "message": "[spark] Support V2 DML for row-tracking append-only tables (#8094)"
    },
    {
      "commit": "0eb9011fc824d39b9827f994a561c52707f62161",
      "tree": "102a0f0d36cd6a7655fcb8d851d48746ff875f45",
      "parents": [
        "65846f8ee6bfcb5eaeb0653469ed768a841af879"
      ],
      "author": {
        "name": "Colin",
        "email": "hansichan.crypto@gmail.com",
        "time": "Thu Jun 04 13:33:07 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Thu Jun 04 13:33:07 2026 +0800"
      },
      "message": "[python] Support JDBC catalog (#7720)\n\nSupport JDBC catalog in PyPaimon. This adds a Python JDBC catalog\nimplementation that uses the same catalog metadata tables as Java Paimon\nJDBC catalog: `paimon_tables`, `paimon_database_properties`, and\n`paimon_table_properties`.\n\nThe implementation supports SQLite with the Python standard library and\ndynamically supports MySQL/PostgreSQL when a corresponding Python DB-API\ndriver is installed. Table data and schema files continue to use\nexisting PyPaimon `FileIO` and `SchemaManager` behavior."
    },
    {
      "commit": "65846f8ee6bfcb5eaeb0653469ed768a841af879",
      "tree": "07c3333d6624f08ebaf80493f496f182a7c75cfd",
      "parents": [
        "b5cbf23d0398693d5efbc204e58aac80bb16edc8"
      ],
      "author": {
        "name": "Rhett CfZhuang",
        "email": "dark.momo985@gmail.com",
        "time": "Thu Jun 04 12:18:54 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Thu Jun 04 12:18:54 2026 +0800"
      },
      "message": "[core] Add validation to prevent primary key in sequence-group (#7052) (#7656)\n\nWhen a primary key field is configured in a sequence-group of\npartial-update merge engine, it causes Parquet decoding failures during\ncompaction because the key field may be set to null. This commit adds\nearly validation at configuration parsing time to reject such invalid\nconfigurations with a clear error message."
    },
    {
      "commit": "b5cbf23d0398693d5efbc204e58aac80bb16edc8",
      "tree": "7a3a34220d7a59084b09d8e414ed619e1ab878cc",
      "parents": [
        "4d8000bc8537cb2f07d1bc5cd1897ca84a27b5a3"
      ],
      "author": {
        "name": "Silas",
        "email": "yhlunar@qq.com",
        "time": "Thu Jun 04 12:15:13 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Thu Jun 04 12:15:13 2026 +0800"
      },
      "message": "[spark] SparkFilterConverter: support AlwaysTrue/False, fix silent NaN drop (#8060)"
    },
    {
      "commit": "4d8000bc8537cb2f07d1bc5cd1897ca84a27b5a3",
      "tree": "d2f0c79d3472267b3933e2e9a66146b003b953bd",
      "parents": [
        "8186094f9b14fcf69317972b6dbfee31d30e6d3f"
      ],
      "author": {
        "name": "Kerwin Zhang",
        "email": "xiyu.zk@alibaba-inc.com",
        "time": "Thu Jun 04 12:14:43 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Thu Jun 04 12:14:43 2026 +0800"
      },
      "message": "[spark] Harden dynamic overwrite against optimized child plans (#8052)\n\n`PaimonDynamicPartitionOverwriteCommand` exposes its child query to\nSpark optimizer through `V2WriteCommand`, but later wraps the same query\nback into a Dataset in `run()` before passing it to\n`WriteIntoPaimonTable`.This is fragile when the child query has already\nbeen optimized by Spark. The optimized plan may contain\noptimizer/planner-side placeholders, such as `DynamicPruningSubquery`,\nwhich are not ideal to expose again to writer-side Dataset operations.\n\nThis PR makes the command-to-writer boundary more robust for the dynamic\npartition overwrite fallback path. Before passing the query to\n`WriteIntoPaimonTable`, it converts the child query into an RDD-backed\nDataFrame via `createNewDataFrame(createDataset(...))`. As a result, the\nwriter consumes a clean logical plan instead of directly consuming the\npossibly optimized child plan."
    },
    {
      "commit": "8186094f9b14fcf69317972b6dbfee31d30e6d3f",
      "tree": "9c3d87fe91edbc79161ec96d257c05944a3898aa",
      "parents": [
        "4e3c4b82dc4fc42c7b9ab71a94ff1f8d308f67be"
      ],
      "author": {
        "name": "QuakeWang",
        "email": "45645138+QuakeWang@users.noreply.github.com",
        "time": "Thu Jun 04 12:13:45 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Thu Jun 04 12:13:45 2026 +0800"
      },
      "message": "[python][ray] Pin merge source table snapshot (#8110)\n\nRay merge-into already pins target reads to the base snapshot, but\nPaimon source tables were still normalized through `read_paimon` without\nan explicit snapshot. Because Ray Dataset execution is lazy, source\nplanning could otherwise observe a later table snapshot than the one\nseen during merge preparation.\n\nThis PR captures the latest snapshot id for string source tables during\n`_prepare` and passes it to `read_paimon`, so the source side uses a\nstable snapshot throughout merge planning and execution."
    },
    {
      "commit": "4e3c4b82dc4fc42c7b9ab71a94ff1f8d308f67be",
      "tree": "bd766b4cd98306a164877557a65cad8deaf4f560",
      "parents": [
        "4a5462d47ed8cb7c0f2810cd91126b473488af60"
      ],
      "author": {
        "name": "zhoulii",
        "email": "zhouli.dev@foxmail.com",
        "time": "Thu Jun 04 12:13:23 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Thu Jun 04 12:13:23 2026 +0800"
      },
      "message": "[python] Align ExternalPathProvider with Java multi-strategy support. (#8104)"
    },
    {
      "commit": "4a5462d47ed8cb7c0f2810cd91126b473488af60",
      "tree": "a0cdbbffb6d48eb6a2df994d6b377eb117bf72c2",
      "parents": [
        "6e14824885462f0fdb9db33361e67aae51d30fa6"
      ],
      "author": {
        "name": "YeJunHao",
        "email": "41894543+leaves12138@users.noreply.github.com",
        "time": "Thu Jun 04 11:01:50 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Thu Jun 04 11:01:50 2026 +0800"
      },
      "message": "[core] Filter side files in BTree global index scans (#8109)\n\nBTree global index scan planning should avoid unnecessary dedicated side\nfiles such as blob and vector-store files. However, pruning by\n`readType` is too broad for data-evolution tables: old normal data files\nmay not contain a newly added indexed column, but they still need to be\nscanned and indexed with a NULL key."
    },
    {
      "commit": "6e14824885462f0fdb9db33361e67aae51d30fa6",
      "tree": "1d578bc1f387de5414ceac3bf3d3cce501de8ce4",
      "parents": [
        "4d0a6515670767c6db22823201e33d33a19b7b56"
      ],
      "author": {
        "name": "QuakeWang",
        "email": "45645138+QuakeWang@users.noreply.github.com",
        "time": "Wed Jun 03 21:45:56 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Wed Jun 03 21:45:56 2026 +0800"
      },
      "message": "[flink][cdc] Fix schema event position in source reader (#8099)"
    },
    {
      "commit": "4d0a6515670767c6db22823201e33d33a19b7b56",
      "tree": "dcae154218a70fadfdcad1d660457faeaf2f90a5",
      "parents": [
        "c864884bc92bab09e2b39bddaeab5e61483ba7ad"
      ],
      "author": {
        "name": "huangxiaoping",
        "email": "1754789345@qq.com",
        "time": "Wed Jun 03 21:45:37 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Wed Jun 03 21:45:37 2026 +0800"
      },
      "message": "[hive] Avoid treating empty partitioned tables as unpartitioned during migration (#8100)"
    },
    {
      "commit": "c864884bc92bab09e2b39bddaeab5e61483ba7ad",
      "tree": "d35615a3e47e12dac53749f698a1027d08e7ba5c",
      "parents": [
        "4a71298bcbc4bec42ba404d114161b8440ea2a95"
      ],
      "author": {
        "name": "umi",
        "email": "55790489+discivigour@users.noreply.github.com",
        "time": "Wed Jun 03 21:17:20 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Wed Jun 03 21:17:20 2026 +0800"
      },
      "message": "[python] Push limit down to the reader layer for append table (#8102)"
    },
    {
      "commit": "4a71298bcbc4bec42ba404d114161b8440ea2a95",
      "tree": "8ef492ab4174b7073ccce45d455dbeb51884ae90",
      "parents": [
        "e4d0573aed02e341bb8fc6411a5280d7ed4db2b5"
      ],
      "author": {
        "name": "Faiz",
        "email": "wxy407679@antgroup.com",
        "time": "Wed Jun 03 19:42:35 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Wed Jun 03 19:42:35 2026 +0800"
      },
      "message": "[python] introduce BlobConsumer mirroring Java module (#8105)"
    },
    {
      "commit": "e4d0573aed02e341bb8fc6411a5280d7ed4db2b5",
      "tree": "95c1112a217f9490c5264a85ab919a0ac37a1e5f",
      "parents": [
        "5952c0105a7743c04910aa52ba736959bc2e957b"
      ],
      "author": {
        "name": "XiaoHongbo",
        "email": "xiaohongbo.xhb@alibaba-inc.com",
        "time": "Wed Jun 03 19:22:19 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Wed Jun 03 19:22:19 2026 +0800"
      },
      "message": "[python][ray] Ray merge into support condition (#8076)"
    },
    {
      "commit": "5952c0105a7743c04910aa52ba736959bc2e957b",
      "tree": "8964c5ff1e9e9594fddac5d349a3a9cd3d2568cb",
      "parents": [
        "9a31504aaf8e4ee293aee80c65a403bf586a10ca"
      ],
      "author": {
        "name": "YeJunHao",
        "email": "41894543+leaves12138@users.noreply.github.com",
        "time": "Wed Jun 03 16:52:48 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Wed Jun 03 16:52:48 2026 +0800"
      },
      "message": "[core] Require database for blob view serialization (#8095)"
    },
    {
      "commit": "9a31504aaf8e4ee293aee80c65a403bf586a10ca",
      "tree": "3bfd8c0fb8f700235bc309f0de96b86f17d0ffdf",
      "parents": [
        "2093598e1f532bc905b485bd73478a92265325a9"
      ],
      "author": {
        "name": "Jingsong Lee",
        "email": "jingsonglee0@gmail.com",
        "time": "Wed Jun 03 16:45:51 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Wed Jun 03 16:45:51 2026 +0800"
      },
      "message": "[python] Integrate paimon-mosaic format into PyPaimon (#8098)"
    },
    {
      "commit": "2093598e1f532bc905b485bd73478a92265325a9",
      "tree": "4d7fe25c8e18377db3fda036aa9569637d61b39a",
      "parents": [
        "dd3e67e85a6c80a65a4143f1ca2eaaa1123058b3"
      ],
      "author": {
        "name": "XiaoHongbo",
        "email": "xiaohongbo.xhb@alibaba-inc.com",
        "time": "Wed Jun 03 16:39:18 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Wed Jun 03 16:39:18 2026 +0800"
      },
      "message": "[python] Fix upsert row_id validation failure on tables with row_id holes (#8092)"
    },
    {
      "commit": "dd3e67e85a6c80a65a4143f1ca2eaaa1123058b3",
      "tree": "d6fce0850c5df367431e71c32c053078a103746f",
      "parents": [
        "a993a212d579759624a08cb294b05ed5d99f7c05"
      ],
      "author": {
        "name": "Stefanietry",
        "email": "zhou1172026225@gmail.com",
        "time": "Wed Jun 03 16:24:29 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Wed Jun 03 16:24:29 2026 +0800"
      },
      "message": "[spark] fix build paimon scan of process vector search for spark3.2 (#8089)"
    },
    {
      "commit": "a993a212d579759624a08cb294b05ed5d99f7c05",
      "tree": "1e99bb9462aff2fed9d7b2c8eda2b754fdfe5df3",
      "parents": [
        "0f837d1599d867eafc021d8e4a55b0084866e59b"
      ],
      "author": {
        "name": "jerry",
        "email": "jinglining0@gmail.com",
        "time": "Wed Jun 03 15:07:39 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Wed Jun 03 15:07:39 2026 +0800"
      },
      "message": "[fix] fix lumina package repo (#8097)"
    },
    {
      "commit": "0f837d1599d867eafc021d8e4a55b0084866e59b",
      "tree": "b7ed3ceec9ce6a6a2cc838669cba609e6b86b44e",
      "parents": [
        "a8e333c4676f2d726dc749cf8dadd2c8b954a3ac"
      ],
      "author": {
        "name": "QuakeWang",
        "email": "45645138+QuakeWang@users.noreply.github.com",
        "time": "Wed Jun 03 14:12:01 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Wed Jun 03 14:12:01 2026 +0800"
      },
      "message": "[flink][cdc] Keep table-aware split type during checkpoint (#8093)\n\n`TableAwareFileStoreSourceSplit` extends `FileStoreSourceSplit`, but it\ninherited `updateWithRecordsToSkip` from the parent class. During reader\ncheckpointing, `FileStoreSourceSplitState.toSourceSplit()` calls that\nmethod and returned a plain `FileStoreSourceSplit`, dropping the CDC\ntable metadata and causing CDC reader state restoration to cast the\nsplit back to `TableAwareFileStoreSourceSplit`.\n\nThis PR overrides `updateWithRecordsToSkip` in\n`TableAwareFileStoreSourceSplit` so checkpointed active splits keep\ntheir table-aware type and preserve `identifier`, `lastSchemaId`, and\n`schemaId`."
    },
    {
      "commit": "a8e333c4676f2d726dc749cf8dadd2c8b954a3ac",
      "tree": "dd8b66ffd5fd58caa40cfb53f231b0f696c9b3b5",
      "parents": [
        "c77a58e5a0856cc795d877c06977ef5d9e6e9142"
      ],
      "author": {
        "name": "Jingsong Lee",
        "email": "jingsonglee0@gmail.com",
        "time": "Wed Jun 03 13:09:24 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Wed Jun 03 13:09:24 2026 +0800"
      },
      "message": "[format] Add paimon-mosaic module with reader and writer (#7917)\n\nSee https://paimon.apache.org/docs/mosaic/\n\nIntroduces the Mosaic file format integration for Paimon with:\n- MosaicRecordsReader: row-group level predicate filtering using\nstatistics, column projection, and correct returnedPosition tracking\n- MosaicRecordsWriter: BundleFormatWriter with writerMetadata() support\nfor in-memory stats capture (avoids re-reading files on object stores)\n- MosaicSimpleStatsExtractor: stats extraction from file or\nwriterMetadata, with SimpleColStatsCollector integration\n- MosaicObjects: byte[] to Paimon object conversion for all supported\ntypes\n- Comprehensive test suite (6 test classes covering unit and integration\ntests)"
    },
    {
      "commit": "c77a58e5a0856cc795d877c06977ef5d9e6e9142",
      "tree": "f80b2ec6de35ab0b62b4595f7ad682a20d500621",
      "parents": [
        "2476815121ca1c1ffc260d5cb1c5febccdb913a9"
      ],
      "author": {
        "name": "Jingsong Lee",
        "email": "jingsonglee0@gmail.com",
        "time": "Wed Jun 03 12:12:58 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Wed Jun 03 12:12:58 2026 +0800"
      },
      "message": "[flink] Fix flaky ConsumerActionITCase (#8086)"
    },
    {
      "commit": "2476815121ca1c1ffc260d5cb1c5febccdb913a9",
      "tree": "6b1d559b41ffec4b5e9a9756a80627da0db400c6",
      "parents": [
        "b839753f46d8d447ab4ba5f8770e1ca13c0a5288"
      ],
      "author": {
        "name": "Jingsong Lee",
        "email": "jingsonglee0@gmail.com",
        "time": "Wed Jun 03 12:12:44 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Wed Jun 03 12:12:44 2026 +0800"
      },
      "message": "[tantivy] Support configurable full-text analyzers (#8074)\n\nThis PR expands Tantivy full-text global index tokenizer support into a\nconfigurable analyzer pipeline. It keeps the existing ngram and Jieba\nsupport, adds common LanceDB-style tokenizer/filter options, and wires\nthe same metadata through Java, Rust JNI, and PyPaimon readers."
    },
    {
      "commit": "b839753f46d8d447ab4ba5f8770e1ca13c0a5288",
      "tree": "8f8eae7b902c2a4e57efd5b7e530ae8c274237eb",
      "parents": [
        "38051b40c97963a0b5950bbb72c84c11eadeecb1"
      ],
      "author": {
        "name": "chaoyang",
        "email": "chaoyang@apache.org",
        "time": "Wed Jun 03 10:28:14 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Wed Jun 03 10:28:14 2026 +0800"
      },
      "message": "[python] Add HDFS native FileIO backend (no Hadoop install required) (#8031)\n\nIntroduces HdfsNativeFileIO backed by the hdfs-native protocol client\n(Rust + PyO3)\n\nDefault backend for hdfs:// and viewfs:// switches to native; the\nPyArrow / libhdfs path is kept, with auto-fallback when hdfs-native is\nunavailable (e.g. on Windows or when the extra is not installed)."
    },
    {
      "commit": "38051b40c97963a0b5950bbb72c84c11eadeecb1",
      "tree": "a87613349ccf75ead4b88680d6b81599452a18ac",
      "parents": [
        "5d4433360793ca48f34e90864983ef7c52d94bc9"
      ],
      "author": {
        "name": "QuakeWang",
        "email": "45645138+QuakeWang@users.noreply.github.com",
        "time": "Wed Jun 03 08:58:10 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Wed Jun 03 08:58:10 2026 +0800"
      },
      "message": "[python][ray] Honor partition overwrite in write_ray (#8088)\n\n`TableWrite.write_ray()` previously did not carry builder-level\noverwrite partitions into the Ray datasink. As a result,\n`table.new_batch_write_builder().overwrite({...}).new_write().write_ray(...)`\nwrote through Ray without the configured partition overwrite contract,\nwhile `overwrite\u003dTrue` only supported full-table overwrite.\n\nThis PR carries the builder static partition into `TableWrite`, forwards\nit to `PaimonDatasink`, and applies the same overwrite partition on both\nRay write tasks and the driver-side commit path."
    },
    {
      "commit": "5d4433360793ca48f34e90864983ef7c52d94bc9",
      "tree": "ab5fee507354f1398b405754e9667a9cbe9b46ac",
      "parents": [
        "5adb6c9d4cb7dfa102c4dd3da2d0e5de0674d75e"
      ],
      "author": {
        "name": "Jingsong Lee",
        "email": "jingsonglee0@gmail.com",
        "time": "Tue Jun 02 23:29:43 2026 +0800"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Tue Jun 02 23:29:43 2026 +0800"
      },
      "message": "[core][python] Fix blob updates and compaction (#8077)\n\nFix BLOB column updates in data-evolution append tables across Java and\nPython, and make BLOB compaction handle updated multi-version BLOB\nfiles. Unchanged BLOB values are now represented with placeholders and\nresolved from older BLOB files during reads and compaction."
    }
  ],
  "next": "5adb6c9d4cb7dfa102c4dd3da2d0e5de0674d75e"
}
