)]}'
{
  "commit": "2059c112b9af5e634c6dfbab5560c786e83f0c90",
  "tree": "282751ce14bae21b3877f42dd985090e651a16de",
  "parents": [
    "217e2a7a5400c234773e614afcdfc70624308b8a"
  ],
  "author": {
    "name": "Sivabalan Narayanan",
    "email": "n.siva.b@gmail.com",
    "time": "Fri Apr 24 10:27:09 2026 -0700"
  },
  "committer": {
    "name": "GitHub",
    "email": "noreply@github.com",
    "time": "Fri Apr 24 10:27:09 2026 -0700"
  },
  "message": "feat(clean): Adding empty clean support to hudi (#18337)\n\nThis PR adds support for creating empty clean commits to optimize clean planning performance for append-only datasets.\n\nProblem: In datasets with incremental cleaning enabled that receive infrequent updates or are primarily append-only, the clean planner performs a full table scan on every ingestion run because there are\nno clean plans to mark progress. This leads to significant performance overhead, especially for large tables.\n\nSolution: Introduce a new configuration hoodie.write.empty.clean.internval.hours that allows creating empty clean commits after a configurable duration. These empty clean commits update the\nearliestCommitToRetain value, enabling subsequent clean planning operations to only scan partitions modified after the last empty clean, avoiding expensive full table scans.\n\nSummary and Changelog\nUser-facing changes:\nNew advanced config hoodie.write.empty.clean.create.duration.ms (default: -1, disabled) to control when empty clean commits should be created\nWhen enabled with incremental cleaning, Hudi will create empty clean commits after the specified duration (in milliseconds) to optimize clean planning performance\n\nDetailed changes:\n\nConfig Addition (HoodieCleanConfig.java):\n- Addedhoodie.write.empty.clean.internval.hours config property with builder method\n- Marked as advanced config for power users\nClean Execution (CleanActionExecutor.java):\n- Modified clean parallelism calculation to ensure minimum of 1 (was causing issues with empty plans)\n- Added createEmptyCleanMetadata() method to construct metadata for empty cleans\n- Updated runClean() to handle empty clean stats by creating appropriate metadata\nClean Planning (CleanPlanActionExecutor.java):\n- Added getEmptyCleanerPlan() method to construct cleaner plans with no files to delete\n- Modified requestClean() to return empty plans when partitions list is empty\n- Added logic in requestCleanInternal() to check if empty clean commit should be created based on:\nIncremental cleaning enabled\nTime since last clean \u003e configured threshold\nValid earliestInstantToRetain present\n\nImpact\nPerformance Impact: Positive - significantly reduces clean planning time for append-only or infrequently updated datasets by avoiding full table scans\n\nAPI Changes: None - purely additive configuration\n\nBehavior Changes:\n\nWhen enabled, users will see empty clean commits in the timeline at the configured intervals\nThese commits have totalFilesDeleted\u003d0 and empty partition metadata but contain valid earliestCommitToRetain metadata",
  "tree_diff": [
    {
      "type": "modify",
      "old_id": "0565f8f9f44cb32773484ed8601e984016d01d4d",
      "old_mode": 33188,
      "old_path": "hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieCleanConfig.java",
      "new_id": "874c5445cbd5fb058d24e0b0809dbb211822ef76",
      "new_mode": 33188,
      "new_path": "hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieCleanConfig.java"
    },
    {
      "type": "modify",
      "old_id": "49dc59657489766fa781f6d903da0b278afe0afa",
      "old_mode": 33188,
      "old_path": "hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java",
      "new_id": "569be10c124446d9a92d2a0e5ad69595b6388fef",
      "new_mode": 33188,
      "new_path": "hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java"
    },
    {
      "type": "modify",
      "old_id": "64262ab673a29cfe865a6fcfe6bdba87e4f21cb5",
      "old_mode": 33188,
      "old_path": "hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java",
      "new_id": "3fad30b22bdf049008d48064eef2daa2b8b22907",
      "new_mode": 33188,
      "new_path": "hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java"
    },
    {
      "type": "modify",
      "old_id": "dbdc9857012d6d18f62c1f8d58dc2c54a52f388d",
      "old_mode": 33188,
      "old_path": "hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanActionExecutor.java",
      "new_id": "0868a85cabb93af9216a38832179b2a64c82c6c6",
      "new_mode": 33188,
      "new_path": "hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanActionExecutor.java"
    },
    {
      "type": "modify",
      "old_id": "cc53e67b74872b4ae2327cae8ab27b7dfbe99d36",
      "old_mode": 33188,
      "old_path": "hudi-client/hudi-client-common/src/test/java/org/apache/hudi/table/action/TestCleanPlanner.java",
      "new_id": "0b42837b35869efaae093374ea831f862e420310",
      "new_mode": 33188,
      "new_path": "hudi-client/hudi-client-common/src/test/java/org/apache/hudi/table/action/TestCleanPlanner.java"
    },
    {
      "type": "modify",
      "old_id": "6b4f4f716019639466eaef30cbf72b6158ec7a6c",
      "old_mode": 33188,
      "old_path": "hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java",
      "new_id": "f57ee82385f6c542347119355af43faa2cf9e8a5",
      "new_mode": 33188,
      "new_path": "hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java"
    },
    {
      "type": "modify",
      "old_id": "66385e0952e9651dd940042bf37dfe0c26d77fa1",
      "old_mode": 33188,
      "old_path": "hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestCleanPlanExecutor.java",
      "new_id": "ce7070c285882f2764928d548de6dffb092e7f09",
      "new_mode": 33188,
      "new_path": "hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestCleanPlanExecutor.java"
    },
    {
      "type": "modify",
      "old_id": "aa1a5ee6a96975c6755a90417d25e612f40d32d6",
      "old_mode": 33188,
      "old_path": "hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieCleanerTestBase.java",
      "new_id": "66f2f595ef4da4116956a21bdb730c7f9f3e9fc1",
      "new_mode": 33188,
      "new_path": "hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieCleanerTestBase.java"
    }
  ]
}
