dev/verify_release_calendar.py - airflow - Git at Google

 #!/usr/bin/env python3
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.

 # /// script
 # requires-python = ">=3.8"
 # dependencies = [
 #     "requests",
 #     "beautifulsoup4",
 #     "icalendar",
 #     "rich",
 # ]
 # ///
 """
 Verify that releases planned in Confluence wiki have matching Google Calendar entries.

 This script fetches the release plan from the Confluence wiki page and compares it
 with the Google Calendar entries to ensure they match.

 Release Plan: https://cwiki.apache.org/confluence/display/AIRFLOW/Release+Plan
 Calendar iCal: https://calendar.google.com/calendar/ical/c_de214e92df3b759779cb65f3e49e562796c6126e7500cfa7e524bf78186d8b5e%40group.calendar.google.com/public/basic.ics
 """

 from __future__ import annotations

 import argparse
 import sys
 import time
 from dataclasses import dataclass
 from datetime import datetime
 from pathlib import Path
 from typing import Any

 import requests
 from bs4 import BeautifulSoup
 from icalendar import Calendar
 from rich.console import Console

 # Shared rich console used for all output printed by this script.
 console = Console()

 # Source URLs: the Confluence release-plan page and the calendar's iCal feed.
 CONFLUENCE_URL = "https://cwiki.apache.org/confluence/display/AIRFLOW/Release+Plan"
 CALENDAR_ICAL_URL = (
     "https://calendar.google.com/calendar/ical/"
     "c_de214e92df3b759779cb65f3e49e562796c6126e7500cfa7e524bf78186d8b5e%40group.calendar.google.com/"
     "public/basic.ics"
 )


 @dataclass
 class Release:
     """One planned release: its type label, version, planned date and release manager."""

     release_type: str  # "Airflow Ctl" or "Providers"
     version: str
     date: datetime
     release_manager: str

     def __str__(self):
         # Render the date as YYYY-MM-DD for the one-line summary.
         day = self.date.strftime("%Y-%m-%d")
         return f"{self.release_type} {self.version} on {day} by {self.release_manager}"


 @dataclass
 class CalendarEntry:
     """One calendar event: its summary text, start date and optional description."""

     summary: str
     start_date: datetime
     description: str | None = None

     def __str__(self):
         # Render the start date as YYYY-MM-DD after the summary.
         day = self.start_date.strftime("%Y-%m-%d")
         return f"{self.summary} on {day}"


 def fetch_confluence_page() -> str:
     """Download the Confluence release plan page and return its HTML text.

     Up to three attempts are made, sleeping ten seconds after each failed one.
     If the final attempt fails, the error is printed and the process exits
     with status 1.
     """
     console.print(f"[cyan]Fetching Confluence page:[/cyan] {CONFLUENCE_URL}")

     max_retries = 3
     retry_delay = 10
     attempt = 0

     while attempt < max_retries:
         attempt += 1
         try:
             response = requests.get(CONFLUENCE_URL, timeout=30)
             response.raise_for_status()
             return response.text
         except requests.RequestException as e:
             if attempt >= max_retries:
                 # Out of attempts: report and stop the script.
                 console.print(f"[red]Failed to fetch Confluence page after {max_retries} attempts:[/red] {e}")
                 sys.exit(1)
             console.print(f"[yellow]Attempt {attempt}/{max_retries} failed: {e}[/yellow]")
             console.print(f"[yellow]Retrying in {retry_delay} seconds...[/yellow]")
             time.sleep(retry_delay)
     return ""


 def print_confluence_debug_info(soup: BeautifulSoup) -> None:
     """Print how many headings and tables the page has, plus the first ten headings."""
     headings = soup.find_all(["h1", "h2", "h3", "h4", "h5"])
     console.print(f"[dim]Found {len(headings)} headings in page[/dim]")
     if headings:
         console.print("[dim]First 10 headings:[/dim]")
     for heading in headings[:10]:
         # Heading text is cut to 80 characters to keep the output compact.
         label = heading.get_text(strip=True)[:80]
         console.print(f"  [dim]{heading.name}: {label}[/dim]")

     table_count = len(soup.find_all("table"))
     console.print(f"[dim]Found {table_count} tables in page[/dim]")


 def get_release_sections() -> dict[str, list[str]]:
     """Map each release type to the heading texts that may introduce its section.

     The candidate names are listed in the order they should be tried.
     """
     airflow_ctl_names = ["Airflow Ctl", "airflow-ctl", "airflow ctl"]
     provider_names = [
         "Support for Airflow in Providers",
         "Provider Releases",
         "Providers",
         "Provider",
         "provider release",
     ]
     return {"Airflow Ctl": airflow_ctl_names, "Providers": provider_names}


 def find_table_for_heading(heading: Any) -> Any | None:
     """Locate the table that belongs to ``heading``.

     Following siblings are scanned first, stopping at the next heading. If no
     sibling table is found, the next table anywhere later in the document is
     used. Returns ``None`` when there is no such table.
     """
     heading_tags = ["h1", "h2", "h3", "h4", "h5"]

     sibling = heading.find_next_sibling()
     while sibling:
         tag_name = sibling.name
         if tag_name in heading_tags:
             # Another heading starts here, so the sibling scan ends.
             break
         if tag_name == "table":
             console.print("  [dim]Found table directly after heading[/dim]")
             return sibling
         sibling = sibling.find_next_sibling()

     # Fallback: the table may be nested in a wrapper rather than a direct sibling.
     fallback = heading.find_next("table")
     if not fallback:
         return None
     console.print("  [dim]Found table via find_next[/dim]")
     return fallback


 def find_section_and_parse(soup: BeautifulSoup, release_type: str, section_names: list[str]) -> list[Release]:
     """Find the section for ``release_type`` and parse its table into releases.

     Section names are tried in order. For each name only the first heading
     containing it (case-insensitively) is considered; if that heading has a
     table, the parsed result is returned, otherwise the next name is tried.
     Returns an empty list when no name leads to a table.
     """
     headings = soup.find_all(["h1", "h2", "h3", "h4", "h5"])
     for candidate in section_names:
         needle = candidate.lower()
         match = next(
             (h for h in headings if needle in h.get_text(strip=True).lower()),
             None,
         )
         if match is None:
             continue
         heading_text = match.get_text(strip=True)
         console.print(f"[green]Found section:[/green] {heading_text}")
         table = find_table_for_heading(match)
         if table is not None:
             return parse_table(table, release_type)
     return []


 def parse_confluence_releases(html_content: str) -> list[Release]:
     """Parse the release plan HTML and return the releases of every known type.

     A warning is printed for each release type that yields no releases.
     """
     console.print("[cyan]Parsing Confluence releases...[/cyan]")
     soup = BeautifulSoup(html_content, "html.parser")

     print_confluence_debug_info(soup)

     releases: list[Release] = []
     for release_type, section_names in get_release_sections().items():
         found = find_section_and_parse(soup, release_type, section_names)
         if not found:
             console.print(f"[yellow]Could not find section for {release_type}[/yellow]")
             continue
         releases.extend(found)

     console.print(f"[green]Found {len(releases)} releases in Confluence[/green]")
     return releases


 def get_table_headers(rows: list[Any]) -> tuple[list[str], bool]:
     """Return the lower-cased header texts of the first row and a validity flag.

     The table is considered invalid (``([], False)``) when it has fewer than
     two rows, i.e. no data rows below the header.
     """
     if len(rows) >= 2:
         first_row_cells = rows[0].find_all(["td", "th"])
         headers = [cell.get_text(strip=True).lower() for cell in first_row_cells]
         console.print(f"  [dim]Headers: {headers}[/dim]")
         return headers, True

     console.print("  [yellow]Table has no data rows[/yellow]")
     return [], False


 def find_column_indices(headers: list[str]) -> tuple[int | None, int | None, int | None]:
     """Return the column positions of the version, date and manager headers.

     Each header is classified by substring: "version" (unless it also contains
     "suffix"), then "date", then "manager". When several headers fall into the
     same class the last one wins. A position is ``None`` if no header matched.
     """
     version_idx = date_idx = manager_idx = None

     for idx, header in enumerate(headers):
         if "version" in header and "suffix" not in header:
             version_idx = idx
             continue
         if "date" in header:
             date_idx = idx
             continue
         if "manager" in header:
             manager_idx = idx

     console.print(
         f"  [dim]Column mapping - version: {version_idx}, date: {date_idx}, manager: {manager_idx}[/dim]"
     )
     return version_idx, date_idx, manager_idx


 def parse_date_string(date_str: str) -> datetime | None:
     """Convert the text of a date cell into a ``datetime``.

     If the text contains "week of" (any casing) it is lower-cased and that
     phrase is removed. The result is tried against each supported format in
     order and the first successful parse is returned. When no format fits, a
     warning is printed and ``None`` is returned.
     """
     supported_formats = [
         "%d %b %Y",  # 09 Dec 2025
         "%d %B %Y",  # 09 December 2025
         "%Y-%m-%d",  # 2025-12-06
         "%Y/%m/%d",  # 2025/12/06
         "%m/%d/%Y",  # 12/06/2025
         "%d-%m-%Y",  # 06-12-2025
         "%b %d, %Y",  # Dec 09, 2025
         "%B %d, %Y",  # December 09, 2025
     ]

     # Handle "Week of DD Mon YYYY" by dropping the phrase before parsing.
     lowered = date_str.lower()
     candidate = lowered.replace("week of", "").strip() if "week of" in lowered else date_str

     for fmt in supported_formats:
         try:
             parsed = datetime.strptime(candidate, fmt)
         except ValueError:
             continue
         return parsed

     console.print(
         f"  [yellow]Could not parse date:[/yellow] '{date_str}' (tried {len(supported_formats)} formats)"
     )
     return None


 def extract_manager_first_name(release_manager: str) -> str:
     """Return the first name found in a release manager string.

     The string may list several managers separated by "+"; the first word of
     the first non-empty part is returned. An empty or whitespace-only string
     (or one made only of "+" separators) yields "" instead of raising.
     """
     for part in release_manager.split("+"):
         words = part.split()
         if words:
             return words[0]
     return ""


 def generate_version_from_date(date: datetime) -> str:
     """Build a ``YYYY.MM.DD`` version string for a release that has no explicit version."""
     return f"{date:%Y.%m.%d}"


 def parse_table_row(
     cells: list[Any],
     row_num: int,
     version_idx: int | None,
     date_idx: int | None,
     manager_idx: int | None,
     release_type: str,
 ) -> Release | None:
     """Turn one table row into a ``Release``.

     Returns ``None`` (after printing a note) when the date or manager cell is
     empty, when the date cannot be parsed, or when reading the cells raises
     ``IndexError``/``ValueError``. If the row has no version text, a version
     is generated from the date.
     """

     def cell_text(idx: int | None) -> str:
         # A column that was not found is treated like an empty cell.
         return "" if idx is None else cells[idx].get_text(strip=True)

     try:
         date_str = cell_text(date_idx)
         release_manager = cell_text(manager_idx)
         version = cell_text(version_idx)

         # Both the date and the manager are required for a usable row.
         if not (date_str and release_manager):
             console.print(f"  [dim]Row {row_num}: Skipping empty row[/dim]")
             return None

         date = parse_date_string(date_str)
         if not date:
             return None

         release = Release(
             release_type=release_type,
             version=version or generate_version_from_date(date),
             date=date,
             release_manager=extract_manager_first_name(release_manager),
         )
         console.print(f"  [green]Parsed:[/green] {release}")
         return release

     except (IndexError, ValueError) as e:
         console.print(f"[yellow]Error parsing row {row_num}:[/yellow] {e}")
         return None


 def parse_table(table: Any, release_type: str) -> list[Release]:
     """Parse a release table from HTML into a list of ``Release`` objects.

     The first row is treated as the header. An empty list is returned when the
     table has no data rows or lacks a date or manager column. Rows with too few
     cells, and rows that ``parse_table_row`` rejects, are skipped.
     """
     releases: list[Release] = []
     rows = table.find_all("tr")

     console.print(f"  [dim]Table has {len(rows)} rows[/dim]")

     # Get and validate headers
     headers, is_valid = get_table_headers(rows)
     if not is_valid:
         return releases

     # Find column indices
     version_idx, date_idx, manager_idx = find_column_indices(headers)

     if date_idx is None or manager_idx is None:
         console.print("  [yellow]Could not find required columns (date and manager)[/yellow]")
         return releases

     # Number of cells a row needs so every located column can be read. It is
     # computed once, and only None is excluded so a column at index 0 counts.
     required_cells = max(idx for idx in (version_idx, date_idx, manager_idx) if idx is not None) + 1

     # Parse data rows
     for i, row in enumerate(rows[1:], start=1):
         cells = row.find_all(["td", "th"])
         if len(cells) < required_cells:
             console.print(f"  [dim]Row {i}: Skipping (not enough cells)[/dim]")
             continue

         release = parse_table_row(cells, i, version_idx, date_idx, manager_idx, release_type)
         if release:
             releases.append(release)

     return releases


 def parse_calendar_component(component: Any) -> CalendarEntry | None:
     """Parse a calendar component into a ``CalendarEntry``.

     Returns ``None`` for anything that is not a ``VEVENT`` and for events
     without a ``dtstart``. The start is reduced to midnight of its calendar
     day, whether ``dtstart`` holds a date or a datetime.
     """
     if component.name != "VEVENT":
         return None

     summary = str(component.get("summary", ""))
     dtstart = component.get("dtstart")
     description = component.get("description", "")

     if not dtstart:
         return None

     # A datetime is cut down to its date; a plain date is used as is.
     start = dtstart.dt
     if isinstance(start, datetime):
         start = start.date()
     start_date = datetime.combine(start, datetime.min.time())

     return CalendarEntry(
         summary=summary,
         start_date=start_date,
         description=str(description) if description else None,
     )


 def fetch_calendar_entries() -> list[CalendarEntry]:
     """Fetch the iCal feed and return its events as ``CalendarEntry`` objects.

     Up to three download attempts are made, sleeping ten seconds after each
     failed one. If the final attempt fails, the error is printed and the
     process exits with status 1. Components that do not parse into an entry
     are left out.
     """
     console.print(f"[cyan]Fetching calendar:[/cyan] {CALENDAR_ICAL_URL}")

     max_retries = 3
     retry_delay = 10
     calendar_data = b""

     for attempt in range(1, 1 + max_retries):
         try:
             response = requests.get(CALENDAR_ICAL_URL, timeout=30)
             response.raise_for_status()
             calendar_data = response.content
             break
         except requests.RequestException as e:
             # Attempts are numbered from 1, so retry while any remain. This is
             # the same rule fetch_confluence_page uses.
             if attempt < max_retries:
                 console.print(f"[yellow]Attempt {attempt}/{max_retries} failed: {e}[/yellow]")
                 console.print(f"[yellow]Retrying in {retry_delay} seconds...[/yellow]")
                 time.sleep(retry_delay)
             else:
                 console.print(f"[red]Failed to fetch calendar after {max_retries} attempts:[/red] {e}")
                 sys.exit(1)

     console.print("[cyan]Parsing calendar entries...[/cyan]")
     calendar = Calendar.from_ical(calendar_data)
     parsed = (parse_calendar_component(component) for component in calendar.walk())
     entries = [entry for entry in parsed if entry]

     console.print(f"[green]Found {len(entries)} calendar entries[/green]")
     return entries


 def normalize_name(name: str) -> str:
     """Return ``name`` lower-cased, stripped and with accents removed."""
     import unicodedata

     # NFD splits accented letters into base letter + combining mark ("Mn"),
     # so dropping the marks leaves the unaccented text.
     decomposed = unicodedata.normalize("NFD", name)
     kept = [ch for ch in decomposed if unicodedata.category(ch) != "Mn"]
     return "".join(kept).lower().strip()


 def dates_match(release_date: datetime, entry_date: datetime) -> bool:
     """Return True when both values fall on the same calendar day (time is ignored)."""
     release_day = (release_date.year, release_date.month, release_date.day)
     entry_day = (entry_date.year, entry_date.month, entry_date.day)
     return entry_day == release_day


 def check_release_type_match(release_type: str, summary: str) -> bool:
     """Return True when the calendar summary refers to the given release type.

     Both texts are normalized (case and accents) first. The type matches when
     it occurs in the summary, or, for a type containing both "airflow" and
     "ctl", when the summary contains both of those words anywhere.
     """
     summary_norm = normalize_name(summary)
     type_norm = normalize_name(release_type)

     if type_norm in summary_norm:
         return True

     # Covers spelling variants such as "Airflow Ctl" vs "Airflow CTL".
     keywords = ("airflow", "ctl")
     type_is_ctl = all(word in type_norm for word in keywords)
     return type_is_ctl and all(word in summary_norm for word in keywords)


 def check_version_match(version: str, summary: str) -> bool:
     """Return True when ``version`` occurs as a substring of the calendar summary."""
     return summary.find(version) != -1


 def check_manager_match(manager_name: str, summary: str) -> bool:
     """Return True when the manager's name occurs in the calendar summary.

     Both texts are normalized (case and accents) and compared by substring.
     A name that is empty after normalization never matches, because an empty
     string would otherwise be found in every summary.
     """
     normalized_manager = normalize_name(manager_name)
     if not normalized_manager:
         return False

     # A substring test already covers every whole-word occurrence, so no
     # separate word-boundary check is needed.
     return normalized_manager in normalize_name(summary)


 def is_matching_entry(release: Release, entry: CalendarEntry) -> bool:
     """
     Decide whether a calendar entry corresponds to a release.

     All of the following must hold:
     - the entry starts on the release date
     - the summary names the release manager
     - the summary names the release type OR contains the version
     """
     if not dates_match(release.date, entry.start_date):
         return False

     summary = entry.summary
     if not check_manager_match(release.release_manager, summary):
         return False

     return check_release_type_match(release.release_type, summary) or check_version_match(
         release.version, summary
     )


 def find_matching_entry(release: Release, calendar_entries: list[CalendarEntry]) -> CalendarEntry | None:
     """Return the first calendar entry matching ``release``, or ``None`` if there is none."""
     candidates = (entry for entry in calendar_entries if is_matching_entry(release, entry))
     return next(candidates, None)


 def print_verification_header() -> None:
     """Print the banner that introduces the verification results."""
     rule = "=" * 80
     console.print("\n" + rule)
     console.print("[bold cyan]VERIFICATION RESULTS[/bold cyan]")
     console.print(rule + "\n")


 def print_matched_release(release: Release, entry: CalendarEntry) -> None:
     """Print a release together with the summary of the calendar entry it matched."""
     release_line = f"[green]✓ MATCHED:[/green] {release}"
     calendar_line = f"  [dim]Calendar: {entry.summary}[/dim]"
     console.print(release_line)
     console.print(calendar_line)


 def print_unmatched_release(release: Release) -> None:
     """Print a release for which no calendar entry was found."""
     message = f"[red]✗ NOT MATCHED:[/red] {release}"
     console.print(message)


 def print_verification_summary(
     total_releases: int, matched_count: int, unmatched_releases: list[Release]
 ) -> None:
     """Print the totals of the verification run and list every unmatched release."""
     rule = "=" * 80
     unmatched_count = len(unmatched_releases)

     console.print("\n" + rule)
     console.print("[bold cyan]SUMMARY[/bold cyan]")
     console.print(rule)
     console.print(f"Total releases in Confluence: {total_releases}")
     console.print(f"Matched releases: [green]{matched_count}[/green]")
     console.print(f"Unmatched releases: [red]{unmatched_count}[/red]")

     if unmatched_count:
         console.print("\n[yellow]Unmatched releases:[/yellow]")
     for release in unmatched_releases:
         console.print(f"  [yellow]•[/yellow] {release}")

     console.print(rule + "\n")


 def verify_releases(releases: list[Release], calendar_entries: list[CalendarEntry]) -> bool:
     """Check every release against the calendar and report the outcome.

     Prints one line per release plus a summary. Returns True only when every
     release has a matching calendar entry.
     """
     print_verification_header()

     unmatched: list[Release] = []
     matched_count = 0

     for release in releases:
         entry = find_matching_entry(release, calendar_entries)
         if not entry:
             unmatched.append(release)
             print_unmatched_release(release)
             continue
         print_matched_release(release, entry)
         matched_count += 1

     print_verification_summary(len(releases), matched_count, unmatched)

     return not unmatched


 def load_html_content(args: argparse.Namespace) -> str:
     """Return the release plan HTML, read from a file or fetched from Confluence.

     With ``args.load_html`` set, the file is read and nothing is fetched.
     Otherwise the page is fetched and, if ``args.save_html`` is set, also
     written to that file.
     """
     source_file = args.load_html
     if source_file:
         console.print(f"[cyan]Loading HTML from file:[/cyan] {source_file}")
         return Path(source_file).read_text(encoding="utf-8")

     html_content = fetch_confluence_page()
     target_file = args.save_html
     if target_file:
         console.print(f"[cyan]Saving HTML to file:[/cyan] {target_file}")
         Path(target_file).write_text(html_content, encoding="utf-8")
     return html_content


 def validate_releases(releases: list[Release]) -> None:
     """Exit with status 1 (after printing an error) when no releases were parsed."""
     if releases:
         return
     console.print("[red]No releases found in Confluence page![/red]")
     sys.exit(1)


 def validate_calendar_entries(calendar_entries: list[CalendarEntry]) -> None:
     """Exit with status 1 (after printing an error) when the calendar has no entries."""
     if calendar_entries:
         return
     console.print("[red]No calendar entries found![/red]")
     sys.exit(1)


 def print_final_result(all_matched: bool) -> None:
     """Print the overall verdict and exit: status 0 on success, 1 otherwise."""
     if not all_matched:
         console.print("[bold red]✗ Some releases do not have matching calendar entries![/bold red]")
         sys.exit(1)

     console.print("[bold green]✓ All releases have matching calendar entries![/bold green]")
     sys.exit(0)


 def main():
     """Parse the command line, load both data sources and verify them.

     The process exits with status 0 when every release has a calendar entry
     and with status 1 otherwise (including when either source is empty).
     """
     parser = argparse.ArgumentParser(
         description="Verify that planned releases in Confluence match Google Calendar entries"
     )
     file_options = (
         ("--save-html", "Save the fetched Confluence HTML to a file for debugging"),
         ("--load-html", "Load Confluence HTML from a file instead of fetching"),
     )
     for flag, help_text in file_options:
         parser.add_argument(flag, metavar="FILE", help=help_text)
     args = parser.parse_args()

     # Releases planned on the wiki
     releases = parse_confluence_releases(load_html_content(args))
     validate_releases(releases)

     # Entries present in the calendar
     calendar_entries = fetch_calendar_entries()
     validate_calendar_entries(calendar_entries)

     # Compare both sides; print_final_result terminates the process.
     print_final_result(verify_releases(releases, calendar_entries))

 # Run the verification only when executed as a script, not when imported.
 if __name__ == "__main__":
     main()
	#!/usr/bin/env python3
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.

	# /// script
	# requires-python = ">=3.8"
	# dependencies = [
	# "requests",
	# "beautifulsoup4",
	# "icalendar",
	# "rich",
	# ]
	# ///
	"""
	Verify that releases planned in Confluence wiki have matching Google Calendar entries.

	This script fetches the release plan from the Confluence wiki page and compares it
	with the Google Calendar entries to ensure they match.

	Release Plan: https://cwiki.apache.org/confluence/display/AIRFLOW/Release+Plan
	Calendar iCal: https://calendar.google.com/calendar/ical/c_de214e92df3b759779cb65f3e49e562796c6126e7500cfa7e524bf78186d8b5e%40group.calendar.google.com/public/basic.ics
	"""

	from __future__ import annotations

	import argparse
	import sys
	import time
	from dataclasses import dataclass
	from datetime import datetime
	from pathlib import Path
	from typing import Any

	import requests
	from bs4 import BeautifulSoup
	from icalendar import Calendar
	from rich.console import Console

	# Shared rich console used for all output printed by this script.
	console = Console()

	# Source URLs: the Confluence release-plan page and the calendar's iCal feed.
	CONFLUENCE_URL = "https://cwiki.apache.org/confluence/display/AIRFLOW/Release+Plan"
	CALENDAR_ICAL_URL = (
	"https://calendar.google.com/calendar/ical/"
	"c_de214e92df3b759779cb65f3e49e562796c6126e7500cfa7e524bf78186d8b5e%40group.calendar.google.com/"
	"public/basic.ics"
	)


	@dataclass
	class Release:
	    """One planned release: its type label, version, planned date and release manager."""

	    release_type: str  # "Airflow Ctl" or "Providers"
	    version: str
	    date: datetime
	    release_manager: str

	    def __str__(self):
	        # Render the date as YYYY-MM-DD for the one-line summary.
	        return f"{self.release_type} {self.version} on {self.date.strftime('%Y-%m-%d')} by {self.release_manager}"


	@dataclass
	class CalendarEntry:
	    """One calendar event: its summary text, start date and optional description."""

	    summary: str
	    start_date: datetime
	    description: str | None = None

	    def __str__(self):
	        # Render the start date as YYYY-MM-DD after the summary.
	        return f"{self.summary} on {self.start_date.strftime('%Y-%m-%d')}"


	def fetch_confluence_page() -> str:
	    """Download the Confluence release plan page and return its HTML text.

	    Up to three attempts are made, sleeping ten seconds after each failed one.
	    If the final attempt fails, the error is printed and the process exits
	    with status 1.
	    """
	    console.print(f"[cyan]Fetching Confluence page:[/cyan] {CONFLUENCE_URL}")

	    max_retries = 3
	    retry_delay = 10

	    for attempt in range(1, 1 + max_retries):
	        try:
	            response = requests.get(CONFLUENCE_URL, timeout=30)
	            response.raise_for_status()
	            return response.text
	        except requests.RequestException as e:
	            if attempt < max_retries:
	                console.print(f"[yellow]Attempt {attempt}/{max_retries} failed: {e}[/yellow]")
	                console.print(f"[yellow]Retrying in {retry_delay} seconds...[/yellow]")
	                time.sleep(retry_delay)
	            else:
	                # Out of attempts: report and stop the script.
	                console.print(f"[red]Failed to fetch Confluence page after {max_retries} attempts:[/red] {e}")
	                sys.exit(1)
	    return ""


	def print_confluence_debug_info(soup: BeautifulSoup) -> None:
	    """Print how many headings and tables the page has, plus the first ten headings."""
	    all_headings = soup.find_all(["h1", "h2", "h3", "h4", "h5"])
	    console.print(f"[dim]Found {len(all_headings)} headings in page[/dim]")
	    if all_headings:
	        console.print("[dim]First 10 headings:[/dim]")
	        for heading in all_headings[:10]:
	            # Heading text is cut to 80 characters to keep the output compact.
	            console.print(f" [dim]{heading.name}: {heading.get_text(strip=True)[:80]}[/dim]")

	    all_tables = soup.find_all("table")
	    console.print(f"[dim]Found {len(all_tables)} tables in page[/dim]")


	def get_release_sections() -> dict[str, list[str]]:
	    """Map each release type to the heading texts that may introduce its section.

	    The candidate names are listed in the order they should be tried.
	    """
	    return {
	        "Airflow Ctl": ["Airflow Ctl", "airflow-ctl", "airflow ctl"],
	        "Providers": [
	            "Support for Airflow in Providers",
	            "Provider Releases",
	            "Providers",
	            "Provider",
	            "provider release",
	        ],
	    }


	def find_table_for_heading(heading: Any) -> Any | None:
	    """Locate the table that belongs to ``heading``.

	    Following siblings are scanned first, stopping at the next heading. If no
	    sibling table is found, the next table anywhere later in the document is
	    used. Returns ``None`` when there is no such table.
	    """
	    # Try to find table as sibling first
	    current = heading.find_next_sibling()
	    while current:
	        if current.name == "table":
	            console.print(" [dim]Found table directly after heading[/dim]")
	            return current
	        if current.name in ["h1", "h2", "h3", "h4", "h5"]:
	            # Stop if we hit another heading
	            break
	        current = current.find_next_sibling()

	    # If no table found as sibling, try finding next table in document
	    next_table = heading.find_next("table")
	    if next_table:
	        console.print(" [dim]Found table via find_next[/dim]")
	        return next_table

	    return None


	def find_section_and_parse(soup: BeautifulSoup, release_type: str, section_names: list[str]) -> list[Release]:
	    """Find the section for ``release_type`` and parse its table into releases.

	    Section names are tried in order. For each name only the first heading
	    containing it (case-insensitively) is considered; if that heading has a
	    table, the parsed result is returned, otherwise the next name is tried.
	    Returns an empty list when no name leads to a table.
	    """
	    headings = soup.find_all(["h1", "h2", "h3", "h4", "h5"])
	    for section_name in section_names:
	        for heading in headings:
	            heading_text = heading.get_text(strip=True)
	            if section_name.lower() in heading_text.lower():
	                console.print(f"[green]Found section:[/green] {heading_text}")
	                table = find_table_for_heading(heading)
	                if table is not None:
	                    return parse_table(table, release_type)
	                # No table under this heading: move on to the next name.
	                break
	    return []


	def parse_confluence_releases(html_content: str) -> list[Release]:
	    """Parse the release plan HTML and return the releases of every known type.

	    A warning is printed for each release type that yields no releases.
	    """
	    console.print("[cyan]Parsing Confluence releases...[/cyan]")
	    soup = BeautifulSoup(html_content, "html.parser")

	    print_confluence_debug_info(soup)

	    releases: list[Release] = []
	    release_sections = get_release_sections()

	    for release_type, section_names in release_sections.items():
	        section_releases = find_section_and_parse(soup, release_type, section_names)
	        if section_releases:
	            releases.extend(section_releases)
	        else:
	            console.print(f"[yellow]Could not find section for {release_type}[/yellow]")

	    console.print(f"[green]Found {len(releases)} releases in Confluence[/green]")
	    return releases


	def get_table_headers(rows: list[Any]) -> tuple[list[str], bool]:
	    """Return the lower-cased header texts of the first row and a validity flag.

	    The table is considered invalid (``([], False)``) when it has fewer than
	    two rows, i.e. no data rows below the header.
	    """
	    if len(rows) < 2:
	        console.print(" [yellow]Table has no data rows[/yellow]")
	        return [], False

	    header_cells = rows[0].find_all(["td", "th"])
	    headers = [cell.get_text(strip=True).lower() for cell in header_cells]
	    console.print(f" [dim]Headers: {headers}[/dim]")
	    return headers, True


	def find_column_indices(headers: list[str]) -> tuple[int | None, int | None, int | None]:
	    """Return the column positions of the version, date and manager headers.

	    Each header is classified by substring: "version" (unless it also contains
	    "suffix"), then "date", then "manager". When several headers fall into the
	    same class the last one wins. A position is ``None`` if no header matched.
	    """
	    version_idx = None
	    date_idx = None
	    manager_idx = None

	    for idx, header in enumerate(headers):
	        if "version" in header and "suffix" not in header:
	            version_idx = idx
	        elif "date" in header:
	            date_idx = idx
	        elif "manager" in header:
	            manager_idx = idx

	    console.print(
	        f" [dim]Column mapping - version: {version_idx}, date: {date_idx}, manager: {manager_idx}[/dim]"
	    )
	    return version_idx, date_idx, manager_idx


	def parse_date_string(date_str: str) -> datetime | None:
	    """Convert the text of a date cell into a ``datetime``.

	    If the text contains "week of" (any casing) it is lower-cased and that
	    phrase is removed. The result is tried against each supported format in
	    order and the first successful parse is returned. When no format fits, a
	    warning is printed and ``None`` is returned.
	    """
	    date_formats = [
	        "%d %b %Y",  # 09 Dec 2025
	        "%d %B %Y",  # 09 December 2025
	        "%Y-%m-%d",  # 2025-12-06
	        "%Y/%m/%d",  # 2025/12/06
	        "%m/%d/%Y",  # 12/06/2025
	        "%d-%m-%Y",  # 06-12-2025
	        "%b %d, %Y",  # Dec 09, 2025
	        "%B %d, %Y",  # December 09, 2025
	    ]

	    # Handle "Week of DD Mon YYYY" format
	    clean_date_str = date_str
	    if "week of" in date_str.lower():
	        clean_date_str = date_str.lower().replace("week of", "").strip()

	    for date_format in date_formats:
	        try:
	            return datetime.strptime(clean_date_str, date_format)
	        except ValueError:
	            continue

	    console.print(
	        f" [yellow]Could not parse date:[/yellow] '{date_str}' (tried {len(date_formats)} formats)"
	    )
	    return None


	def extract_manager_first_name(release_manager: str) -> str:
	    """Return the first name found in a release manager string.

	    The string may list several managers separated by "+"; the first word of
	    the first non-empty part is returned. An empty or whitespace-only string
	    (or one made only of "+" separators) yields "" instead of raising.
	    """
	    for part in release_manager.split("+"):
	        words = part.split()
	        if words:
	            return words[0]
	    return ""


	def generate_version_from_date(date: datetime) -> str:
	    """Build a ``YYYY.MM.DD`` version string for a release that has no explicit version."""
	    return date.strftime("%Y.%m.%d")


	def parse_table_row(
	    cells: list[Any],
	    row_num: int,
	    version_idx: int | None,
	    date_idx: int | None,
	    manager_idx: int | None,
	    release_type: str,
	) -> Release | None:
	    """Turn one table row into a ``Release``.

	    Returns ``None`` (after printing a note) when the date or manager cell is
	    empty, when the date cannot be parsed, or when reading the cells raises
	    ``IndexError``/``ValueError``. If the row has no version text, a version
	    is generated from the date.
	    """
	    try:
	        # Extract data from cells
	        date_str = cells[date_idx].get_text(strip=True) if date_idx is not None else ""
	        release_manager = cells[manager_idx].get_text(strip=True) if manager_idx is not None else ""
	        version = cells[version_idx].get_text(strip=True) if version_idx is not None else None

	        # Skip empty rows
	        if not date_str or not release_manager:
	            console.print(f" [dim]Row {row_num}: Skipping empty row[/dim]")
	            return None

	        # Parse date
	        date = parse_date_string(date_str)
	        if not date:
	            return None

	        # Extract manager name
	        release_manager_first = extract_manager_first_name(release_manager)

	        # Generate version if needed
	        if version_idx is None or not version:
	            version = generate_version_from_date(date)

	        release = Release(
	            release_type=release_type,
	            version=version,
	            date=date,
	            release_manager=release_manager_first,
	        )
	        console.print(f" [green]Parsed:[/green] {release}")
	        return release

	    except (IndexError, ValueError) as e:
	        console.print(f"[yellow]Error parsing row {row_num}:[/yellow] {e}")
	        return None


	def parse_table(table: Any, release_type: str) -> list[Release]:
	    """Parse a release table from HTML.

	    Args:
	        table: Parsed HTML table element; it must support ``find_all``.
	        release_type: Release type assigned to every release parsed from this table.

	    Returns:
	        The releases parsed from the data rows. The list is empty when the
	        headers are reported invalid or when the date or manager column
	        cannot be located.
	    """
	    releases: list[Release] = []
	    rows = table.find_all("tr")

	    console.print(f" [dim]Table has {len(rows)} rows[/dim]")

	    # Get and validate headers
	    headers, is_valid = get_table_headers(rows)
	    if not is_valid:
	        return releases

	    # Find column indices
	    version_idx, date_idx, manager_idx = find_column_indices(headers)

	    if date_idx is None or manager_idx is None:
	        console.print(" [yellow]Could not find required columns (date and manager)[/yellow]")
	        return releases

	    # Smallest number of cells a row needs so that every located column exists.
	    # Compare against None explicitly: 0 is a valid column index and must not be
	    # discarded the way a truthiness filter would discard it. The value does not
	    # depend on the row, so it is computed once.
	    min_cells = max(idx for idx in (version_idx, date_idx, manager_idx) if idx is not None) + 1

	    # Parse data rows (the first row holds the headers)
	    for i, row in enumerate(rows[1:], start=1):
	        cells = row.find_all(["td", "th"])
	        if len(cells) < min_cells:
	            console.print(f" [dim]Row {i}: Skipping (not enough cells)[/dim]")
	            continue

	        release = parse_table_row(cells, i, version_idx, date_idx, manager_idx, release_type)
	        if release:
	            releases.append(release)

	    return releases


	def parse_calendar_component(component: Any) -> CalendarEntry | None:
	    """Parse a calendar component into a CalendarEntry.

	    Args:
	        component: A calendar component exposing ``name`` and ``get``.

	    Returns:
	        A ``CalendarEntry`` whose ``start_date`` is midnight of the event's
	        start day, or ``None`` when the component is not a ``VEVENT`` or has
	        no ``dtstart``.
	    """
	    if component.name != "VEVENT":
	        return None

	    summary = str(component.get("summary", ""))
	    dtstart = component.get("dtstart")
	    description = component.get("description", "")

	    if not dtstart:
	        return None

	    # ``dtstart.dt`` may be a date or a datetime. A datetime is reduced to its
	    # calendar day so that both cases end up as midnight of that day. (A separate
	    # datetime branch was unreachable before: every datetime has ``.date``.)
	    start = dtstart.dt
	    start_day = start.date() if isinstance(start, datetime) else start
	    start_date = datetime.combine(start_day, datetime.min.time())

	    return CalendarEntry(
	        summary=summary,
	        start_date=start_date,
	        description=str(description) if description else None,
	    )


	def fetch_calendar_entries() -> list[CalendarEntry]:
	    """Fetch and parse calendar entries from iCal feed with retry logic.

	    The feed at ``CALENDAR_ICAL_URL`` is requested up to ``max_retries`` (3)
	    times with a 30 second timeout, sleeping ``retry_delay`` (10) seconds
	    after each failed attempt except the last. If every attempt fails the
	    process exits with status 1.

	    Returns:
	        One ``CalendarEntry`` per calendar component that
	        ``parse_calendar_component`` accepts.
	    """
	    console.print(f"[cyan]Fetching calendar:[/cyan] {CALENDAR_ICAL_URL}")

	    max_retries = 3
	    retry_delay = 10
	    calendar_data = b""

	    for attempt in range(1, max_retries + 1):
	        try:
	            response = requests.get(CALENDAR_ICAL_URL, timeout=30)
	            response.raise_for_status()
	            calendar_data = response.content
	            break
	        except requests.RequestException as e:
	            # ``attempt`` is 1-based, so every attempt before the last one retries.
	            # Comparing with ``max_retries - 1`` gave up one attempt too early.
	            if attempt < max_retries:
	                console.print(f"[yellow]Attempt {attempt}/{max_retries} failed: {e}[/yellow]")
	                console.print(f"[yellow]Retrying in {retry_delay} seconds...[/yellow]")
	                time.sleep(retry_delay)
	            else:
	                console.print(f"[red]Failed to fetch calendar after {max_retries} attempts:[/red] {e}")
	                sys.exit(1)

	    console.print("[cyan]Parsing calendar entries...[/cyan]")
	    calendar = Calendar.from_ical(calendar_data)

	    # Components that do not convert to an entry come back as None and are dropped.
	    parsed = (parse_calendar_component(component) for component in calendar.walk())
	    entries = [entry for entry in parsed if entry]

	    console.print(f"[green]Found {len(entries)} calendar entries[/green]")
	    return entries


	def normalize_name(name: str) -> str:
	    """Return ``name`` lowercased and stripped, with accent marks removed."""
	    import unicodedata

	    # NFD splits an accented letter into its base letter plus combining marks;
	    # the marks (category "Mn") are then left out.
	    decomposed = unicodedata.normalize("NFD", name)
	    base_chars = [ch for ch in decomposed if unicodedata.category(ch) != "Mn"]
	    return "".join(base_chars).lower().strip()


	def dates_match(release_date: datetime, entry_date: datetime) -> bool:
	    """Tell whether both values fall on the same calendar day (time of day is ignored)."""

	    def year_month_day(moment: datetime) -> tuple[int, int, int]:
	        return (moment.year, moment.month, moment.day)

	    return year_month_day(entry_date) == year_month_day(release_date)


	def check_release_type_match(release_type: str, summary: str) -> bool:
	    """Tell whether the calendar summary refers to the given release type.

	    Both strings are compared after ``normalize_name``. The release type
	    matches when it occurs inside the summary. A release type containing
	    both "airflow" and "ctl" also matches a summary containing both of
	    those words, which covers "Airflow Ctl" vs "Airflow CTL" variations.
	    """
	    summary_key = normalize_name(summary)
	    type_key = normalize_name(release_type)

	    # Direct containment of the whole release type.
	    if type_key in summary_key:
	        return True

	    # Looser keyword comparison, only for the Airflow CTL release type.
	    keywords = ("airflow", "ctl")
	    if all(word in type_key for word in keywords):
	        return all(word in summary_key for word in keywords)

	    return False


	def check_version_match(version: str, summary: str) -> bool:
	    """Tell whether ``version`` occurs verbatim somewhere in the calendar summary."""
	    position = summary.find(version)
	    return position != -1


	def check_manager_match(manager_name: str, summary: str) -> bool:
	    """Check if manager's name appears in the calendar entry summary.

	    Both strings are passed through ``normalize_name`` first, and the
	    manager name then only has to occur as a substring of the summary.

	    Args:
	        manager_name: Release manager name to look for.
	        summary: Calendar entry summary to search.

	    Returns:
	        True when the normalized name occurs in the normalized summary.
	    """
	    normalized_manager = normalize_name(manager_name)
	    normalized_summary = normalize_name(summary)

	    # A whole-word regex search used to follow this test as a fallback. Any
	    # whole-word match is also a substring match, so that fallback could never
	    # succeed once this test had failed; it was removed without changing results.
	    return normalized_manager in normalized_summary


	def is_matching_entry(release: Release, entry: CalendarEntry) -> bool:
	    """
	    Decide whether a calendar entry corresponds to a release.

	    All of the following must hold:
	    - the entry starts on the release date
	    - the entry summary matches the release type OR the version
	    - the entry summary matches the release manager name
	    """
	    # The date is the cheapest criterion and is mandatory, so test it first.
	    if not dates_match(release.date, entry.start_date):
	        return False

	    summary = entry.summary
	    type_ok = check_release_type_match(release.release_type, summary)
	    version_ok = check_version_match(release.version, summary)
	    manager_ok = check_manager_match(release.release_manager, summary)

	    if not manager_ok:
	        return False
	    return type_ok or version_ok


	def find_matching_entry(release: Release, calendar_entries: list[CalendarEntry]) -> CalendarEntry | None:
	    """Return the first calendar entry accepted by ``is_matching_entry``, else ``None``."""
	    candidates = (entry for entry in calendar_entries if is_matching_entry(release, entry))
	    return next(candidates, None)


	def print_verification_header() -> None:
	    """Print the banner that introduces the verification results."""
	    rule = "=" * 80
	    console.print("\n" + rule)
	    console.print("[bold cyan]VERIFICATION RESULTS[/bold cyan]")
	    console.print(rule + "\n")


	def print_matched_release(release: Release, entry: CalendarEntry) -> None:
	    """Report a release together with the summary of the calendar entry it matched."""
	    release_line = f"[green]✓ MATCHED:[/green] {release}"
	    console.print(release_line)
	    calendar_line = f" [dim]Calendar: {entry.summary}[/dim]"
	    console.print(calendar_line)


	def print_unmatched_release(release: Release) -> None:
	    """Report a release for which no calendar entry was found."""
	    message = f"[red]✗ NOT MATCHED:[/red] {release}"
	    console.print(message)


	def print_verification_summary(
	    total_releases: int, matched_count: int, unmatched_releases: list[Release]
	) -> None:
	    """Print the totals of the verification run, then list every unmatched release."""
	    rule = "=" * 80

	    console.print("\n" + rule)
	    console.print("[bold cyan]SUMMARY[/bold cyan]")
	    console.print(rule)
	    console.print(f"Total releases in Confluence: {total_releases}")
	    console.print(f"Matched releases: [green]{matched_count}[/green]")
	    console.print(f"Unmatched releases: [red]{len(unmatched_releases)}[/red]")

	    # The detailed list is only shown when something is actually missing.
	    if unmatched_releases:
	        console.print("\n[yellow]Unmatched releases:[/yellow]")
	        for missing in unmatched_releases:
	            console.print(f" [yellow]•[/yellow] {missing}")

	    console.print(rule + "\n")


	def verify_releases(releases: list[Release], calendar_entries: list[CalendarEntry]) -> bool:
	    """Check each release against the calendar and report the outcome.

	    Prints one line per release followed by a summary, and returns True only
	    when every release has a matching calendar entry.
	    """
	    print_verification_header()

	    unmatched_releases: list[Release] = []
	    matched_count = 0

	    for release in releases:
	        matching_entry = find_matching_entry(release, calendar_entries)

	        if not matching_entry:
	            unmatched_releases.append(release)
	            print_unmatched_release(release)
	            continue

	        print_matched_release(release, matching_entry)
	        matched_count += 1

	    print_verification_summary(len(releases), matched_count, unmatched_releases)

	    # Everything matched exactly when nothing was collected as unmatched.
	    return not unmatched_releases


	def load_html_content(args: argparse.Namespace) -> str:
	    """Return the release plan HTML, read from ``args.load_html`` or fetched from Confluence.

	    Freshly fetched HTML is also written to ``args.save_html`` when that
	    option is set. Files are read and written as UTF-8.
	    """
	    if not args.load_html:
	        html_content = fetch_confluence_page()
	        if args.save_html:
	            console.print(f"[cyan]Saving HTML to file:[/cyan] {args.save_html}")
	            Path(args.save_html).write_text(html_content, encoding="utf-8")
	        return html_content

	    console.print(f"[cyan]Loading HTML from file:[/cyan] {args.load_html}")
	    return Path(args.load_html).read_text(encoding="utf-8")


	def validate_releases(releases: list[Release]) -> None:
	    """Exit with status 1 (after printing an error) when no releases were parsed."""
	    if releases:
	        return
	    console.print("[red]No releases found in Confluence page![/red]")
	    sys.exit(1)


	def validate_calendar_entries(calendar_entries: list[CalendarEntry]) -> None:
	    """Exit with status 1 (after printing an error) when the calendar yielded no entries."""
	    if calendar_entries:
	        return
	    console.print("[red]No calendar entries found![/red]")
	    sys.exit(1)


	def print_final_result(all_matched: bool) -> None:
	    """Print the overall verdict and terminate: status 0 on success, 1 otherwise."""
	    if not all_matched:
	        console.print("[bold red]✗ Some releases do not have matching calendar entries![/bold red]")
	        sys.exit(1)

	    console.print("[bold green]✓ All releases have matching calendar entries![/bold green]")
	    sys.exit(0)


	def main():
	    """Parse the command line, load both data sources and run the verification."""
	    parser = argparse.ArgumentParser(
	        description="Verify that planned releases in Confluence match Google Calendar entries"
	    )
	    # Both options take a file path and differ only in flag and help text.
	    file_options = (
	        ("--save-html", "Save the fetched Confluence HTML to a file for debugging"),
	        ("--load-html", "Load Confluence HTML from a file instead of fetching"),
	    )
	    for flag, help_text in file_options:
	        parser.add_argument(flag, metavar="FILE", help=help_text)
	    args = parser.parse_args()

	    # Release plan from Confluence (or from a saved HTML file)
	    releases = parse_confluence_releases(load_html_content(args))
	    validate_releases(releases)

	    # Calendar entries from the iCal feed
	    calendar_entries = fetch_calendar_entries()
	    validate_calendar_entries(calendar_entries)

	    # Compare both and exit with the appropriate code
	    print_final_result(verify_releases(releases, calendar_entries))


	# Run the verification only when this file is executed as a script, not when it is imported.
	if __name__ == "__main__":
	main()