#!/usr/bin/perl
# Transforms Lucene Java's CHANGES.txt into Changes.html
# Input is read from STDIN (or from files named on the command line);
# output is written to STDOUT
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
use strict;
use warnings;
# Prefix used when issue keys in item text are turned into links: it is
# optionally stripped in front of a key and prepended to the generated href.
# With the empty string the href is just the bare issue key.
my $jira_url_prefix = '';
my $title = undef; # First non-blank input line, trimmed; doubles as the "title seen" flag
my $release = undef; # Heading line of the release currently being parsed
my $sections = undef; # Array ref of [ heading, items ] pairs belonging to $release
my $items = undef; # Array ref for the current section: [ list type, item text, item text, ... ]
my $first_relid = undef; # Lowercased, whitespace-to-underscore id of the first release pushed
my $second_relid = undef; # Same, for the second release pushed
my @releases = (); # One [ release, sections ] pair per release heading, in input order
my @lines = <>; # Get all input at once (files named in @ARGV, otherwise STDIN)
# Parse input and build hierarchical release structure in @releases:
#   @releases = ( [ $release, [ [ $heading, [ $type, @item_texts ] ], ... ] ], ... )
# The loop is index based because the item readers below advance $line_num
# themselves while they swallow continuation lines.
# NOTE(review): in this listing none of the blocks below is ever closed and no
# `next` statements appear, so the nesting cannot be taken at face value --
# confirm against the canonical script before relying on these comments.
for (my $line_num = 0 ; $line_num <= $#lines ; ++$line_num) {
$_ = $lines[$line_num];
next unless (/\S/); # Skip blank lines
# Until a title has been stored, the current (non-blank) line is the title
unless ($title) {
# NOTE(review): repeats the blank-line test made just above
if (/\S/) {
s/^\s+//; # Trim leading whitespace
s/\s+$//; # Trim trailing whitespace
$title = $_;
# A release heading starts a new, initially empty, list of sections.
# NOTE(review): "^" binds only to the "Release" alternative, so "Trunk" is
# accepted anywhere in the line -- confirm whether /^(?:Release|Trunk)/ was meant
if (/^(Release)|(Trunk)/) { # Release headings
$release = $_;
$sections = [];
push @releases, [ $release, $sections ];
# Remember the ids of the first two releases pushed; they are interpolated
# into the page script's collapse() test further down
($first_relid = lc($release)) =~ s/\s+/_/g if ($#releases == 0);
($second_relid = lc($release)) =~ s/\s+/_/g if ($#releases == 1);
# Forget the previous release's item list so the next item starts a new section
$items = undef;
# Section heading: 2 leading spaces, words all capitalized
# NOTE(review): the pattern as written here has a single space after "^",
# which disagrees with the "2 leading spaces" description -- confirm
if (/^ ([A-Z]+)\s*/) {
my $heading = $_;
$items = [];
push @$sections, [ $heading, $items ];
# Handle earlier releases without sections - create a headless section
unless ($items) {
$items = [];
push @$sections, [ undef, $items ];
# Every item of a section is parsed with the list type detected on the
# section's first item
my $type;
if (@$items) { # A list item has been encountered in this section before
$type = $items->[0]; # 0th position of items array is list type
} else {
$type = get_list_type($_);
push @$items, $type;
if ($type eq 'numbered') { # The modern items list style
# List item boundary is another numbered item or an unindented line
my $line;
my $item = $_;
$item =~ s/^(\s{0,2}\d+\.\s*)//; # Trim the leading item number
# Width of the removed "  N. " prefix, i.e. the hanging indent of continuation lines.
# NOTE(review): if the substitution above does not match, $1 still holds the
# capture of an earlier successful match
my $leading_ws_width = length($1);
$item =~ s/\s+$//; # Trim trailing whitespace
$item .= "\n";
# Append following lines (blank ones included) until a boundary line is read
while ($line_num < $#lines
and ($line = $lines[++$line_num]) !~ /^(?:\s{0,2}\d+\.\s*\S|\S)/) {
$line =~ s/^\s{$leading_ws_width}//; # Trim leading whitespace
$line =~ s/\s+$//; # Trim trailing whitespace
$item .= "$line\n";
$item =~ s/\n+\Z/\n/; # Trim trailing blank lines
push @$items, $item;
# Step back so the outer loop's ++$line_num revisits the boundary line.
# NOTE(review): no step back happens on the final line, so a boundary line
# that is also the last input line is not revisited -- confirm this is intended
--$line_num unless ($line_num == $#lines);
} elsif ($type eq 'paragraph') { # List item boundary is a blank line
my $line;
my $item = $_;
$item =~ s/^(\s+)//;
# Indent of the paragraph's first line; 0 when $1 is undefined
my $leading_ws_width = defined($1) ? length($1) : 0;
$item =~ s/\s+$//; # Trim trailing whitespace
$item .= "\n";
# Append following lines until a blank line (or the last line) is reached
while ($line_num < $#lines and ($line = $lines[++$line_num]) =~ /\S/) {
$line =~ s/^\s{$leading_ws_width}//; # Trim leading whitespace
$line =~ s/\s+$//; # Trim trailing whitespace
$item .= "$line\n";
push @$items, $item;
# Step back so the outer loop's ++$line_num revisits the line that ended the item
--$line_num unless ($line_num == $#lines);
} else { # $type is one of the bulleted types
# List item boundary is another bullet or a blank line
# NOTE(review): $type is interpolated into the patterns below unescaped; for
# the '.' bullet type it acts as the regex wildcard -- consider \Q$type\E
my $line;
my $item = $_;
$item =~ s/^(\s*$type\s*)//; # Trim the leading bullet
# Width of the removed bullet prefix, i.e. the hanging indent of continuation lines
my $leading_ws_width = length($1);
$item =~ s/\s+$//; # Trim trailing whitespace
$item .= "\n";
# Append following lines until the next bullet or a whitespace-only line
while ($line_num < $#lines
and ($line = $lines[++$line_num]) !~ /^\s*(?:$type|\Z)/) {
$line =~ s/^\s{$leading_ws_width}//; # Trim leading whitespace
$line =~ s/\s+$//; # Trim trailing whitespace
$item .= "$line\n";
push @$items, $item;
# Step back so the outer loop's ++$line_num revisits the line that ended the item
--$line_num unless ($line_num == $#lines);
# Print HTML-ified version to STDOUT
# The lines that follow are output text, not Perl statements: a generated-file
# warning, the licence notice, the stylesheet links and the page script
# (toggleList/collapse), with $first_relid and $second_relid interpolated so
# that collapse() can tell the two newest releases apart from the rest.
# NOTE(review): presumably this is the body of a here-document handed to
# print; its opening line and terminator are not visible in this listing, and
# several tokens look lost (the script name between the quotes, the operands
# of the comparisons in collapse(), the href of the logo link) -- restore them
# from the canonical script rather than from this text.
** WARNING: This file is generated from CHANGES.txt by the
** Perl script ''.
** Do *not* edit this file!
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* See the License for the specific language governing permissions and
* limitations under the License.
<link rel="stylesheet" href="ChangesFancyStyle.css" title="Fancy">
<link rel="alternate stylesheet" href="ChangesSimpleStyle.css" title="Simple">
<META http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
function toggleList(e) {
element = document.getElementById(e).style;
element.display == 'none' ? element.display = 'block' : element.display='none';
function collapse() {
for (var i = 0; i < document.getElementsByTagName("ul").length; i++) {
var list = document.getElementsByTagName("ul")[i];
if ( != '$first_relid' && != '$second_relid') { = "none";
for (var i = 0; i < document.getElementsByTagName("ol").length; i++) {
document.getElementsByTagName("ol")[i].style.display = "none";
window.onload = collapse;
<a href=""><img class="logoImage" alt="Hive" src="images/hive-logo.jpg" title="SQL and Data Warehousing Platform on Hadoop"></a>
#
# Emit the HTML body from @releases.
#
# Each release becomes a heading whose link toggles the release's list.  The
# first two releases use <h2>; from the third release on, everything is nested
# inside a single "Older Releases" list and uses <h3>.  Releases that have
# section headings get one <li> per section holding an <ol> of items;
# sectionless releases get a single bare list whose id is the release id.
#
my $heading;
my $relcnt = 0;
my $header = 'h2';
for my $rel (@releases) {
  # The third release opens the "Older Releases" wrapper exactly once
  if (++$relcnt == 3) {
    $header = 'h3';
    print "<h2><a href=\"javascript:toggleList('older')\">";
    print "Older Releases";
    print "</a></h2>\n";
    print "<ul id=\"older\">\n";
  }

  ($release, $sections) = @$rel;

  # The first section heading is undefined for the older sectionless releases.
  # Guard on @$sections so that a release heading without any content does not
  # autovivify a bogus first section.
  my $has_release_sections = @$sections ? $sections->[0][0] : undef;

  # Element id of the release: lowercased heading, whitespace runs -> "_"
  (my $relid = lc($release)) =~ s/\s+/_/g;
  print "<$header><a href=\"javascript:toggleList('$relid')\">";
  print "$release";
  print "</a></$header>\n";
  print "<ul id=\"$relid\">\n"
    if ($has_release_sections);

  for my $section (@$sections) {
    ($heading, $items) = @$section;

    # Headless sections carry an undefined heading; give them an empty
    # section id instead of calling lc() on undef
    my $sectid = defined($heading) ? lc($heading) : '';
    $sectid =~ s/\s+/_/g;

    # Element 0 of @$items is the list type, so the last index is the item count
    my $numItemsStr = $#{$items} > 0 ? "($#{$items})" : "(none)";
    print " <li><a href=\"javascript:toggleList('$relid.$sectid')\">",
          ($heading || ''), "</a>&nbsp;&nbsp;&nbsp;$numItemsStr\n"
      if ($has_release_sections);

    my $list_type = $items->[0] || '';
    my $list = ($has_release_sections || $list_type eq 'numbered' ? 'ol' : 'ul');
    my $listid = $sectid ? "$relid.$sectid" : $relid;
    print " <$list id=\"$listid\">\n";

    for my $itemnum (1..$#{$items}) {
      my $item = $items->[$itemnum];
      $item =~ s:&:&amp;:g; # Escape HTML metachars
      $item =~ s:<:&lt;:g;
      $item =~ s:>:&gt;:g;
      $item =~ s:\s*(\([^)"]+?\))\s*$:<br />$1:; # Separate attribution
      $item =~ s:\n{2,}:\n<p/>\n:g; # Keep paragraph breaks
      # The prefix is quoted so that URL characters such as "." match literally.
      # NOTE(review): only HADOOP-nnn keys are linked; confirm whether other
      # project keys should be recognised as well
      $item =~ s{(?:\Q${jira_url_prefix}\E)?(HADOOP-\d+)} # Link to JIRA
                {<a href="${jira_url_prefix}$1">$1</a>}g;
      print " <li>$item</li>\n";
    }
    print " </$list>\n";
    print " </li>\n" if ($has_release_sections);
  }
  print "</ul>\n" if ($has_release_sections);
}
# Close the "Older Releases" wrapper whenever it was opened, which happens as
# soon as a third release exists
print "</ul>\n" if ($relcnt >= 3);
print "</body>\n</html>\n";
#
# Subroutine: get_list_type
#
# Classifies the first line of a list item so the caller knows how the
# items of that section are delimited.
#
# Takes one parameter:
#
#    - The first line of a sub-section/point
#
# Returns one scalar:
#
#    - The list type:
#        'numbered'  - at most two whitespace characters, then digits, a
#                      period, whitespace and some text ("1. Foo");
#        '-' or '.'  - the bullet character itself, when the line is optional
#                      whitespace, that bullet, whitespace and some text;
#        'paragraph' - anything else, including an undefined argument.
#
sub get_list_type {
  my ($first_list_item_line) = @_;
  my $type = 'paragraph'; # Default to paragraph type

  # Nothing to classify: keep the default instead of matching against undef
  return $type unless defined $first_list_item_line;

  if ($first_list_item_line =~ /^\s{0,2}\d+\.\s+\S+/) {
    $type = 'numbered';
  } elsif ($first_list_item_line =~ /^\s*([-.])\s+\S+/) {
    $type = $1; # The captured bullet character is the type
  }
  return $type;
}