| #!/usr/bin/perl |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # |
| # ------------------------------------------ |
| # compare.shingle.benchmark.jira.tables.pl |
| # |
| # Takes as cmdline parameters two JIRA-formatted benchmark results, as produced |
| # by shingle.bm2jira.pl (located in the same directory as this script), and |
| # outputs a third JIRA-formatted comparison table. |
| # |
| # The difference is calculated as a percentage: |
| # |
| # 100 * (unpatched-elapsed - patched-elapsed) / patched-elapsed |
| # |
| # where (un)patched-elapsed values have had the no-shingle-filter |
| # (StandardAnalyzer) elapsed time subtracted from them. |
| # |
| # |
| # Example shingle.bm2jira.pl output: |
| # ---------------------------------- |
| # JAVA: |
| # java version "1.5.0_15" |
| # Java(TM) 2 Runtime Environment, Standard Edition (build 1.5.0_15-b04) |
| # Java HotSpot(TM) 64-Bit Server VM (build 1.5.0_15-b04, mixed mode) |
| # |
| # OS: |
| # cygwin |
| # WinVistaService Pack 2 |
| # Service Pack 26060022202561 |
| # |
| # ||Max Shingle Size||Unigrams?||Elapsed|| |
| # |1 (Unigrams)|yes|2.19s| |
| # |2|no|4.74s| |
| # |2|yes|4.90s| |
| # |4|no|5.82s| |
| # |4|yes|5.97s| |
| |
use strict;
use warnings;

my $usage = "Usage: $0 <unpatched-file> <patched-file>\n";

# Exactly two arguments are required, and each must name an existing plain
# file.  Anything else aborts; when a path fails the file test, say which one
# so the caller does not have to guess, then show the usage line.
die $usage unless @ARGV == 2;
for my $results_file (@ARGV) {
    die "ERROR: '$results_file' is not a plain file\n$usage"
        unless -f $results_file;
}

# Pass 1: the unpatched results file.
#
# Every line before the table header row is echoed to STDOUT unchanged, so the
# preamble of the unpatched file also heads the comparison output.  After the
# header row, each data row of the form |size|unigrams?|N.NNs| is captured:
#   - the row whose first cell contains "Unigrams" supplies the
#     StandardAnalyzer (no shingle filter) baseline time;
#   - every other row is stored as
#     $unpatched_stats{max shingle size}{unigrams?} = elapsed seconds.
#
# Three-arg open with low-precedence "or": with "||" the die was attached to
# the (always true) filename string, so a failed open went undetected.
open(my $unpatched_fh, '<', $ARGV[0])
    or die "ERROR opening '$ARGV[0]': $!";

my $table_encountered = 0;
my $standard_analyzer_elapsed = 0;
my %unpatched_stats = ();
my %patched_stats = ();

while (my $line = <$unpatched_fh>) {
    if (!$table_encountered) {
        if ($line =~ /\Q||Max Shingle Size||Unigrams?||Elapsed||\E/) {
            $table_encountered = 1;
        } else {
            print $line;    # preamble: pass through verbatim
        }
        next;
    }

    # Rows that do not look like |cell|cell|<number>s| are ignored.
    next unless $line =~ /\|([^|]+)\|([^|]+)\|([\d.]+)s\|/;
    my ($max_shingle_size, $output_unigrams, $elapsed) = ($1, $2, $3);

    if ($max_shingle_size =~ /Unigrams/) {
        $standard_analyzer_elapsed = $elapsed;
    } else {
        $unpatched_stats{$max_shingle_size}{$output_unigrams} = $elapsed;
    }
}
close $unpatched_fh;

# Pass 2: the patched results file.
#
# Only table data rows (|size|unigrams?|N.NNs|) are of interest here; all other
# lines are skipped without being echoed.  Non-baseline rows are stored as
# $patched_stats{max shingle size}{unigrams?} = elapsed seconds.
#
# Three-arg open with low-precedence "or": with "||" the die was attached to
# the (always true) filename string, so a failed open went undetected.
open(my $patched_fh, '<', $ARGV[1])
    or die "ERROR opening '$ARGV[1]': $!";

while (my $line = <$patched_fh>) {
    next unless $line =~ /\|([^|]+)\|([^|]+)\|([\d.]+)s\|/;
    my ($max_shingle_size, $output_unigrams, $elapsed) = ($1, $2, $3);

    if ($max_shingle_size =~ /Unigrams/) {
        # Keep the faster of the two StandardAnalyzer baselines.  A baseline
        # of 0 means the unpatched file supplied none, in which case this
        # one is adopted; otherwise "less than 0" could never succeed and
        # the baseline would silently stay at zero.
        if ($standard_analyzer_elapsed == 0
            || $elapsed < $standard_analyzer_elapsed) {
            $standard_analyzer_elapsed = $elapsed;
        }
    } else {
        $patched_stats{$max_shingle_size}{$output_unigrams} = $elapsed;
    }
}
close $patched_fh;

# Emit the JIRA-formatted comparison table: one row per
# (max shingle size, unigrams?) pair found in the unpatched results.
#
# Improvement is the time saved by the patch relative to the patched time,
# with the StandardAnalyzer baseline removed from the denominator:
#
#   (unpatched - patched) / (patched - standard_analyzer)
#
# shown as a percentage with one decimal place.  When the patched file has no
# matching row, or the denominator is zero, the cell reads "n/a" instead of
# producing warnings or an "Illegal division by zero" abort.
print "||Max Shingle Size||Unigrams?||Unpatched||Patched||StandardAnalyzer||Improvement||\n";
for my $max_shingle_size (sort { $a <=> $b } keys %unpatched_stats) {
    my $unpatched_row = $unpatched_stats{$max_shingle_size};
    my $patched_row   = $patched_stats{$max_shingle_size} || {};

    for my $output_unigrams (sort keys %{$unpatched_row}) {
        my $unpatched = $unpatched_row->{$output_unigrams};
        my $patched   = $patched_row->{$output_unigrams};

        my $patched_cell     = 'n/a';
        my $improvement_cell = 'n/a';

        if (defined $patched) {
            $patched_cell = "${patched}s";

            # Time attributable to shingling alone in the patched run.
            my $shingle_cost = $patched - $standard_analyzer_elapsed;
            if ($shingle_cost != 0) {
                my $ratio  = ($unpatched - $patched) / $shingle_cost;
                my $tenths = $ratio * 1000;    # percent, in tenths

                # Round half away from zero.  int() truncates toward zero,
                # so a bare int($x + .5) rounds negative values the wrong way.
                $tenths = $tenths < 0
                        ? -int(0.5 - $tenths)
                        :  int($tenths + 0.5);
                $improvement_cell = sprintf '%.1f%%', $tenths / 10;
            }
        } else {
            warn "WARNING: no patched result for max shingle size "
               . "'$max_shingle_size', unigrams '$output_unigrams'\n";
        }

        # Data goes through %s placeholders so that a stray '%' in a parsed
        # cell can never be interpreted as a format directive.
        printf "|%s|%s|%ss|%s|%ss|%s|\n",
            $max_shingle_size, $output_unigrams, $unpatched,
            $patched_cell, $standard_analyzer_elapsed, $improvement_cell;
    }
}