| #!/usr/bin/perl |
| ## Licensed to the Apache Software Foundation (ASF) under one |
| ## or more contributor license agreements. See the NOTICE file |
| ## distributed with this work for additional information |
| ## regarding copyright ownership. The ASF licenses this file |
| ## to you under the Apache License, Version 2.0 (the |
| ## "License"); you may not use this file except in compliance |
| ## with the License. You may obtain a copy of the License at |
| ## |
| ## http://www.apache.org/licenses/LICENSE-2.0 |
| ## |
| ## Unless required by applicable law or agreed to in writing, software |
| ## distributed under the License is distributed on an "AS IS" BASIS, |
| ## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| ## See the License for the specific language governing permissions and |
| ## limitations under the License. |
| |
| # Grammar into HTML |
| # Read in sparql.txt and the tokens.txt file |
| |
| ## ToDo: |
| ## Check tokens exist and are used |
| ## Validate |
| |
| |
| if ( $#ARGV != 1 ) |
| { |
| print STDERR "Usage: grammar.txt tokens.txt\n" ; |
| exit 1 ; |
| } |
| |
| $/ = undef ; |
| # Just table or full page. |
| $TABLE = 1 ; |
| |
| $grammarFile = $ARGV[0] ; |
| $tokensFile = $ARGV[1] ; |
| |
| $grammar = &readFile($grammarFile) ; |
| $tokens = &readFile($tokensFile) ; |
| |
| $grammar =~ s!DOCUMENT START!! ; |
| # $grammar =~ s!NON-TERMINALS!! ; |
| $grammar =~ s!DOCUMENT END!! ; |
| $grammar =~ s!TOKENS.*NON-TERMINALS!!s ; |
| |
| $grammar =~ s!//.*!!g ; |
| $grammar =~ s!\r!!g ; |
| |
| # remove leading whitespace |
| $grammar =~ s!^[\n\s]*!\n! ; |
| |
| # Merge alts |
| $grammar =~ s!\n\s*\|!\ |!g ; |
| |
| $tokens =~ s!//.*!!g ; |
| $tokens =~ s!\r!!g ; |
| |
| |
| ## Grammar |
| #print "GRAMMAR\n" ; |
| |
| @g = split(/\n\s*/, $grammar) ; |
| |
| @rules = () ; |
| %ruleMap = () ; |
| %tokenMap = () ; |
| %inline = () ; |
| |
| # Grammar rules |
| # Direct from "jjdoc -TEXT=true" |
| |
| for $g (@g) |
| { |
| ($rulename, $rulebody) = split(/:=/,$g) ; |
| |
| $rulename =~ s!^\s*!! ; |
| $rulename =~ s!\s*$!! ; |
| |
| $rulebody =~ s!^\s*!! ; |
| $rulebody =~ s!\s*$!! ; |
| |
| # Remove outer brackets |
| # $rulebody =~ s!^\((.*)\)$!$1! ; |
| |
| # Remove <> around tokens in grammar. |
| ## Now done very late (as <>) in fixups. |
| ## $rulebody =~ s/\<(\w+)\>/$1/g ; |
| # Leave in - so tokens distinguished from rules |
| |
| next if $rulename eq '' ; |
| #next if $rulebody eq '' ; |
| |
| # Skip the root rule. |
| next if ( $rulename eq 'CompilationUnit' ) ; |
| |
| $rulebody = 'Perl 5 regular expression' |
| if ( $rulename eq 'PatternLiteral' ) ; |
| |
| push @rules, $rulename ; |
| warn "Duplicate rule (grammar): $rulename\n" if defined($ruleMap{$rulename}) ; |
| $ruleMap{$rulename} = $rulebody ; |
| |
| ## print "----------\n" ; |
| ## print $rulename,"\n" ; |
| ## print $rulebody,"\n" ; |
| |
| } |
| |
| |
| # Tokens |
| # Produced by "jj2tokens" |
| # Hand edited to indicate the inlines |
| |
| $tokens =~ s/\n+/\n/g ; |
| $tokens =~ s/^\n// ; |
| |
| @t = split(/\n(?=\<|\[)/, $tokens) ; |
| |
| for $t (@t) |
| { |
| ($tokenname,$tokenbody) = split(/::=/, $t) ; |
| $tokenname =~ s!^\s*!! ; |
| $tokenname =~ s!\s*$!! ; |
| |
| ## # remove <> around tokens |
| ## Do very late as a formatting fix up. |
| ## $tokenname =~ s/^\<// ; |
| ## $tokenname =~ s/\>$// ; |
| |
| $tokenname =~ s/#// ; |
| |
| $tokenbody =~ s!^\s*!! ; |
| $tokenbody =~ s!\s*$!! ; |
| |
| # <> round tokens |
| # Remove at last minute. |
| |
| # Remove outer () |
| # $tokenbody =~ s!^\((.*)\)$!$1! ; |
| |
| # Inline? |
| if ( $tokenname =~ /^\[\<\w*\>\]/ ) |
| { |
| warn "Duplicate inline (token): $tokenname\n" if defined($inline{$tokenname}) ; |
| $tokenname =~ s/^\[//g ; |
| $tokenname =~ s/\]$//g ; |
| $tokenbody =~ s/"/'/g ; # '" -- But not literal " -- how? |
| $tokenbody =~ s/\<\>\'\{\}/\<\>\"\{\}/ ; # '" IRI fixup |
| |
| $inline{$tokenname} = $tokenbody ; |
| |
| #print "INLINE: ",$tokenname," => ",$tokenbody,"\n" ; |
| } |
| else |
| { |
| push @rules, $tokenname ; |
| warn "Duplicate rule (token): $tokenname\n" if defined($tokenMap{$tokenname}) ; |
| $ruleMap{$tokenname} = $tokenbody ; |
| } |
| } |
| |
| # Table |
| |
| if ( ! $TABLE ) |
| { |
| print "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" ; |
| print "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n" ; |
| print " \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n" ; |
| print "\n" ; |
| |
| print "<html>\n"; |
| print "<head>\n"; |
| print "<title>SPARQL Grammar</title>\n" ; |
| print "<style type=\"text/css\">\n" ; |
| |
| # .token inline |
| # .ruleHead |
| # .ruleBody |
| |
| print <<'EOF' ; |
| div.grammarTable table * { border-width: 0 ; } |
| div.grammarTable table * tr { border: 1px solid black ; } |
| |
| .grammar { text-align: left ; vertical-align: top ; } |
| .token { color: #3f3f5f; } |
| .gRuleHead { font-style: italic ; font-family: monospace ; } |
| .gRuleBody { font-family: monospace ; } |
| .gRuleLabel { font-family: monospace ; } |
| EOF |
| |
| print "</style>\n" ; |
| print "</head>\n"; |
| print "<body>\n"; |
| |
| print "\n" ; |
| } |
| |
| print "<div class=\"grammarTable\">\n" ; |
| print " <table><tbody>\n" ; |
| |
| $ruleNum = 0 ; |
| |
| for $r (@rules) |
| { |
| $DEBUG = 0 ; |
| $ruleNum++ ; |
| $rulename = $r ; |
| |
| $rulebody = $ruleMap{$rulename} ; |
| |
| ## $DEBUG = 1 if ( $rulename =~ /Prolog/ ) ; |
| |
| $rb = $rulebody ; |
| |
| if ( $DEBUG ) |
| { |
| print STDERR "\n" ; |
| print STDERR "Rule: $rulename\n" ; |
| print STDERR "Body: $rulebody\n" ; |
| } |
| |
| ## Do before '||' substitution |
| # Not perfect - some fixups later. |
| #$rb =~ s%\|%\<br/\>\|%g ; |
| |
| # Escape HTML chars before adding markup. |
| $rb = esc($rb) ; |
| |
| # Inlines |
| for $k (keys %inline) |
| { |
| $s = span('token', $inline{$k}) ; |
| $k = esc($k) ; |
| # Assumes escaped <> round tokens. |
| $k = quotemeta $k ; |
| $rb =~ s/$k/$s/g ; |
| |
| } |
| |
| if ( $DEBUG ) |
| { |
| print STDERR "After inlining\n" ; |
| print STDERR $rb,"\n" ; ; |
| } |
| |
| |
| # Add hrefs - issue if one is a substring of another \W helps. |
| |
| for $k (keys %ruleMap) |
| { |
| $s = href("r-".$k,$k) ; |
| |
| $k = esc($k) ; |
| $k = quotemeta $k ; |
| |
| ## if ( $DEBUG ) |
| ## { |
| ## print STDERR "K:$k\n" ; |
| ## } |
| |
| |
| $rb =~ s/(?=\W)(\s*)$k(\s*)(?=\W)/$1$s$2/g ; |
| $rb =~ s/^$k(\s*)(?=\W)/$s$1/g ; |
| $rb =~ s/(?=\W)(\s*)$k$/$1$s/g ; |
| $rb =~ s/^$k$/$s/g ; |
| } |
| |
| if ( $DEBUG ) |
| { |
| print STDERR "After hrefs\n" ; |
| print STDERR $rb,"\n" ; ; |
| } |
| |
| #exit if $ruleNum > 2 ; |
| |
| $rn = anchor("r-".$rulename, $rulename) ; |
| $rn = fixupHead($rn) ; |
| |
| if($rulename eq 'IRIREF') |
| { |
| print " </tbody></table>\n" ; |
| print "</div>\n" ; |
| print "<p>Productions for terminals:</p>\n" ; |
| print "<div class=\"grammarTable\">\n" ; |
| print " <table><tbody>\n" ; |
| } |
| |
| |
| print "\n" ; |
| print "<tr valign=\"baseline\">\n" ; |
| $rlabel = '[' . $ruleNum . '] ' ; |
| |
| print " <td>",code('gRuleLabel', $rlabel),"</td>\n" ; |
| |
| #print " <td>",span('gRuleHead', $rn),"</td>\n" ; |
| print " <td>",code('gRuleHead',$rn),"</td>\n" ; |
| |
| print " <td> ::= </td>\n" ; |
| |
| $rb = fixupRule($rulename, $rb) ; |
| print " <td>",code('gRuleBody',$rb),"</td>\n" ; |
| |
| print "</tr>\n" ; |
| |
| # $rule{$rulename, $rulebody) ; |
| # print $rulename , "\n" ; |
| } |
| |
| print " </tbody></table>\n" ; |
| print "</div>\n" ; |
| |
| if ( !$TABLE ) |
| { |
| print "\n" ; |
| print "</body>\n" ; |
| print "</html>\n" ; |
| } |
| |
| sub readFile |
| { |
| my $f = $_[0] ; |
| open(F, "$f") || die "$!"; |
| my $s = <F> ; |
| return $s ; |
| } |
| |
| sub esc |
| { |
| my $s = $_[0] ; |
| $s =~ s/&/&/g ; |
| $s =~ s/</</g ; |
| $s =~ s/>/>/g ; |
| return $s ; |
| } |
| |
| sub span |
| { |
| my $c = $_[0] ; |
| my $t = $_[1] ; |
| $t = esc($t) ; |
| my $s = '<span class="' . $c . '">' . $t . '</span>' ; |
| return $s ; |
| } |
| |
| sub href |
| { |
| my $a = $_[0] ; |
| my $t = $_[1] ; |
| $a = sane($a) ; |
| $t = esc($t) ; |
| my $s = '<a href="#' . $a . '">' . $t . '</a>' ; |
| return $s ; |
| } |
| |
| sub anchor |
| { |
| my $a = $_[0] ; |
| my $t = $_[1] ; |
| $a = sane($a) ; |
| $t = esc($t) ; |
| my $s = '<a id="' . $a . '" name="' . $a . '">' . $t . '</a>' ; |
| return $s ; |
| } |
| |
| sub sane |
| { |
| my $a = $_[0] ; |
| $a =~ s/\W//g ; |
| return $a ; |
| } |
| |
| sub code |
| { |
| my $c = $_[0] ; |
| my $t = $_[1] ; |
| return '<code class="' . $c . '">' . $t . '</code>' ; |
| } |
| |
| sub fixupHead |
| { |
| my $head = $_[0] ; |
| # Remove <> around tokens. |
| $head =~ s/<(\w+)>/$1/g ; |
| return $head ; |
| } |
| |
| sub fixupRule |
| { |
| my $head = $_[0] ; |
| my $body = $_[1] ; |
| |
| # Remove unnecessary () |
| $body =~ s/\(\s*([^()| ]*) \)/$1/g ; |
| |
| ## if ( $body =~ m!\(\s+(\<a[^>]*\>[^<>]*\</a\>)\s+\)! ) |
| ## { |
| ## $b = $body ; |
| ## print "================================\n" ; |
| ## print STDERR "$b\n" ; |
| ## print STDERR "--------\n" ; |
| ## $b =~ s!\(\s+(\<a[^>]*\>[^<>]*\</a\>)\s+\)!$1!g ; |
| ## $b =~ s!\(\s+(\<span[^>]*\>[^<>]*\</span\>)\s+\)!$1!g ; |
| ## print STDERR "$b\n" ; |
| ## print STDERR "=====\n" ; |
| ## print STDERR "\n" ; |
| ## } |
| |
| |
| # Remove outer matching () where there are no inner () |
| $body =~ s/^\(\s+([^\(]*)\s+\)$/$1/ ; |
| |
| # ( A )* => A* and for + and ? where A is a linked or spanned object |
| $body =~ s!\(\s+(\<a[^>]*\>[^<>]*\</a\>)\s+\)!$1!g ; |
| $body =~ s!\(\s+(\<span[^>]*\>[^<>]*\</span\>)\s+\)!$1!g ; |
| |
| # There aren't any of these |
| ## $body =~ s!\(\s+(\S*)\s+\)!$1!g ; |
| |
| # Remove <> around tokens. |
| $body =~ s/<(\w+)>/$1/g ; |
| |
| # Specials |
| # Split long bodies |
| if ( $head eq "CallExpression" || |
| $head eq "UnaryExpression" || |
| $head eq "<NCCHAR1p>" || |
| $head eq "PatternElement" || |
| $head eq "BuiltInCall" ) |
| { |
| $body =~ s%\|%\<br/\>\|%g ; |
| $body =~ s/^\s+// ; |
| $body = " ".$body ; |
| } |
| |
| if ( $head eq "Aggregate" ) |
| { |
| # Strip outer () |
| $body =~ s/^\(\s*(.*)\s*\)$/$1/ ; |
| ## Be careful of nested | in COUNT |
| $body =~ s%(\| \<span class="token")%\<br/\>$1%g ; |
| $body =~ s/^\s+// ; |
| $body = " ".$body ; |
| } |
| |
| |
| if ( $head eq "BuiltInCall" ) |
| { |
| # Undo <br/> for BNODE, RAND |
| # <br/>| <a href="#rNIL">NIL</a> ) |
| $body =~ s%\<br/\>\| *\<a href="#rNIL"\>NIL\</a\>%\| \<a href="#rNIL"\>NIL\</a\>%g ; |
| } |
| |
| if ( $head eq "RelationalExpression" || |
| $head eq "AdditiveExpression" || |
| $head eq "MultiplicativeExpression" || |
| $head eq "ConditionalOrExpression") |
| { |
| $body =~ s%\*\(%<br/>\(% ; |
| } |
| |
| # These failed the outer () test because they have nested () in them |
| if ( $head eq "QueryPattern" || |
| $head eq "OrderCondition" ) |
| { |
| # Remove outer () |
| $body =~ s/^\((.*)\)$/$1/ ; |
| } |
| |
| if ( $head eq "Query" ) |
| { |
| $body =~ s! \(!<br/>\(! ; |
| $body =~ s!\) !\)<br/>! ; |
| } |
| |
| if ( $head =~ m/(Select|Construct|Describe|Ask)Query/ ) |
| { |
| # Put a line break before the DatasetClause |
| # <a href="#rDatasetClause">DatasetClause</a> |
| $c = '<a href="#rDatasetClause">DatasetClause</a>' ; |
| $c = quotemeta $c ; |
| # Expects the dataset clause to be unbracketted |
| $body =~ s!(\(\s*$c)!<br/>$1! ; |
| } |
| |
| if ( $head eq "OrderCondition" ) |
| { |
| $body =~ s!\)\s*\|\s*\(!\)<br/>\| \(! ; |
| $body = " ".$body ; |
| } |
| |
| #Rules where an outer () is unnecessary. |
| if ( $head eq "GroupCondition" || |
| $head eq "LimitOffsetClauses" || |
| $head eq "GraphOrDefault" || |
| $head eq "ArgList" || |
| $head eq "ExpressionList" || |
| $head eq "PathPrimary" || |
| $head eq "PathMod" || |
| $head eq "PathPrimary" || |
| $head eq "PathNegatedPropertySet" || |
| $head eq "PathOneInPropertySet") |
| { |
| $body =~ s/^\(\s*(.*)\s*\)$/$1/ ; |
| } |
| return $body ; |
| } |