| # please insert nothing before this line: -*- mode: cperl; cperl-indent-level: 4; cperl-continued-statement-offset: 4; indent-tabs-mode: nil -*- |
| package ModPerl::CScan; |
| |
| require Exporter; |
| use Config '%Config'; |
| use File::Basename; |
| |
| # NOTE to distributors: this module is needed only for mp2 developers, |
| # it's not a requirement for mod_perl users |
| use Data::Flow qw(0.05); |
| |
| use strict; # Earlier it catches ISA and EXPORT. |
| |
| @ModPerl::CScan::ISA = qw(Exporter Data::Flow); |
| |
| # Items to export into callers namespace by default. Note: do not export |
| # names by default without a very good reason. Use EXPORT_OK instead. |
| # Do not simply export all your public functions/methods/constants. |
| |
| @ModPerl::CScan::EXPORT = qw( |
| ); |
| @ModPerl::CScan::EXPORT_OK = qw( |
| ); |
| # this flag tells cpp to only output macros |
| $ModPerl::CScan::MACROS_ONLY = '-dM'; |
| |
| $ModPerl::CScan::VERSION = '0.75'; |
| |
| my (%keywords,%style_keywords); |
| for (qw(asm auto break case char continue default do double else enum |
| extern float for fortran goto if int long register return short |
| sizeof static struct switch typedef union unsigned signed while void volatile)) { |
| $keywords{$_}++; |
| } |
| for (qw(bool class const delete friend inline new operator overload private |
| protected public virtual)) { |
| $style_keywords{'C++'}{$_}++; |
| } |
| for (qw(__func__ _Complex _Imaginary _Bool inline restrict)) { |
| $style_keywords{'C9X'}{$_}++; |
| } |
| for (qw(inline const asm noreturn section |
| constructor destructor unused weak)) { |
| $style_keywords{'GNU'}{$_}++; |
| $style_keywords{'GNU'}{"__$ {_}__"}++; |
| } |
| $style_keywords{'GNU'}{__attribute__}++; |
| $style_keywords{'GNU'}{__extension__}++; |
| $style_keywords{'GNU'}{__consts}++; |
| $style_keywords{'GNU'}{__const}++; |
| $style_keywords{'GNU'}{__restrict}++; |
| |
| my $recipes |
| = { Defines => { default => '' }, |
| cppstdin => { default => $Config{cppstdin} }, |
| cppflags => { default => $Config{cppflags} }, |
| cppminus => { default => $Config{cppminus} }, |
| c_styles => { default => [qw(C++ GNU C9X)] }, |
| add_cppflags => { default => '' }, |
| keywords => { prerequisites => ['c_styles'], |
| output => sub { |
| my %kw = %keywords; |
| my %add; |
| for ( @{ shift->{c_styles} } ) { |
| %add = %{ $style_keywords{$_} }; |
| %kw = (%kw, %add); |
| } |
| \%kw; |
| }, }, |
| 'undef' => { default => undef }, |
| filename_filter => { default => undef }, |
| full_text => { class_filter => [ 'text', 'C::Preprocessed', |
| qw(undef filename Defines includeDirs Cpp)] }, |
| text => { class_filter => [ 'text', 'C::Preprocessed', |
| qw(filename_filter filename Defines includeDirs Cpp)] }, |
| text_only_from => { class_filter => [ 'text_only_from', 'C::Preprocessed', |
| qw(filename_filter filename Defines includeDirs Cpp)] }, |
| includes => { filter => [ \&includes, |
| qw(filename Defines includeDirs Cpp) ], }, |
| includeDirs => { prerequisites => ['filedir'], |
| output => sub { |
| my $data = shift; |
| [ $data->{filedir}, '/usr/local/include', '.']; |
| } }, |
| Cpp => { prerequisites => [qw(cppminus add_cppflags cppflags cppstdin)], |
| output => sub { |
| my $data = shift; |
| return { cppstdin => $data->{cppstdin}, |
| cppflags => "$data->{cppflags} $data->{add_cppflags}", |
| cppminus => $data->{cppminus} }; |
| } }, |
| filedir => { output => sub { dirname ( shift->{filename} || '.' ) } }, |
| sanitized => { filter => [ \&sanitize, 'text'], }, |
| toplevel => { filter => [ \&top_level, 'sanitized'], }, |
| full_sanitized => { filter => [ \&sanitize, 'full_text'], }, |
| full_toplevel => { filter => [ \&top_level, 'full_sanitized'], }, |
| no_type_decl => { filter => [ \&remove_type_decl, 'toplevel'], }, |
| typedef_chunks => { filter => [ \&typedef_chunks, 'full_toplevel'], }, |
| struct_chunks => { filter => [ \&struct_chunks, 'full_toplevel'], }, |
| typedefs_whited => { filter => [ \&typedefs_whited, |
| 'full_sanitized', 'typedef_chunks', |
| 'keywords_rex'], }, |
| typedef_texts => { filter => [ \&typedef_texts, |
| 'full_text', 'typedef_chunks'], }, |
| struct_texts => { filter => [ \&typedef_texts, |
| 'full_text', 'struct_chunks'], }, |
| typedef_hash => { filter => [ \&typedef_hash, |
| 'typedef_texts', 'typedefs_whited'], }, |
| typedef_structs => { filter => [ \&typedef_structs, |
| 'typedef_hash', 'struct_texts'], }, |
| typedefs_maybe => { filter => [ sub {[keys %{+shift}]}, |
| 'typedef_hash'], }, |
| defines_maybe => { filter => [ \&defines_maybe, 'filename'], }, |
| defines_no_args => { prerequisites => ['defines_maybe'], |
| output => sub { shift->{defines_maybe}->[0] }, }, |
| defines_args => { prerequisites => ['defines_maybe'], |
| output => sub { shift->{defines_maybe}->[1] }, }, |
| |
| defines_full => { filter => [ \&defines_full, |
| qw(filename Defines includeDirs Cpp) ], }, |
| defines_no_args_full => { prerequisites => ['defines_full'], |
| output => sub { shift->{defines_full}->[0] }, }, |
| defines_args_full => { prerequisites => ['defines_full'], |
| output => sub { shift->{defines_full}->[1] }, }, |
| |
| decl_inlines => { filter => [ \&functions_in, 'no_type_decl'], }, |
| inline_chunks => { filter => [ sub { shift->[0] }, 'decl_inlines'], }, |
| inlines => { filter => [ \&from_chunks, 'inline_chunks', 'text'], }, |
| decl_chunks => { filter => [ sub { shift->[1] }, 'decl_inlines'], }, |
| decls => { filter => [ \&from_chunks, 'decl_chunks', 'text'], }, |
| fdecl_chunks => { filter => [ sub { shift->[4] }, 'decl_inlines'], }, |
| fdecls => { filter => [ \&from_chunks, 'fdecl_chunks', 'text'], }, |
| mdecl_chunks => { filter => [ sub { shift->[2] }, 'decl_inlines'], }, |
| mdecls => { filter => [ \&from_chunks, 'mdecl_chunks', 'text'], }, |
| vdecl_chunks => { filter => [ sub { shift->[3] }, 'decl_inlines'], }, |
| vdecls => { filter => [ \&from_chunks, 'vdecl_chunks', 'text'], }, |
| vdecl_hash => { filter => [ \&vdecl_hash, 'vdecls', 'mdecls' ], }, |
| parsed_fdecls => { filter => [ \&do_declarations, 'fdecls', |
| 'typedef_hash', 'keywords'], }, |
| parsed_inlines => { filter => [ \&do_declarations, 'inlines', |
| 'typedef_hash', 'keywords'], }, |
| keywords_rex => { filter => [ sub { my @k = keys %{ shift() }; |
| local $" = '|'; |
| my $r = "(?:@k)"; |
| eval 'qr/$r/' or $r # Older Perls |
| }, 'keywords'], }, |
| }; |
| |
| sub from_chunks { |
| my $chunks = shift; |
| my $txt = shift; |
| my @out; |
| my $i = 0; |
| while ($i < @$chunks) { |
| push @out, substr $txt, $chunks->[$i], $chunks->[ $i + 1 ] - $chunks->[$i]; |
| $i += 2; |
| } |
| \@out; |
| } |
| |
| #sub process { request($recipes, @_) } |
| # Preloaded methods go here. |
| |
| sub includes { |
| my %seen; |
| my $stream = new C::Preprocessed (@_) |
| or die "Cannot open pipe from cppstdin: $!\n"; |
| |
| while (<$stream>) { |
| next unless m(^\s*\#\s* # Leading hash |
| (line\s*)? # 1: Optional line |
| ([0-9]+)\s* # 2: Line number |
| (.*) # 3: The rest |
| )x; |
| my $include = $3; |
| $include = $1 if $include =~ /"(.*)"/; # Filename may be in quotes |
| $include =~ s,\\\\,/,g if $^O eq 'os2'; |
| $seen{$include}++ if $include ne ""; |
| } |
| [keys %seen]; |
| } |
| |
| sub defines_maybe { |
| my $file = shift; |
| my ($mline,$line,%macros,%macrosargs,$sym,$args); |
| open(C, $file) or die "Cannot open file $file: $!\n"; |
| while (not eof(C) and $line = <C>) { |
| next unless |
| ( $line =~ s[ |
| ^ \s* \# \s* # Start of directive |
| define \s+ |
| (\w+) # 1: symbol |
| (?: |
| \( (.*?) \s* \) # 2: Minimal match for arguments |
| # in parenths (without trailing |
| # spaces) |
| )? # optional, no grouping |
| \s* # rest is the definition |
| ([\s\S]*) # 3: the rest |
| ][]x ); |
| ($sym, $args, $mline) = ($1, $2, $3); |
| $mline .= <C> while not eof(C) and $mline =~ s/\\\n/\n/; |
| chomp $mline; |
| #print "sym: `$sym', args: `$args', mline: `$mline'\n"; |
| if (defined $args) { |
| $macrosargs{$sym} = [ [split /\s*,\s*/, $args], $mline]; |
| } else { |
| $macros{$sym} = $mline; |
| } |
| } |
| close(C) or die "Cannot close file $file: $!\n"; |
| [\%macros, \%macrosargs]; |
| } |
| |
| sub defines_full { |
| my $Cpp = $_[3]; |
| my ($mline,$line,%macros,%macrosargs,$sym,$args); |
| |
| # save the old cppflags and add the flag for only ouputting macro definitions |
| my $old_cppstdin = $Cpp->{'cppstdin'}; |
| $Cpp->{'cppstdin'} = $old_cppstdin . " " . $ModPerl::CScan::MACROS_ONLY; |
| |
| my $stream = new C::Preprocessed (@_) |
| or die "Cannot open pipe from cppstdin: $!\n"; |
| |
| while (defined ($line = <$stream>)) { |
| next unless |
| ( $line =~ s[ |
| ^ \s* \# \s* # Start of directive |
| define \s+ |
| (\w+) # 1: symbol |
| (?: |
| \( (.*?) \s* \) # 2: Minimal match for arguments |
| # in parenths (without trailing |
| # spaces) |
| )? # optional, no grouping |
| \s* # rest is the definition |
| ([\s\S]*) # 3: the rest |
| ][]x ); |
| ($sym, $args, $mline) = ($1, $2, $3); |
| $mline .= <$stream> while ($mline =~ s/\\\n/\n/); |
| chomp $mline; |
| #print STDERR "sym: `$sym', args: `$args', mline: `$mline'\n"; |
| if (defined $args) { |
| $macrosargs{$sym} = [ [split /\s*,\s*/, $args], $mline]; |
| } else { |
| $macros{$sym} = $mline; |
| } |
| } |
| # restore the original cppflags |
| $Cpp->{'cppstdin'} = $old_cppstdin; |
| [\%macros, \%macrosargs]; |
| } |
| |
| sub typedef_chunks { # Input is toplevel, output: starts and ends |
| my $txt = shift; |
| pos $txt = 0; |
| my ($b, $e, @out); |
| while ($txt =~ /\btypedef\b/g) { |
| push @out, pos $txt; |
| $txt =~ /(?=;)|\Z/g; |
| push @out, pos $txt; |
| } |
| \@out; |
| } |
| |
| sub struct_chunks { |
| my $txt = shift; |
| pos $txt = 0; |
| my ($b, $e, @out); |
| while ($txt =~ /\b(?=struct\s*(\w*\s*)?\{)/g) { |
| push @out, pos $txt; |
| $txt =~ /(?=;)|\Z/g; |
| push @out, pos $txt; |
| } |
| \@out; |
| } |
| |
| sub typedefs_whited { # Input is sanitized text, and list of beg/end. |
| my @lst = @{$_[1]}; |
| my @out; |
| my ($b, $e); |
| while ($b = shift @lst) { |
| $e = shift @lst; |
| push @out, whited_decl($_[2], substr $_[0], $b, $e - $b); |
| } |
| \@out; |
| } |
| |
| sub structs_whited { |
| my @lst = @{$_[1]}; |
| my @out; |
| my ($b, $e, $in); |
| while ($b = shift @lst) { |
| $e = shift @lst; |
| $in = substr $_[0], $b, $e - $b; |
| $in =~ s/^(struct\s*(\w*\s*)?)(.*)$/$1 . " " x length($3)/es; |
| push @out, $in; |
| } |
| \@out; |
| } |
| |
| sub typedef_texts { |
| my ($txt, $chunks) = (shift, shift); |
| my ($b, $e, $in, @out); |
| my @in = @$chunks; |
| while (($b, $e) = splice @in, 0, 2) { |
| $in = substr($txt, $b, $e - $b); |
| # remove any remaining directives |
| $in =~ s/^ ( \s* \# .* ( \\ $ \n .* )* ) / ' ' x length($1)/xgem; |
| push @out, $in; |
| } |
| \@out; |
| } |
| |
| sub typedef_hash { |
| my ($typedefs, $whited) = (shift,shift); |
| my %out; |
| |
| loop: |
| for my $o (0..$#$typedefs) { |
| my $wh = $whited->[$o]; |
| my $td = $typedefs->[$o]; |
| #my $verb = $td =~ /apr_child_errfn_t/ ? 1 : 0; |
| #warn "$wh || $td\n" if $verb; |
| if ($wh =~ /,/ or not $wh =~ /\w/) { # Hard case, guessimates ... |
| # Determine whether the new thingies are inside parens |
| $wh =~ /,/g; |
| my $p = pos $wh; |
| my ($s, $e); |
| if (matchingbrace($wh)) { # Inside. Easy part: just split on /,/... |
| $e = pos($wh) - 1; |
| $s = $e; |
| my $d = 0; |
| # Skip back |
| while (--$s >= 0) { |
| my $c = substr $wh, $s, 1; |
| if ($c =~ /[\(\{\[]/) { |
| $d--; |
| } elsif ($c =~ /[\)\]\}]/) { |
| $d++; |
| } |
| last if $d < 0; |
| } |
| if ($s < 0) { # Should not happen |
| warn("panic: could not match braces in\n\t$td\nwhited as\n\t$wh\n"); |
| next loop; |
| } |
| $s++; |
| } else { # We are at toplevel |
| # We need to skip back all the modifiers attached to the first thingy |
| # Guesstimates: everything after the first '*' (inclusive) |
| pos $wh = 0; |
| $wh = /(?=\w)/g; |
| my $ws = pos $wh; |
| my $pre = substr $wh, 0, $ws; |
| $s = $ws; |
| $s = pos $pre if $pre =~ /(?=\*)/g; |
| $e = length $wh; |
| } |
| # Now: need to split $td based on commas in $wh! |
| # And need to split each chunk of $td based on word in the chunk of $wh! |
| my $td_decls = substr($td, $s, $e - $s); |
| my ($pre, $post) = (substr($td, 0, $s), substr($td, $e)); |
| my $wh_decls = substr($wh, $s, $e - $s); |
| my @wh_decls = split /,/, $wh_decls; |
| my $td_s = 0; |
| my (@td_decl, @td_pre, @td_post, @td_word); |
| for my $wh_d (@wh_decls) { |
| my $td_d = substr $td, $td_s, length $wh_d; |
| push @td_decl, $td_d; |
| $wh_d =~ /(\w+)/g; |
| push @td_word, $1; |
| push @td_post, substr $td_d, pos($wh_d); |
| push @td_pre, substr $td_d, pos($wh_d) - length $1, length $1; |
| $td_s += 1 + length $wh_d; # Skip over ',' |
| } |
| for my $i (0..$#wh_decls) { |
| my $p = "$td_post[$i]$post"; |
| $p = '' unless $p =~ /\S/; |
| $out{$td_word[$i]} = ["$pre$td_pre[$i]", $p]; |
| } |
| } elsif ($td =~ /\(\s* \*? \s* ([^)]+) \s* \) \s* \(.*\)/gxs){ # XXX: function pointer typedef |
| $out{$1} = ['XXX: pre_foo', 'XXX: post_bar']; # XXX: not sure what to stuff here |
| #warn "[$1] [$td]" if $verb; |
| } else { # Only one thing defined... |
| $wh =~ /(\w+)/g; |
| my $e = pos $wh; |
| my $s = $e - length $1; |
| my $type = $1; |
| my $pre = substr $td, 0, $s; |
| my $post = substr $td, $e, length($td) - $e; |
| $post = '' unless $post =~ /\S/; |
| $out{$type} = [$pre, $post]; |
| } |
| |
| #die if $verb; |
| |
| } |
| \%out; |
| } |
| |
| sub typedef_structs { |
| my ($typehash, $structs) = @_; |
| my %structs; |
| for (0 .. $#$structs) { |
| my $in = $structs->[$_]; |
| my $key; |
| next unless $in =~ /^struct\s*(\w+)/; |
| next unless $in =~ s{^(struct\s*)(\w+)}{ |
| $key = "struct $2"; |
| $1 . " " x length($2) |
| }e; |
| my $name = parse_struct($in, \%structs); |
| $structs{$key} = defined($name) ? $structs{$name} : undef; |
| } |
| while (my ($key, $text) = each %$typehash) { |
| my $name = parse_struct($text->[0], \%structs); |
| $structs{$key} = defined($name) ? $structs{$name} : undef; |
| } |
| \%structs; |
| } |
| |
| sub parse_struct { |
| my ($in, $structs) = @_; |
| my ($b, $e, $chunk, $vars, $struct, $structname); |
| return "$1 $2" if $in =~ / |
| ^ \s* (struct | union) \s+ (\w+) \s* $ |
| /x; |
| ($structname, $in) = $in =~ / |
| ^ \s* ( (?: struct | union ) (?: \s+ \w+ )? ) \s* { \s* (.*?) \s* } \s* $ |
| /gisx or return; |
| $structname .= " _ANON" unless $structname =~ /\s/; |
| $structname .= " 0" if exists $structs->{$structname}; |
| $structname =~ s/(\d+$)/$1 + 1/e while exists $structs->{$structname}; |
| $structname =~ s/\s+/ /g; |
| $b = 0; |
| while ($in =~ /(\{|;|$)/g) { |
| matchingbrace($in), next if $1 eq '{'; |
| $e = pos($in); |
| next if $b == $e; |
| $chunk = substr($in, $b, $e - $b); |
| $b = $e; |
| if ($chunk =~ /\G\s*(struct|union|enum).*\}/gs) { |
| my $term = pos $chunk; |
| my $name = parse_struct(substr($chunk, 0, $term), $structs); |
| $vars = parse_vars(join ' ', $name, substr $chunk, $term); |
| } else { |
| $vars = parse_vars($chunk); |
| } |
| push @$struct, @{$vars||[]}; |
| } |
| $structs->{$structname} = $struct; |
| $structname; |
| } |
| |
| sub parse_vars { |
| my $in = shift; |
| my ($vars, $type, $word, $id, $post, $func); |
| |
| while ($in =~ /\G\s*([\[;,(]|\*+|:\s*\d+|\S+?\b|$)\s*/gc) { |
| $word = $1; |
| if ($word eq ';' || $word eq '') { |
| next unless defined $id; |
| $type = 'int' unless defined $type; # or is this an error? |
| push @$vars, [ $type, $post, $id ]; |
| ($type, $post, $id, $func) = (undef, undef, undef); |
| } elsif ($word eq ',') { |
| warn "panic: expecting name before comma in '$in'\n" unless defined $id; |
| $type = 'int' unless defined $type; # or is this an error? |
| push @$vars, [ $type, $post, $id ]; |
| $type =~ s/[ *]*$//; |
| $id = undef; |
| } elsif ($word eq '[') { |
| warn "panic: expecting name before '[' in '$in'\n" unless defined $id; |
| $type = 'int' unless defined $type; # or is this an error? |
| my $b = pos $in; |
| matchingbrace($in); |
| $post .= $word . substr $in, $b, pos($in) - $b; |
| } elsif ($word eq '(') { |
| # simple hack for function pointers |
| $type = join ' ', grep defined, $type, $id if defined $id; |
| $type = 'int' unless defined $type; |
| if ($in =~ /\G\s*(\*[\s\*]*?)\s*(\w+)[\[\]\d\s]*(\)\s*\()/gc) { |
| $type .= "($1"; |
| $id = $2; |
| $post = $3; |
| my $b = pos $in; |
| matchingbrace($in); |
| $post .= substr $in, $b, pos($in) - $b; |
| } else { |
| warn "panic: can't parse function pointer declaration in '$in'\n"; |
| return; |
| } |
| } elsif ($word =~ /^:/) { |
| # bitfield |
| $type = 'int' unless defined $type; |
| $post .= $word; |
| } else { |
| if (defined $post) { |
| if ($func) { |
| $post .= $word; |
| } else { |
| warn "panic: not expecting '$word' after array bounds in '$in'\n"; |
| } |
| } else { |
| $type = join ' ', grep defined, $type, $id if defined $id; |
| $id = $word; |
| } |
| } |
| } |
| unless ($vars) { |
| warn sprintf "failed on <%s> with type=<%s>, id=<%s>, post=<%s> at pos=%d\n", |
| $in, $type, $id, $post, pos($in); |
| } |
| $vars; |
| } |
| |
| sub vdecl_hash { |
| my ($vdecls, $mdecls) = @_; |
| my %vdecl_hash; |
| for (@$vdecls, @$mdecls) { |
| next if /[()]/; # ignore functions, and function pointers |
| my $copy = $_; |
| next unless $copy =~ s/^\s*extern\s*//; |
| my $vars = parse_vars($copy); |
| $vdecl_hash{$_->[2]} = [ @$_[0, 1] ] for @$vars; |
| } |
| \%vdecl_hash; |
| } |
| |
| # The output is the list of list of inline chunks and list of |
| # declaration chunks. |
| |
| sub functions_in { # The arg is text without type declarations. |
| my $in = shift; # remove_type_decl(top_level(sanitize($txt))); |
| # What remains now consists of variable and function declarations, |
| # and inline functions. |
| $in =~ /(?=\S)/g; |
| my ($b, $e, $b1, $e1, @inlines, @decls, @mdecls, @fdecls, @vdecls); |
| $b = pos $in; |
| my $chunk; |
| while (defined($b) && $b != length $in) { |
| $in =~ /;/g or pos $in = $b, $in =~ /.*\S|\Z/g ; # Or last non-space |
| $e = pos $in; |
| $chunk = substr $in, $b, $e - $b; |
| # Now subdivide the chunk. |
| # |
| # What we got is one chunk, probably finished by `;'. Whoever, it |
| # may start with several inline functions. |
| # |
| # Note that inline functions contain ( ) { } in the stripped version. |
| $b1 = 0; |
| while ($chunk =~ /\(\s*\)\s*\{\s*\}/g) { |
| $e1 = pos $chunk; |
| push @inlines, $b + $b1, $b + $e1; |
| $chunk =~ /(?=\S)/g; |
| $b1 = pos $chunk; |
| $b1 = length $chunk, last unless defined $b1; |
| } |
| if ($e - $b - $b1 > 0) { |
| my ($isvar, $isfunc) = (1, 1); |
| substr ($chunk, 0, $b1) = ''; |
| if ($chunk =~ /,/) { # Contains multiple declarations. |
| push @mdecls, $b + $b1, $e; |
| } else { # Non-multiple. |
| # Since leading \s* is not optimized, this is quadratic! |
| $chunk =~ s{ |
| ( ( const | __const |
| | __attribute__ \s* \( \s* \) |
| ) \s* )* ( ; \s* )? \Z # Strip from the end |
| }()x; |
| $chunk =~ s/\s*\Z//; |
| if ($chunk =~ /\)\Z/) { # Function declaration ends on ")"! |
| if ($chunk !~ m{ |
| \( .* \( # Multiple parenths |
| }x |
| and $chunk =~ / \w \s* \( /x) { # Most probably pointer to a function? |
| $isvar = 0; |
| } |
| } elsif ($chunk =~ / |
| ^ \s* (enum|struct|union|class) \s+ \w+ \s* $ |
| /x) { |
| $isvar = $isfunc = 0; |
| } |
| if ($isvar) { # Heuristically variable |
| push @vdecls, $b + $b1, $e; |
| } elsif ($isfunc) { |
| push @fdecls, $b + $b1, $e; |
| } |
| } |
| push @decls, $b + $b1, $e if $isvar || $isfunc; |
| } |
| $in =~ /\G\s*/g ; |
| $b = pos $in; |
| } |
| [\@inlines, \@decls, \@mdecls, \@vdecls, \@fdecls]; |
| } |
| |
| # XXXX This is heuristical in many respects... |
| # Recipe: remove all struct-ish chunks. Remove all array specifiers. |
| # Remove GCC attribute specifiers. |
| # What remains may contain function's arguments, old types, and newly |
| # defined types. |
| # Remove function arguments using heuristics methods. |
| # Now out of several words in a row the last one is a newly defined type. |
| |
| sub whited_decl { # Input is sanitized. |
| my $keywords_rex = shift; |
| my $in = shift; # Text of a declaration |
| |
| #typedef ret_type*(*func) -> typedef ret_type* (*func) |
| $in =~ s/\*\(\*/* \(*/; |
| |
| my $rest = $in; |
| my $out = $in; # Whited out $in |
| |
| # Remove all the structs |
| while ($out =~ /(\b(struct|union|class|enum)(\s+\w+)?\s*\{)/g) { |
| my $pos_start = pos($out) - length $1; |
| |
| matchingbrace($out); |
| my $pos_end = pos $out; |
| substr($out, $pos_start, $pos_end - $pos_start) = |
| ' ' x ($pos_end - $pos_start); |
| pos $out = $pos_end; |
| } |
| |
| # Deal with glibc's wierd ass __attribute__ tag. Just dump it. |
| # Maaaybe this should check to see if you're using GCC, but I don't |
| # think so since glibc is nice enough to do that for you. [MGS] |
| while ( $out =~ m/(\b(__attribute__|attribute)\s*\((?=\s*\())/g ) { |
| my $att_pos_start = pos($out) - length($1); |
| |
| # Need to figure out where ((..)) ends. |
| matchingbrace($out); |
| my $att_pos_end = pos $out; |
| |
| # Remove the __attribute__ tag. |
| substr($out, $att_pos_start, $att_pos_end - $att_pos_start) = |
| ' ' x ($att_pos_end - $att_pos_start); |
| pos $out = $att_pos_end; |
| } |
| |
| # Remove arguments of functions (heuristics only). |
| # These things (start) arglist of a declared function: |
| # paren word comma |
| # paren word space non-paren |
| # paren keyword paren |
| # start a list of arguments. (May be "cdecl *myfunc"?) XXXXX ????? |
| while ( $out =~ /(\(\s*(\w+(,|\s*[^\)\s])|$keywords_rex\s*\)))/g ) { |
| my $pos_start = pos($out) - length($1); |
| pos $out = $pos_start + 1; |
| matchingbrace($out); |
| substr ($out, $pos_start + 1, pos($out) - 2 - $pos_start) |
| = ' ' x (pos($out) - 2 - $pos_start); |
| } |
| # Remove array specifiers |
| $out =~ s/(\[[\w\s\+]*\])/ ' ' x length $1 /ge; |
| my $tout = $out; |
| # Several words in a row cannot be new typedefs, but the last one. |
| $out =~ s/((\w+\**\s+)+(?=[^\s,;\[\{\)]))/ ' ' x length $1 /ge; |
| unless ($out =~ /\w/) { |
| # Probably a function-type declaration: typedef int f(int); |
| # Redo scan leaving the last word of the first group of words: |
| if ($tout =~ /(\w+\s+)*(\w+)\s*\(/g) { |
| $out = ' ' x (pos($tout) - length $2) |
| . $2 . ' ' x (length($tout) - pos($tout)); |
| } |
| else { |
| # try a different approach to get the last type |
| my $len = length $tout; |
| # cut all non-words at the end of the definition |
| my $end = $tout =~ s/(\W*)$// ? length $1 : 0; |
| # remove everything but the last word |
| my $mid = $tout =~ s/.*?(\w*)$/$1/ ? length $1 : 0; |
| # restore the length |
| $out = $tout . ' ' x ($len - $mid); |
| } |
| # warn "function typedef\n\t'$in'\nwhited-out as\n\t'$out'\n"; |
| } |
| warn "panic: length mismatch\n\t'$in'\nwhited-out as\n\t'$out'\n" |
| if length($in) != length $out; |
| # Sanity check |
| warn "panic: multiple types without intervening comma in\n\t'$in'\nwhited-out as\n\t'$out'\n" |
| if $out =~ /\w[^\w,]+\w/; |
| warn "panic: no types found in\n\t'$in'\nwhited-out as\n\t'$out'\n" |
| unless $out =~ /\w/; |
| $out |
| } |
| |
| sub matchingbrace { |
| # pos($_[0]) is after the opening brace now |
| my $n = 0; |
| while ($_[0] =~ /([\{\[\(])|([\]\)\}])/g) { |
| $1 ? $n++ : $n-- ; |
| return 1 if $n < 0; |
| } |
| # pos($_[0]) is after the closing brace now |
| return; # false |
| } |
| |
| sub remove_Comments_no_Strings { # We expect that no strings are around |
| my $in = shift; |
| $in =~ s,/(/.*|\*[\s\S]*?\*/),,g ; # C and C++ |
| die "Unfinished comment" if $in =~ m,/\*, ; |
| $in; |
| } |
| |
| sub sanitize { # We expect that no strings are around |
| my $in = shift; |
| # C and C++, strings and characters |
| $in =~ s{ / ( |
| / .* # C++ style |
| | |
| \* [\s\S]*? \*/ # C style |
| ) # (1) |
| | '((?:[^\\\']|\\.)+)' # (2) Character constants |
| | "((?:[^\\\"]|\\.)*)" # (3) Strings |
| | ( ^ \s* \# .* # (4) Preprocessor |
| ( \\ $ \n .* )* ) # and continuation lines |
| } { |
| # We want to preserve the length, so that one may go back |
| defined $1 ? ' ' x (1 + length $1) : |
| defined $4 ? ' ' x length $4 : |
| defined $2 ? "'" . ' ' x length($2) . "'" : |
| defined $3 ? '"' . ' ' x length($3) . '"' : '???' |
| }xgem ; |
| die "Unfinished comment" if $in =~ m{ /\* }x; |
| $in; |
| } |
| |
| sub top_level { # We expect argument is sanitized |
| # Note that this may remove the variable in declaration: int (*func)(); |
| my $in = shift; |
| my $start; |
| my $out = $in; |
| while ($in =~ /[\[\{\(]/g ) { |
| $start = pos $in; |
| matchingbrace($in); |
| substr($out, $start, pos($in) - 1 - $start) |
| = ' ' x (pos($in) - 1 - $start); |
| } |
| $out; |
| } |
| |
| sub remove_type_decl { # We suppose that the arg is top-level only. |
| my $in = shift; |
| $in =~ s/(\b__extension__)(\s+typedef\b)/(' ' x length $1) . $2/gse; |
| $in =~ s/(\btypedef\b.*?;)/' ' x length $1/gse; |
| # The following form may appear only in the declaration of the type itself: |
| $in =~ |
| s/(\b(enum|struct|union|class)\b[\s\w]*\{\s*\}\s*;)/' ' x length $1/gse; |
| $in; |
| } |
| |
| sub new { |
| my $class = shift; |
| my $out = SUPER::new $class $recipes; |
| $out->set(@_); |
| $out; |
| } |
| |
| sub do_declarations { |
| my @d = map do_declaration($_, $_[1], $_[2]), @{ $_[0] }; |
| \@d; |
| } |
| |
| # Forth argument: if defined, there maybe no identifier. Generate one |
| # basing on this argument. |
| |
| sub do_declaration { |
| my ($decl, $typedefs, $keywords, $argnum) = @_; |
| $decl =~ s/;?\s*$//; |
| |
| my ($type, $typepre, $typepost, $ident, $args, $w, $pos, $repeater); |
| $decl =~ s/[\r\n]\s*/ /g; |
| #warn "DECLAR [$decl][$argnum]\n"; |
| $decl =~ s/^\s*__extension__\b\s*//; |
| $decl =~ s/^\s*extern\b\s*//; |
| $decl =~ s/^\s*__inline\b\s*//; |
| $pos = 0; |
| while ($decl =~ /(\w+)/g and ($typedefs->{$1} or $keywords->{$1})) { |
| $w = $1; |
| if ($w =~ /^(struct|class|enum|union)$/) { |
| $decl =~ /\G\s+\w+/g or die "`$w' is not followed by word in `$decl'"; |
| } |
| $pos = pos $decl; |
| } |
| #warn "pos: $pos\n"; |
| pos $decl = $pos; |
| $decl =~ /\G[\s*]*\*/g or pos $decl = $pos; |
| $type = substr $decl, 0, pos $decl; |
| $decl =~ /\G\s*/g or pos $decl = length $type; # ???? |
| $pos = pos $decl; |
| #warn "pos: $pos\n"; |
| if (defined $argnum) { |
| if ($decl =~ /\G(\w+)((\s*\[[^][]*\])*)/g) { # The best we can do with [2] |
| $ident = $1; |
| $repeater = $2; |
| $pos = pos $decl; |
| } else { |
| pos $decl = $pos = length $decl; |
| $type = $decl; |
| $ident = "arg$argnum"; |
| } |
| } else { |
| die "Cannot process declaration `$decl' without an identifier" |
| unless $decl =~ /\G(\w+)/g; |
| $ident = $1; |
| $pos = pos $decl; |
| } |
| #warn "pos: $pos\n"; |
| $decl =~ /\G\s*/g or pos $decl = $pos; |
| $pos = pos $decl; |
| #my $st = length $decl; |
| #warn substr($decl, 0, $pos), "\n"; |
| #warn "pos: $pos $st\n"; |
| #warn "DECLAR [$decl][$argnum]\n"; |
| if (pos $decl != length $decl) { |
| pos $decl = $pos; |
| die "Expecting parenth after identifier in `$decl'\nafter `", |
| substr($decl, 0, $pos), "'" |
| unless $decl =~ /\G\(/g; |
| my $argstring = substr($decl, pos($decl) - length $decl); |
| matchingbrace($argstring) or die "Cannot find matching parenth in `$decl'"; |
| $argstring = substr($argstring, 0, pos($argstring) - 1); |
| $argstring =~ s/ ^ ( \s* void )? \s* $ //x; |
| $args = []; |
| my @args; |
| if ($argstring ne '') { |
| my $top = top_level $argstring; |
| my $p = 0; |
| my $arg; |
| while ($top =~ /,/g) { |
| $arg = substr($argstring, $p, pos($top) - 1 - $p); |
| $arg =~ s/^\s+|\s+$//gs; |
| push @args, $arg; |
| $p = pos $top; |
| } |
| $arg = substr $argstring, $p; |
| $arg =~ s/^\s+|\s+$//gs; |
| push @args, $arg; |
| } |
| my $i = 0; |
| for (@args) { |
| push @$args, do_declaration1($_, $typedefs, $keywords, $i++); |
| } |
| } |
| [$type, $ident, $args, $decl, $repeater]; |
| } |
| |
| sub do_declaration1 { |
| my ($decl, $typedefs, $keywords, $argnum) = @_; |
| $decl =~ s/;?\s*$//; |
| #warn "DECLARO [$decl][$argnum]\n"; |
| my ($type, $typepre, $typepost, $ident, $args, $w, $pos, $repeater); |
| $pos = 0; |
| while ($decl =~ /(\w+)/g and ($typedefs->{$1} or $keywords->{$1})) { |
| $w = $1; |
| if ($w =~ /^(struct|class|enum|union)$/) { |
| $decl =~ /\G\s+\w+/g or die "`$w' is not followed by word in `$decl'"; |
| } |
| $pos = pos $decl; |
| } |
| #warn "POS: $pos\n"; |
| pos $decl = $pos; |
| $decl =~ /\G[\s*]*\*/g or pos $decl = $pos; |
| $type = substr $decl, 0, pos $decl; |
| $decl =~ /\G\s*/g or pos $decl = length $type; # ???? |
| $pos = pos $decl; |
| if (defined $argnum) { |
| if ($decl =~ /\G(\w+)((\s*\[[^][]*\])*)/g) { # The best we can do with [2] |
| $ident = $1; |
| $repeater = $2; |
| $pos = pos $decl; |
| } else { |
| pos $decl = $pos = length $decl; |
| $type = $decl; |
| $ident = "arg$argnum"; |
| } |
| } else { |
| die "Cannot process declaration `$decl' without an identifier" |
| unless $decl =~ /\G(\w+)/g; |
| $ident = $1; |
| $pos = pos $decl; |
| } |
| $decl =~ /\G\s*/g or pos $decl = $pos; |
| $pos = pos $decl; |
| #warn "DECLAR1 [$decl][$argnum]\n"; |
| #my $st = length $decl; |
| #warn substr($decl, 0, $pos), "\n"; |
| #warn "pos: $pos $st\n"; |
| if (pos $decl != length $decl) { |
| pos $decl = $pos; |
| die "Expecting parenth after identifier in `$decl'\nafter `", |
| substr($decl, 0, $pos), "'" |
| unless $decl =~ /\G\(/g; |
| my $argstring = substr($decl, pos($decl) - length $decl); |
| matchingbrace($argstring) or die "Cannot find matching parenth in `$decl'"; |
| $argstring = substr($argstring, 0, pos($argstring) - 1); |
| $argstring =~ s/ ^ ( \s* void )? \s* $ //x; |
| $args = []; |
| my @args; |
| if ($argstring ne '') { |
| my $top = top_level $argstring; |
| my $p = 0; |
| my $arg; |
| while ($top =~ /,/g) { |
| $arg = substr($argstring, $p, pos($top) - 1 - $p); |
| $arg =~ s/^\s+|\s+$//gs; |
| push @args, $arg; |
| $p = pos $top; |
| } |
| $arg = substr $argstring, $p; |
| $arg =~ s/^\s+|\s+$//gs; |
| push @args, $arg; |
| } |
| my $i = 0; |
| for (@args) { |
| push @$args, do_declaration1($_, $typedefs, $keywords, $i++); |
| } |
| } |
| [$type, $ident, $args, $decl, $repeater]; |
| } |
| |
| ############################################################ |
| |
| package C::Preprocessed; |
| use Symbol; |
| use File::Basename; |
| use Config; |
| use constant WIN32 => $^O eq 'MSWin32'; |
| |
| sub new { |
| die "usage: C::Preprocessed->new(filename[, defines[, includes[, cpp]]])" |
| if @_ < 2 or @_ > 5; |
| my ($class, $filename, $Defines, $Includes, $Cpp) |
| = (shift, shift, shift, shift, shift); |
| $Cpp ||= \%Config::Config; |
| my $filedir = dirname $filename || '.'; |
| $Includes ||= [$filedir, '/usr/local/include', '.']; |
| my $addincludes = ""; |
| $addincludes = "-I" . join(" -I", @$Includes) |
| if defined $Includes and @$Includes; |
| my ($sym) = gensym; |
| my $cmd = WIN32 ? |
| "$Cpp->{cppstdin} $Defines $addincludes $Cpp->{cppflags} $filename |" : |
| "echo '\#include \"$filename\"' | $Cpp->{cppstdin} $Defines $addincludes $Cpp->{cppflags} $Cpp->{cppminus} | grep -v '^#' |"; |
| #my $cmd = "echo '\#include <$filename>' | $Cpp->{cppstdin} $Defines $addincludes $Cpp->{cppflags} $Cpp->{cppminus} |"; |
| |
| (open($sym, $cmd) or die "Cannot open pipe from `$cmd': $!") |
| and bless $sym => $class; |
| } |
| |
| sub text { |
| my $class = shift; |
| my $filter = shift; |
| if (defined $filter) { |
| return text_only_from($class, $filter, @_); |
| } |
| my $stream = $class->new(@_); |
| my $oh = select $stream; |
| local $/; |
| select $oh; |
| <$stream>; |
| } |
| |
| sub text_only_from { |
| my $class = shift; |
| my $from = shift || die "Expecting argument in `text_only_from'"; |
| my $stream = $class->new(@_); |
| my $on = $from eq $_[0]; |
| my $eqregexp = $on ? '\"\"|' : ''; |
| my @out; |
| while (<$stream>) { |
| #print; |
| |
| $on = /$eqregexp[\"\/]\Q$from\"/ if /^\#/; |
| push @out, $_ if $on; |
| } |
| join '', @out; |
| } |
| |
| sub DESTROY { |
| close($_[0]) |
| or die "Cannot close pipe from `$Config::Config{cppstdin}': err $?, $!\n"; |
| } |
| |
| # Autoload methods go after __END__, and are processed by the autosplit program. |
| # Return to the principal package. |
| package ModPerl::CScan; |
| |
| 1; |
| __END__ |
| |
| =head1 NAME |
| |
| ModPerl::CScan - scan C language files for easily recognized constructs. |
| |
| =head1 SYNOPSIS |
| |
| =head1 DESCRIPTION |
| |
| See the C<C::Scan> manpage. This package is just a fork to fix certain |
| things that didn't work in the original C<C::Scan>, which is not |
| maintained any longer. These fixes required to make it work with the |
| Apache 2 source code. |
| |
| =cut |