Inline::C::ParseRegExp - The New and Improved Inline::C Parser


Inline documentation Contained in the Inline distribution.

Index


Code Index:

NAME

Top

Inline::C::ParseRegExp - The New and Improved Inline::C Parser

SYNOPSIS

Top

    use Inline C => DATA =>
               USING => ParseRegExp;

DESCRIPTION

Top

This module is a much faster version of Inline::C's Parse::RecDescent parser. It is based on regular expressions instead.

AUTHOR

Mitchell N Charity <mcharity@vendian.org>

COPYRIGHT

Top


Inline documentation Contained in the Inline distribution.

package Inline::C::ParseRegExp;
use strict;
use Carp;

sub register {
    {
     extends => [qw(C)],
     overrides => [qw(get_parser)],
    }
}

sub get_parser {
    Inline::C::_parser_test("Inline::C::ParseRegExp::get_parser called\n") if $_[0]->{CONFIG}{_TESTING};
    bless {}, 'Inline::C::ParseRegExp'
}

sub code {
    my($self,$code) = @_;

    # These regular expressions were derived from Regexp::Common v0.01.
    my $RE_comment_C   = q{(?:(?:\/\*)(?:(?:(?!\*\/)[\s\S])*)(?:\*\/))};
    my $RE_comment_Cpp = q{(?:\/\*(?:(?!\*\/)[\s\S])*\*\/|\/\/[^\n]*\n)};
    my $RE_quoted      = (q{(?:(?:\")(?:[^\\\"]*(?:\\.[^\\\"]*)*)(?:\")}
                         .q{|(?:\')(?:[^\\\']*(?:\\.[^\\\']*)*)(?:\'))});
    our $RE_balanced_brackets; $RE_balanced_brackets =
        qr'(?:[{]((?:(?>[^{}]+)|(??{$RE_balanced_brackets}))*)[}])';
    our $RE_balanced_parens; $RE_balanced_parens   =
        qr'(?:[(]((?:(?>[^()]+)|(??{$RE_balanced_parens}))*)[)])';

    # First, we crush out anything potentially confusing.
    # The order of these _does_ matter.
    $code =~ s/$RE_comment_C/ /go;
    $code =~ s/$RE_comment_Cpp/ /go;
    $code =~ s/^\#.*(\\\n.*)*//mgo;
    #$code =~ s/$RE_quoted/\"\"/go; # Buggy, if included.
    $code =~ s/$RE_balanced_brackets/{ }/go;

    $self->{_the_code_most_recently_parsed} = $code; # Simplifies debugging.

    my $normalize_type = sub {
	# Normalize a type for lookup in a typemap.
        my($type) = @_;

        # Remove "extern".
        # But keep "static", "inline", "typedef", etc,
        #  to cause desirable typemap misses.
        $type =~ s/\bextern\b//g;

        # Whitespace: only single spaces, none leading or trailing.
        $type =~ s/\s+/ /g;
        $type =~ s/^\s//; $type =~ s/\s$//;

        # Adjacent "derivative characters" are not separated by whitespace,
        # but _are_ separated from the adjoining text.
        # [ Is really only * (and not ()[]) needed??? ]
        $type =~ s/\*\s\*/\*\*/g;
        $type =~ s/(?<=[^ \*])\*/ \*/g;

        return $type;
    };

    # The decision of what is an acceptable declaration was originally
    # derived from Inline::C::grammar.pm version 0.30 (Inline 0.43).

    my $re_plausible_place_to_begin_a_declaration = qr {
		# The beginning of a line, possibly indented.
		# (Accepting indentation allows for C code to be aligned with
		#  its surrounding perl, and for backwards compatibility with
		#  Inline 0.43).
		(?m: ^ ) \s*
        }xo;

    # Instead of using \s , we dont tolerate blank lines.
    # This matches user expectation better than allowing arbitrary
    # vertical whitespace.
    my $sp = qr{[ \t]|\n(?![ \t]*\n)};

    my $re_type = qr {(
						(?: \w+ $sp* )+? # words
						(?: \*  $sp* )*  # stars
						)}xo;

    my $re_identifier = qr{ (\w+) $sp* }xo;

    while($code =~ m{
		$re_plausible_place_to_begin_a_declaration
                ( $re_type $re_identifier $RE_balanced_parens $sp* (\;|\{) )
              }xgo)
    {
        my($type, $identifier, $args, $what) = ($2,$3,$4,$5);
        $args = "" if $args =~ /^\s+$/;

        my $is_decl     = $what eq ';';
        my $function    = $identifier;
        my $return_type = &$normalize_type($type);
	my @arguments   = split ',', $args;

        goto RESYNC if $is_decl && !$self->{data}{AUTOWRAP};
        goto RESYNC if $self->{data}{done}{$function};
        goto RESYNC if !defined
            $self->{data}{typeconv}{valid_rtypes}{$return_type};

        my(@arg_names,@arg_types);
	my $dummy_name = 'arg1';

	foreach my $arg (@arguments) {

	    if(my($type, $identifier) =
	       $arg =~ /^\s*$re_type(?:$re_identifier)?\s*$/o)
	    {
		my $arg_name = $identifier;
		my $arg_type = &$normalize_type($type);

		if(!defined $arg_name) {
		    goto RESYNC if !$is_decl;
		    $arg_name = $dummy_name++;
		}
		goto RESYNC if !defined
		    $self->{data}{typeconv}{valid_types}{$arg_type};

		push(@arg_names,$arg_name);
		push(@arg_types,$arg_type);
	    }
	    elsif($arg =~ /^\s*\.\.\.\s*$/) {
		push(@arg_names,'...');
		push(@arg_types,'...');
	    }
	    else {
		goto RESYNC;
	    }
	}

        # Commit.
        push @{$self->{data}{functions}}, $function;
        $self->{data}{function}{$function}{return_type}= $return_type;
        $self->{data}{function}{$function}{arg_names} = [@arg_names];
        $self->{data}{function}{$function}{arg_types} = [@arg_types];
        $self->{data}{done}{$function} = 1;

        next;

      RESYNC:  # Skip the rest of the current line, and continue.
        $code =~ /\G[^\n]*\n/gc;
    }

   return 1;  # We never fail.
}

1;

__DATA__