| package Lexer; |
| |
| use Exporter; |
| |
| our @ISA = qw(Exporter); |
| our @EXPORT = qw(lex); |
| |
| # Store end token. |
| my @end_tok = [ "end" ]; |
| |
| # lex(string,multiline) |
| # Takes as input a string of line and multiline variable deciding whether |
| # current line is related to the previous line. Divides it into tokens as |
| # specified by Regex and outputs an array of Tokens. Every Token is an |
| # array having two values: token name and its value. If its an operator, |
| # it has only one value. |
| sub lex($) |
| { |
| my ( $multiline ) = @_; |
| my @tokens; |
| my $rhs = 0; |
| $_ = scalar <>; |
| |
| # Send an end token when EOF is reached. |
| return ( \@end_tok, $multiline ) unless $_; |
| |
| while( $_ ) |
| { |
| if( $multiline ) |
| { |
| if( $multiline eq 'comment' ) |
| { |
| die 'comment following trailing backslash' if m/^#/o; |
| die 'blank line following trailing backslash' if m/^\s*$/; |
| chomp; |
| $multiline = undef unless s/\\//o; |
| push @tokens, [ "comment" , $_ ]; |
| push @tokens, [ "newline" ] unless $multiline; |
| $_ = undef; |
| } |
| else |
| { |
| if( m/^##/ ) |
| { |
| $_ = undef; |
| last; |
| } |
| elsif( m/^#/ ) |
| { |
| die 'comment following trailing backslash'; |
| } |
| else |
| { |
| $multiline = undef; |
| $rhs = 1; |
| } |
| } |
| } |
| elsif( $rhs ) |
| { |
| my @vals = split; |
| my $comment; |
| foreach my $val ( @vals ) |
| { |
| if( $val =~ m/^#(.*)/ ) |
| { |
| $multiline = 'comment' if $vals[ -1 ] eq '\\'; |
| $comment .= " ".$1; |
| } |
| elsif( $val =~ m/\\/ ) |
| { |
| $multiline = 'rhsval' unless$multiline; |
| } |
| elsif( $comment ) |
| { |
| $comment .= " ".$val; |
| } |
| else |
| { |
| push @tokens, [ "rhsval" , $val]; |
| } |
| } |
| push @tokens, [ "comment" , $comment] if $comment; |
| push @tokens, [ "newline" ] unless $multiline; |
| $_ = undef; |
| } |
| elsif( $include ) |
| { |
| push @tokens, [ "value" , $1] if m/^\s+(.+)/; |
| push @tokens, [ "newline" ]; |
| $include = 0; |
| last; |
| } |
| elsif( s/^##.*\n$//o ) |
| { |
| } |
| elsif( s/^#(.*)\n$//o ) |
| { |
| my $val = $1; |
| if( $val =~ m/(.*?)\\/o ) |
| { |
| push @tokens, [ "comment" , substr( $1 , 0 , -1 )]; |
| $multiline = 'comment'; |
| } |
| else |
| { |
| push @tokens, [ "comment" , $1]; |
| push @tokens, [ "newline" ]; |
| } |
| } |
| elsif( s/^(PROGRAMS|LIBRARIES|LTLIBRARIES|LISP|PYTHON|JAVA|SCRIPTS|DATA|HEADERS|MASN|TEXINFOS|if|else|endif|include)//o) |
| { |
| push @tokens, [$1]; |
| $include = 1 if ( $1 eq 'include' ); |
| } |
| elsif( s/^([a-zA-Z0-9_]+)//o ) |
| { |
| push @tokens, ["value",$1]; |
| } |
| elsif( s/^(\+=)//o ) |
| { |
| push @tokens,['+']; |
| push @tokens,['=']; |
| $rhs = 1; |
| } |
| elsif( s/^(=)//o ) |
| { |
| push @tokens, [$1]; |
| $rhs = 1; |
| } |
| elsif( s/^(:|_|!)//o ) |
| { |
| push @tokens, [$1]; |
| } |
| elsif( s/^\n//o ) |
| { |
| push @tokens, ["newline"] if $#tokens > -1; |
| $multiline = undef; |
| } |
| elsif( s/^(\r|\s+)//o ) |
| { |
| } |
| else |
| { |
| die "Incorrect input $_"; |
| } |
| } |
| |
| # Returns undef when no tokens. |
| return ( undef, $multiline ) if !@tokens; |
| |
| return ( \@tokens , $multiline ); |
| } |
| |