blob: 61255d0ad7b4173bc3ad9057cce458b5bdbb8544 [file] [log] [blame]
package Lexer;
use Exporter;
our @ISA = qw(Exporter);
our @EXPORT = qw(lex);
# Store end token.
my @end_tok = [ "end" ];
# lex(string,multiline)
# Takes as input a string of line and multiline variable deciding whether
# current line is related to the previous line. Divides it into tokens as
# specified by Regex and outputs an array of Tokens. Every Token is an
# array having two values: token name and its value. If its an operator,
# it has only one value.
sub lex($)
{
my ( $multiline ) = @_;
my @tokens;
my $rhs = 0;
$_ = scalar <>;
# Send an end token when EOF is reached.
return ( \@end_tok, $multiline ) unless $_;
while( $_ )
{
if( $multiline )
{
if( $multiline eq 'comment' )
{
die 'comment following trailing backslash' if m/^#/o;
die 'blank line following trailing backslash' if m/^\s*$/;
chomp;
$multiline = undef unless s/\\//o;
push @tokens, [ "comment" , $_ ];
push @tokens, [ "newline" ] unless $multiline;
$_ = undef;
}
else
{
if( m/^##/ )
{
$_ = undef;
last;
}
elsif( m/^#/ )
{
die 'comment following trailing backslash';
}
else
{
$multiline = undef;
$rhs = 1;
}
}
}
elsif( $rhs )
{
my @vals = split;
my $comment;
foreach my $val ( @vals )
{
if( $val =~ m/^#(.*)/ )
{
$multiline = 'comment' if $vals[ -1 ] eq '\\';
$comment .= " ".$1;
}
elsif( $val =~ m/\\/ )
{
$multiline = 'rhsval' unless$multiline;
}
elsif( $comment )
{
$comment .= " ".$val;
}
else
{
push @tokens, [ "rhsval" , $val];
}
}
push @tokens, [ "comment" , $comment] if $comment;
push @tokens, [ "newline" ] unless $multiline;
$_ = undef;
}
elsif( $include )
{
push @tokens, [ "value" , $1] if m/^\s+(.+)/;
push @tokens, [ "newline" ];
$include = 0;
last;
}
elsif( s/^##.*\n$//o )
{
}
elsif( s/^#(.*)\n$//o )
{
my $val = $1;
if( $val =~ m/(.*?)\\/o )
{
push @tokens, [ "comment" , substr( $1 , 0 , -1 )];
$multiline = 'comment';
}
else
{
push @tokens, [ "comment" , $1];
push @tokens, [ "newline" ];
}
}
elsif( s/^(PROGRAMS|LIBRARIES|LTLIBRARIES|LISP|PYTHON|JAVA|SCRIPTS|DATA|HEADERS|MASN|TEXINFOS|if|else|endif|include)//o)
{
push @tokens, [$1];
$include = 1 if ( $1 eq 'include' );
}
elsif( s/^([a-zA-Z0-9_]+)//o )
{
push @tokens, ["value",$1];
}
elsif( s/^(\+=)//o )
{
push @tokens,['+'];
push @tokens,['='];
$rhs = 1;
}
elsif( s/^(=)//o )
{
push @tokens, [$1];
$rhs = 1;
}
elsif( s/^(:|_|!)//o )
{
push @tokens, [$1];
}
elsif( s/^\n//o )
{
push @tokens, ["newline"] if $#tokens > -1;
$multiline = undef;
}
elsif( s/^(\r|\s+)//o )
{
}
else
{
die "Incorrect input $_";
}
}
# Returns undef when no tokens.
return ( undef, $multiline ) if !@tokens;
return ( \@tokens , $multiline );
}