#!/usr/bin/perl

# Multi-purpose, GRASS 4.x Site_list program.  Based on the perl
# module Parse::RecDescent (a recursive descent parser generator).
#
# Basically, a language for describing a GRASS 4.x site_list file
# is used for working with GRASS 4.x site_list files.
#
# -v    Vet - just determine if file is a proper site_list
# -i    Description field has integer data (can be used with -v)
# -s    Description field is string (can be used with -v)
# -S    Strict interpretation
# -d    If re-writing data, discard anything which doesn't fit.
#       otherwise, if gets appended to the description line.
#       If the name listed on name line doesn't match file name,
#       the old_name gets put on desc line and name field is updated.
# -t    Run internal tests.
# -p    Print grammar and exit.
#
# The language doesn't handle blank lines (only end-of-line), so
# the file reading code skips those.  Everything else gets parsed.

#I can be reached at <ghaverla@freenet.edmonton.ab.ca>
#Gordon Haverland

my $Strict = 0;

use Getopt::Std;
getopts('dDhipsStv');

&usage() if( $opt_h );
&usage('Not both -i and -s') if( $opt_i && $opt_s );
$Strict = 1 if( $opt_S );



# ****************************************************
# We are going to build a grammar for parsing Grass 4.x
# site_list files from pieces, depending on intent.
# The parser is built by the perl Parse::RecDescent module
# based on the grammar we build up.
use Parse::RecDescent;


&init_grammar_elements();

my( @easting, @northing, @description, %hdr, @no_match );
my $dline         = 0;
my $i_dline       = 0;
my $s_dline       = 0;
my $G4_SL_Grammar = '';

# Okay, assemble our grammar
$G4_SL_Grammar = &build_grammar();

if( $opt_p ) {
    print $G4_SL_Grammar;
    exit 0;
}

if( $opt_D ) {
    $::RD_HINT++;
    $::RD_ERRORS++;
    $::RD_WARN++;
    $::RD_TRACE++;
    #$Parse::RecDescent::skip = ''; # Don't skip leading whitespace.
    #$::RD_++;
    #$::RD_++;
}

# Initialize a Recursive Descent parser for the above grammar
$parser = Parse::RecDescent->new( $G4_SL_Grammar );


&test() if( $opt_t );

# Are we reading a file be name, or just reading STDIN?
if( $#ARGV == -1 ) { 
    $input = '-';
} else {
    $input = $ARGV[0];
    &usage() unless( (-e $input) && (-r $input) );
#    ($atime, $mtime) = (stat($input))[8,9];
#    utime( $atime, $mtime, $output_file );
}

open( INPUT, "< $input" ) || die "Error: can't open $input for read\n";

if( $opt_v ) {
    &vet();
} else {
    &rewrite();
}



sub init_grammar_elements {

    # Strictly speaking, the two choices of labels must be lower case.
    # We can allow any case
    $G4_Site_List_Grammar{Header_Label_Strict} = q{
hdr_label      : /(name|desc)/
    };
    $G4_Site_List_Grammar{Header_Label} = q{
hdr_label      : /(name|desc)/i
    };


    # The two coordinates (easting and northing) can be integer or
    # float (will be promoted to %12.3f floats if written).  Description
    # fields in header or data just match anything.
    $G4_Site_List_Grammar{Components} = q{
integer        : /[-+]?\\d+/
float          : /[-+]?((\\d+\\.\\d*)|(\\d*\\.\\d+))/
description    : /.*/
blank          : /^\\s*$/
comment        : /^\\s*\\#.*$/

coord          : float
               | integer

easting        : coord
northing       : coord
    };
    $G4_Site_List_Grammar{Separator} = q{
separator      : '|'
    };

    # If you are looking for either Integer_Data point records or
    # String_Data point records, Integer must come first in parser
    # grammar.  First match is best.
    $G4_Site_List_Grammar{Integer_Data} = q{
point_record   : easting separator northing separator '#' integer description
               {
                   #PLACEHOLDER
		   $main::dline++;
		   $main::i_dline++;
		   1;
	       }
    };

    $G4_Site_List_Grammar{String_Data} = q{
point_record   : easting separator northing separator description
               {
                   #PLACEHOLDER
		   $main::dline++;
		   $main::s_dline++;
		   1;
	       }
    };

    # Comments are allowed (and ignored) by GRASS functions.
    $G4_Site_List_Grammar{Comment} = q{
comment_line   : blank | comment
    };

    # To Vet structure, we need (at most) a single instance of a
    # name| or desc| line, before the data.  This requires
    # keeping track of what labels have been seen, and incrementing
    # the number of data lines seen.  Returning undef allows parser
    # to fail.
    $G4_Site_List_Grammar{Header_Vet} = q{
hdr_line       : hdr_label separator description 
               {
                   if( exists( $main::hdr{lc($item[1])} ) ) {
		       print STDERR "Error: GRASS Site_list cannot have multiple $item[1] \\n";
		       undef ;
		   } else {
		       $main::hdr{lc($item[1])} = $item[-1] ;
		       if( $main::dline > 0 ) {
			   undef ;
		       } else {
			   1;
		       }
		   }
               }
    };

    # If we need a sink for miscellaneous lines (usually seen to be
    # just after the desc line (extraneous carriage returns)).
    $G4_Site_List_Grammar{Everything_Else} = q{
no_match       : description
               {
		   push( @main::no_match,
			 sprintf('%d %s', $thisline, $item[1] ) );
		   1;
	       }
    };


    # This is the starting point for parsing the file.
    $G4_Site_List_Grammar{Start} = q{
input          : point_record
               | hdr_line
               | comment_line
    };

    # As above, but with sink added.
    $G4_Site_List_Grammar{Start_With_Everything} = q{
input          : point_record
               | hdr_line
               | comment_line
               | no_match
    };
}
sub build_grammar {
    my $G4_SL_Grammar;

    if( $opt_S ) {
	$G4_SL_Grammar .= $G4_Site_List_Grammar{Header_Label_Strict};
    } else {
	$G4_SL_Grammar .= $G4_Site_List_Grammar{Header_Label};
    }
    $G4_SL_Grammar     .= $G4_Site_List_Grammar{Separator};
    $G4_SL_Grammar     .= $G4_Site_List_Grammar{Components};
    if( $opt_i ) {
	$string         = $G4_Site_List_Grammar{Integer_Data};
    } else {
	$string         = $G4_Site_List_Grammar{String_Data};
    }
    if( $opt_v ) {
	$replace        = '';
    } else {
	if( $opt_i ) {
	    $replace    = q{
		   push( @main::easting,     $item[1] );
		   push( @main::northing,    $item[3] );
		   push( @main::description,
			 sprintf('%10d %s', $item[6], $item[7]) );
	                   };
	} else {
	    $replace    = q{
		   push( @main::easting,     $item[1] );
		   push( @main::northing,    $item[3] );
		   push( @main::description, $item[5] );
	                   };
	}
    }
    $PLACEHOLDER        = '                   #PLACEHOLDER';
    $string             =~ s/$PLACEHOLDER/$replace/;
    $G4_SL_Grammar     .= $string;
    $G4_SL_Grammar     .= $G4_Site_List_Grammar{Comment};
    $G4_SL_Grammar     .= $G4_Site_List_Grammar{Header_Vet};
    if( $opt_v ) {
	$G4_SL_Grammar .= $G4_Site_List_Grammar{Start};
    } else {
	$G4_SL_Grammar .= $G4_Site_List_Grammar{Everything_Else};
	$G4_SL_Grammar .= $G4_Site_List_Grammar{Start_With_Everything};
    }

    return( $G4_SL_Grammar );
}

sub process_line {
    my $input = shift @_ || '';
    $parser->input( $input ) or return undef;
    return 1;
}
sub usage {
    while( @_ ) {
	print STDERR "$_[0]\n";
	shift( @_ );
    }
    print STDERR <<'    EOS';
Usage: $0 [-v] [-i|s] [-S] [-d] [-t]
        -v  Vet the file to see if it is a GRASS 4.x site_list
        -S  Strict interpretation of site_list
        -d  If re-writing file, discard non-matching stuff.
        -i  File has integer description field
        -s  File has string description field (anything)
        -t  Run internal tests
    EOS
	;
    exit 1;
}
sub test {
    my( @hdr, @comment, @point);
    &init_tests();

    foreach (@hdr, @comment, @point) {
	print "$_\n";
	if( &process_line( $_ ) ) {
	    print "Legal\n";
	} else {
	    print "Not legal\n";
	}
	if( $opt_t ) {
	    if( exists( $main::hdr{name} ) ) {
		print "name|$main::hdr{name}\n";
		delete( $main::hdr{name} );
	    }
	    if( exists( $main::hdr{desc} ) ) {
		print "desc|$main::hdr{desc}\n";
		delete( $main::hdr{desc} );
	    }
	    print "\n";
	}
    }
}
sub init_tests {
    # **************** GRASS-4.x *******************

    # Under GRASS-4.x, we can have (1 or) 2 (optional) header lines
    #  name |      or desc |
    $hdr[0]  = 'name|some_file_name';
    $hdr[1]  = 'desc|some_description';
    $hdr[2]  = 'name |some_file_name';
    $hdr[3]  = 'desc |some_description';

    $hdr[4]  = 'Name|some_file_name';
    $hdr[5]  = 'DESC|some_description';
    $hdr[6]  = 'naMe|some_file_name';
    $hdr[7]  = 'DESC |some_description';
    $hdr[8]  = 'NAME |some_file_name';
    $hdr[9]  = 'deSc |some_description';
    $hdr[10] = 'name:some_file_name';
    $hdr[11] = 'desc:some_description';

    # We are allowed to kinds of "comment lines", blank lines or
    # lines starting with a "#".
    $comment[0]  = '';
    $comment[1]  = ' ';
    $comment[2]  = '                          ';
    $comment[3]  = '#';
    $comment[4]  = '# ';
    $comment[5]  = '#                  ';

    $comment[6]  = ' #';
    $comment[7]  = ' # ';
    $comment[8]  = '  #';
    $comment[9]  = '  # ';
    $comment[10] = '  #                       ';

    # Remaining lines are called "Point Records", and are of the
    # format east | north | description
    # Iff the description field starts with a "#", what follows
    # is read as an integer (no rounding, converssion stops at
    # non-numeric).  Otherwise, this field is a string.

    $point[0]  = '1|2|hello';
    $point[1]  = '1|2 |hello';
    $point[2]  = '1| 2|hello';
    $point[3]  = '1 |2|hello';
    $point[4]  = ' 1|2|hello';
    $point[5]  = '1| 2 |hello';
    $point[6]  = '1 |2 |hello';
    $point[7]  = ' 1| 2 |hello';
    $point[8]  = '1 | 2 |hello';
    $point[9]  = ' 1 | 2 |hello';
    $point[10] = '1 | 2|hello';
    $point[11] = ' 1 | 2|hello';
    $point[12] = ' 1|2 |hello';

    $point[13] = '1:2:hello';
    $point[14] = '1:2 :hello';
    $point[15] = '1: 2:hello';
    $point[16] = '1 :2:hello';
    $point[17] = ' 1:2:hello';
    $point[18] = '1: 2 :hello';
    $point[19] = '1 :2 :hello';
    $point[20] = ' 1: 2 :hello';
    $point[21] = '1 : 2 :hello';
    $point[22] = ' 1 : 2 :hello';
    $point[23] = '1 : 2:hello';
    $point[24] = ' 1 : 2:hello';
    $point[25] = ' 1:2 :hello';

    $point[26] = '1|2|#3.4';
    $point[26] = '1|2 |#5';
    $point[27] = '1| 2|#3.4';
    $point[28] = '1 |2|#5';
    $point[29] = ' 1|2|#3.4';
    $point[30] = '1| 2 |#5';
    $point[31] = '1 |2 |#3.4';
    $point[32] = ' 1| 2 |#5';
    $point[33] = '1 | 2 |#3.4';
    $point[34] = ' 1 | 2 |#5';
    $point[35] = '1 | 2|#3.4';
    $point[36] = ' 1 | 2|#5';
    $point[37] = ' 1|2 |#3.4';

    $point[38] = '1:2:#3.4';
    $point[39] = '1:2 :#5';
    $point[40] = '1: 2:#3.4';
    $point[41] = '1 :2:#5';
    $point[42] = ' 1:2:#3.4';
    $point[43] = '1: 2 :#5';
    $point[44] = '1 :2 :#3.4';
    $point[45] = ' 1: 2 :#5';
    $point[46] = '1 : 2 :#3.4';
    $point[47] = ' 1 : 2 :#5';
    $point[48] = '1 : 2:#3.4';
    $point[49] = ' 1 : 2:#5';
    $point[50] = ' 1:2 :#3.4';
}
sub vet {
    my $line;
    while( $line = <INPUT> ) {
	if( ! &process_line( $line ) ) {
	    exit 1 if( $dline < 1 );
	    exit 2;
	}
    }
    exit 0;
}
sub rewrite {
    my $line;
    while( $line = <INPUT> ) {
	&process_line( $line ) || die "Error, not a GRASS Site_list file\n";
    }

    # Is internal name the same as external name?
    if( $input ne '-' ) {
	if( exists( $main::hdr{name} ) ) {
	    chomp( $main::hdr{name} );
	    # More complicated for -S (strict)
	    if( $main::hdr{name} =~ /^\s*(\S+)/ ) {
		my $name = $1;
		if( $name ne $input ) {
		    $name = $main::hdr{name};
		    $main::hdr{name} = $input;
		    my $desc = $main::hdr{desc};
		    chomp( $desc );
		    $desc .= "oldname( $name )";
		    $main::hdr{desc} = $desc;
		}
	    }
	} else {
	    $main::hdr{name} = $input;
	}
    }

    # Unless we are dropping extra stuff, append to description
    unless( $opt_d ) {
	chomp( $main::hdr{desc} ) if( exists( $main::hdr{desc} ) );
	foreach ( @main::no_match ) {
	    chomp( $_ );
	    $extra .= $_;
	}
	$main::hdr{desc} .= "nomatch( $extra )";
    }

    print "name|$main::hdr{name}\n";
    print "desc|$main::hdr{desc}\n";

    if( ($main::dline > 0) && ($main::dline == $main::i_dline) ) {
	for( $i = 0; $i <= $#main::easting; $i++ ) {
	    $main::description[$i] =~ /^(.{11})(.*)$/;
	    $int = $1;
	    $desc = $2;
	    $int =~ /(\d+)/;
	    $int = $1;
	    $desc =~ s/^\s+//; $desc =~ s/\s+$//;
	    printf "%12.3f | %12.3f |# %10d %s\n",
	    $main::easting[$i], $main::northing[$i], $int, $desc;
	}

    } else {
	for( $i = 0; $i <= $#main::easting; $i++ ) {
	    printf "%12.3f | %12.3f | %s\n",
	    $main::easting[$i], $main::northing[$i], $main::description[$i];
	}
    }
}



