#!/usr/bin/perl -T

my $debug = 0;

my @database_dirs =
  (
   "../",
   "/"
  );

my @template_dirs =
  (
   "../",
   "/"
  );

# ----------------------------------------------------------------------

require 5.000;
use File::Basename;
use strict;

my $is_cgi = ! @ARGV;
$is_cgi = 1;

my $data;			# Data file, if CGI relative to @database_dirs
my $template;			# Template, if CGI relative to @template_dirs
my $from;			# First record number, starting from 1
my $count;			# The number of records to display, 0 == all
my $match;			# Regular expression
my $mcol;			# Column for match, starting from 1
my $macchars;			# If we are to translate from MacOS to 8859-1 

if ($is_cgi) {

  use CGI qw(param header);
  use CGI::Carp qw(fatalsToBrowser);

  # We get the parameters
  my $d     = param('data');
  my $t     = param('template');
  $from     = param('from') || 1;
  $count    = param('count') || 0;
  $match    = param('match');
  $mcol     = param('mcol') || 1;
  $macchars = param('macchars');

  $d or usage("Input data file was not given");
  $t or usage("HTML template file was not given");

  $d =~ m|^/*(\w[\w\.\-]*)$| or usage("Not a valid file name $d");
  $d = $1;
  $t =~ m|^/*(\w[\w\.\-]*)$| or usage("Not a valid file name $t");
  $t = $1;

  foreach my $database_dir (@database_dirs) {
    if (-f "$database_dir/$d") {
      $data = "$database_dir/$d";
      last;
    }
  }

  foreach my $template_dir (@template_dirs) {
    if (-f "$template_dir/$t") {
      $template = "$template_dir/$t";
      last;
    }
  }    

  $data or usage("Data file \"$d\" not found in",@database_dirs);
  $template or usage("Template \"$t\" not found in",@template_dirs);

  print header();

} else {

  my $outfile = pop @ARGV;
  $outfile or usage("You have to specify an out file");
  $outfile =~ /^(\w[\w\.\-]*)$/ or usage("You have to specify an out file");
  $outfile = $1;

  my %param = map {(split('=', $_, 2))} @ARGV;
  map {print "==== option $_ = $param{$_}<br>\n"} keys %param if $debug;

  # We get the parameters
  $data     = $param{'data'};
  $template = $param{'template'};
  $from     = $param{'from'} || 1;
  $count    = $param{'count'} || 0;
  $match    = $param{'match'};
  $mcol     = $param{'mcol'} || 1;
  $macchars = $param{'macchars'};

  $data or usage("Input data file was not given");
  $template or usage("HTML template file was not given");

  open(STDOUT,">$outfile")
    or usage("Can't redirect STDOUT to file $outfile - $!");

}

$from  =~ /^\d+$/ or usage("'from' has to be a number");
$count =~ /^\d+$/ or usage("'count' has to be a number");

if ($match) {
  $match =~ s/^\s+//;
  $match =~ s/\s+$//;
  $mcol =~ /^\d+$/ or usage("Column has has to be a number: $mcol");
}


###########################################################################
#
#  Character transformation for MacOS
#
###########################################################################

# This table is only used if "macchars=yes" is set. MacOS (at least not
# the traditional one) is not using ISO-8859-1. So we may translate
# from the mac charset to ISO-8859-1.

my @mac_to_html_entity =
  (
   '',			# 0x0 = 0 = 
   '',			# 0x1 = 1 = 
   '',			# 0x2 = 2 = 
   '',			# 0x3 = 3 = 
   '',			# 0x4 = 4 = 
   '',			# 0x5 = 5 = 
   '',				# 0x6 = 6 = 
   '',				# 0x7 = 7 = 
   '',				# 0x8 = 8 = 
   ' ',				# 0x9 = 9 = 
   '<BR>',			# 0xA = 10 = 
   '',				# 0xB = 11 = 
   '',				# 0xC = 12 = 
   '<BR>',			# 0xD = 13 = 
   '',				# 0xE = 14 = 
   '',				# 0xF = 15 = 
   '',				# 0x10 = 16 = 
   '',				# 0x11 = 17 = 
   '',				# 0x12 = 18 = 
   '',				# 0x13 = 19 = 
   '',				# 0x14 = 20 = 
   '',				# 0x15 = 21 = 
   '',				# 0x16 = 22 = 
   '',				# 0x17 = 23 = 
   '',				# 0x18 = 24 = 
   '',				# 0x19 = 25 = 
   '',				# 0x1A = 26 = 
   '',				# 0x1B = 27 = 
   '',				# 0x1C = 28 = 
   '',				# 0x1D = 29 = 
   '',				# 0x1E = 30 = 
   '',				# 0x1F = 31 = 
   ' ',				# 0x20 = 32 =  
   '!',				# 0x21 = 33 = !
   '&quot;',			# 0x22 = 34 = "
   '#',           		# 0x23 = 35 = #
   '$',				# 0x24 = 36 = $
   '%',				# 0x25 = 37 = %
   '&amp;',			# 0x26 = 38 = &
   "'",				# 0x27 = 39 = '
   '(',				# 0x28 = 40 = (
   ')',				# 0x29 = 41 = )
   '*',				# 0x2A = 42 = *
   '+',				# 0x2B = 43 = +
   ',',				# 0x2C = 44 = ,
   '-',				# 0x2D = 45 = -
   '.',				# 0x2E = 46 = .
   '/',				# 0x2F = 47 = /
   '0',				# 0x30 = 48 = 0
   '1',				# 0x31 = 49 = 1
   '2',				# 0x32 = 50 = 2
   '3',				# 0x33 = 51 = 3
   '4',				# 0x34 = 52 = 4
   '5',				# 0x35 = 53 = 5
   '6',				# 0x36 = 54 = 6
   '7',				# 0x37 = 55 = 7
   '8',				# 0x38 = 56 = 8
   '9',				# 0x39 = 57 = 9
   ':',				# 0x3A = 58 = :
   ';',				# 0x3B = 59 = ;
   '&lt;',			# 0x3C = 60 = <
   '=',				# 0x3D = 61 = =
   '&gt;',			# 0x3E = 62 = >
   '?',				# 0x3F = 63 = ?
   '@',				# 0x40 = 64 = @
   'A',				# 0x41 = 65 = A
   'B',				# 0x42 = 66 = B
   'C',				# 0x43 = 67 = C
   'D',				# 0x44 = 68 = D
   'E',				# 0x45 = 69 = E
   'F',				# 0x46 = 70 = F
   'G',				# 0x47 = 71 = G
   'H',				# 0x48 = 72 = H
   'I',				# 0x49 = 73 = I
   'J',				# 0x4A = 74 = J
   'K',				# 0x4B = 75 = K
   'L',				# 0x4C = 76 = L
   'M',				# 0x4D = 77 = M
   'N',				# 0x4E = 78 = N
   'O',				# 0x4F = 79 = O
   'P',				# 0x50 = 80 = P
   'Q',				# 0x51 = 81 = Q
   'R',				# 0x52 = 82 = R
   'S',				# 0x53 = 83 = S
   'T',				# 0x54 = 84 = T
   'U',				# 0x55 = 85 = U
   'V',				# 0x56 = 86 = V
   'W',				# 0x57 = 87 = W
   'X',				# 0x58 = 88 = X
   'Y',				# 0x59 = 89 = Y
   'Z',				# 0x5A = 90 = Z
   '[',				# 0x5B = 91 = [
   "\\",			# 0x5C = 92 = \
   ']',				# 0x5D = 93 = ]
   '^',				# 0x5E = 94 = ^
   '_',				# 0x5F = 95 = _
   '`',				# 0x60 = 96 = `
   'a',				# 0x61 = 97 = a
   'b',				# 0x62 = 98 = b
   'c',				# 0x63 = 99 = c
   'd',				# 0x64 = 100 = d
   'e',				# 0x65 = 101 = e
   'f',				# 0x66 = 102 = f
   'g',				# 0x67 = 103 = g
   'h',				# 0x68 = 104 = h
   'i',				# 0x69 = 105 = i
   'j',				# 0x6A = 106 = j
   'k',				# 0x6B = 107 = k
   'l',				# 0x6C = 108 = l
   'm',				# 0x6D = 109 = m
   'n',				# 0x6E = 110 = n
   'o',				# 0x6F = 111 = o
   'p',				# 0x70 = 112 = p
   'q',				# 0x71 = 113 = q
   'r',				# 0x72 = 114 = r
   's',				# 0x73 = 115 = s
   't',				# 0x74 = 116 = t
   'u',				# 0x75 = 117 = u
   'v',				# 0x76 = 118 = v
   'w',				# 0x77 = 119 = w
   'x',				# 0x78 = 120 = x
   'y',				# 0x79 = 121 = y
   'z',				# 0x7A = 122 = z
   '{',				# 0x7B = 123 = {
   '|',				# 0x7C = 124 = |
   '}',				# 0x7D = 125 = }
   '~',				# 0x7E = 126 = ~
   '',				# 0x7F = 127 = DEL
   '&Auml;',			# 0x80 = 128 = 
   '&Aring;',			# 0x81 = 129 = 
   '&Ccedil;',			# 0x82 = 130 = 
   '&Eacute;',			# 0x83 = 131 = 
   '&Ntilde;',			# 0x84 = 132 = 
   '&Ouml;',			# 0x85 = 133 = 
   '&Uuml;',			# 0x86 = 134 = 
   '&aacute;',			# 0x87 = 135 = 
   '&agrave;',			# 0x88 = 136 = 
   '&acirc;',			# 0x89 = 137 = 
   '&auml;',			# 0x8A = 138 = 
   '&atilde;',			# 0x8B = 139 = 
   '&aring;',			# 0x8C = 140 = 
   '&ccedil;',			# 0x8D = 141 = 
   '&eacute;',			# 0x8E = 142 = 
   '&egrave;',			# 0x8F = 143 = 
   '&ecirc;',			# 0x90 = 144 = 
   '&euml;',			# 0x91 = 145 = 
   '&iacute;',			# 0x92 = 146 = 
   '&igrave;',			# 0x93 = 147 = 
   '&icirc;',			# 0x94 = 148 = 
   '&iuml;',			# 0x95 = 149 = 
   '&ntilde;',			# 0x96 = 150 = 
   '&oacute;',			# 0x97 = 151 = 
   '&ograve;',			# 0x98 = 152 = 
   '&ocirc;',			# 0x99 = 153 = 
   '&ouml;',			# 0x9A = 154 = 
   '&otilde;',			# 0x9B = 155 = 
   '&uacute;',			# 0x9C = 156 = 
   '&ugrave;',			# 0x9D = 157 = 
   '&ucirc;',			# 0x9E = 158 = 
   '&uuml;',			# 0x9F = 159 = 
   '#',           		 # 0xA0 = 160 = 
   '&#176;',      		 # 0xA1 = 161 = 
   '&#162;',      		 # 0xA2 = 162 = 
   '&#163;',      		 # 0xA3 = 163 = 
   '&#167;',      		 # 0xA4 = 164 = 
   '&#215;',      		 # 0xA5 = 165 =   'x' (multiplication)
   '&#182;',      		 # 0xA6 = 166 = 
   '&szlig;',			# 0xA7 = 167 = 
   '&reg;',			# 0xA8 = 168 =   not in HTML 2.0
   '&copy;',			# 0xA9 = 169 =   not in HTML 2.0
   '<SUP><TT>TM</TT></SUP>',	# 0xAA = 170 = 
   '&#180;',      		 # 0xAB = 171 = 
   '&#168;',      		 # 0xAC = 172 = 
   '&lt;&gt;',			# 0xAD = 173 = 
   '&AElig;',			# 0xAE = 174 = 
   '&Oslash;',			# 0xAF = 175 = 
   '#',           		 # 0xB0 = 176 = 
   '&#177;',      		 # 0xB1 = 177 = 
   '&lt;=',			# 0xB2 = 178 = 
   '&gt;=',			# 0xB3 = 179 = 
   '&#165;',      		 # 0xB4 = 180 = 
   '&#181;',      		 # 0xB5 = 181 = 
   '#',           		 # 0xB6 = 182 = 
   '#',           		 # 0xB7 = 183 = 
   '#',           		 # 0xB8 = 184 = 
   '#',           		 # 0xB9 = 185 = 
   '#',           		 # 0xBA = 186 = 
   '&#170;',      		 # 0xBB = 187 = 
   '&#186;',      		 # 0xBC = 188 = 
   '#',           		 # 0xBD = 189 = 
   '&aelig;',			# 0xBE = 190 = 
   '&oslash;',			# 0xBF = 191 = 
   '&#191;',      		 # 0xC0 = 192 = 
   '&#161;',      		 # 0xC1 = 193 = 
   '&#172;',      		 # 0xC2 = 194 = 
   '#',           		 # 0xC3 = 195 = 
   'f',				# 0xC4 = 196 = 
   '#',           		 # 0xC5 = 197 = 
   '#',           		 # 0xC6 = 198 = 
   '&#171;',      		 # 0xC7 = 199 = 
   '&#187;',      		 # 0xC8 = 200 = 
   '...',			# 0xC9 = 201 = 
   '&nbsp;',			# 0xCA = 202 =  
   '&Agrave;',			# 0xCB = 203 = 
   '&Atilde;',			# 0xCC = 204 = 
   '&Otilde;',			# 0xCD = 205 = 
   'OE',			# 0xCE = 206 = 
   'oe',			# 0xCF = 207 = 
   '-',				# 0xD0 = 208 = 
   '-',				# 0xD1 = 209 = 
   '&quot;',			# 0xD2 = 210 = 
   '&quot;',			# 0xD3 = 211 = 
   "'",				# 0xD4 = 212 = 
   "'",				# 0xD5 = 213 = 
   '&#247;',      		 # 0xD6 = 214 = 
   '#',           		 # 0xD7 = 215 = 
   '&yuml;',			# 0xD8 = 216 = 
   '#',           		 # 0xD9 = 217 = 
   '/',				# 0xDA = 218 = 
   '&#164;',      		 # 0xDB = 219 = 
   '&lt;',			# 0xDC = 220 = 
   '&gt;',			# 0xDD = 221 = 
   'fi',			# 0xDE = 222 = 
   'fl',			# 0xDF = 223 = 
   '#',           		 # 0xE0 = 224 = 
   '&#183;',      		 # 0xE1 = 225 = 
   ',',				# 0xE2 = 226 = 
   ',,',			# 0xE3 = 227 = 
   '#',           		 # 0xE4 = 228 = 
   '&Acirc;',			# 0xE5 = 229 = 
   '&Ecirc;',			# 0xE6 = 230 = 
   '&Aacute;',			# 0xE7 = 231 = 
   '&Euml;',			# 0xE8 = 232 = 
   '&Egrave;',			# 0xE9 = 233 = 
   '&Iacute;',			# 0xEA = 234 = 
   '&Icirc;',			# 0xEB = 235 = 
   '&Iuml;',			# 0xEC = 236 = 
   '&Igrave;',			# 0xED = 237 = 
   '&Oacute;',			# 0xEE = 238 = 
   '&Ocirc;',			# 0xEF = 239 = 
   '#',           		# 0xF0 = 240 = 
   '&Ograve;',			# 0xF1 = 241 = 
   '&Uacute;',			# 0xF2 = 242 = 
   '&Ucirc;',			# 0xF3 = 243 = 
   '&Ugrave;',			# 0xF4 = 244 = 
  );


###########################################################################
#
#  We look at the template as code, we execute it
#
###########################################################################

open(DB, $data)
  or usage("Can't open file \"$data\": $!");
my @records;
my $i = 1;

while (<DB>) {
  my $record = parse_line($_);

  if ($match) {
    print "MATCH $record->{$mcol} $mcol $match<br>\n" if $debug;
    next unless exists $record->{$mcol};
    next unless $record->{$mcol} =~ m{$match}oi;
  }
  print "ITERATE i = $i, from = $from, count = $count<br>\n" if $debug;
  next if $i++ < $from;
  last if $count and ($i > ($from + $count));
  push(@records, $record);
}

close DB;

open(TEMPLATE, $template)
  or usage("Can't open file \"$template\": $!");
do_page(join('', <TEMPLATE>));
close TEMPLATE;

close(STDOUT) unless $is_cgi;


###########################################################################
#
#  Substitution
#
###########################################################################

sub do_page {
  my $page = shift;
  foreach (split(m|(<repeat>.*?</repeat>)|si, $page)) {
    if (m|<repeat>(.*?)</repeat>|si) {
      print "**** start repeat<br>\n" if $debug;
      do_repeat($1);
      print "**** end repeat<br>\n" if $debug;
    } else {
      print "**** start text<br>\n" if $debug;
      print;
      print "**** end text<br>\n" if $debug;
    }
  }
}

sub do_repeat {
  my $repeat = shift;
  my $text = '';
  my @parts = split(/<next>/i, $repeat);

  while (@records) {
    print "** start record<br>\n" if $debug;
    foreach my $p (@parts) {
      last unless @records;	# FIXME: use or not????
      my $record = shift @records;
      my $part = $p;		# Copy so can change
      $part =~ s/\$arg([0-9]{1,3})/exists $record->{$1} ? $record->{$1}:''/ge;
      print "* start part<br>\n" if $debug;
      print $part;
      print "* end part<br>\n" if $debug;
    }
    print "** end record<br>\n" if $debug;
  }
}

###########################################################################
#
#  Parse one line
#
###########################################################################

sub parse_line {
  my $line = shift;
  chomp($line);

  print "LINE: $line<br>\n" if $debug;

  my %record;
  my $entry;
  my $i = 1;			# First index

  while ($line =~ /\S/) {
    if ($line =~
	s {                     # If the field is quoted it
	   ^\"                  # begins with a quote, 
	   ((?:[^\"]|\"\")*)    # field contains no quotes except in double,
	   \"                   # an end quote,
	   (?:,|$)              # and finally a comma or end of string.
	  } {}x) {              # We remove all this and field is in $
      $entry = $1;
    } elsif ($line =~
	     s {                # Else field is not quoted and it
		^               # begins without a quote,
		(.*?)           # match as few as possible before
		(?:,|$)         # a comma or end of string.
	       } {}x) {         # We remove all this and field is in $
      $entry = $1;
    } else {
      die "Can't parse the line $line";
    }

    $entry =~ s/\"\"/\"/g;

  
    $record{$i++} = $entry;

  }

  return \%record;
}


###########################################################################
#
#  Usage and error reporting
#
###########################################################################

sub usage {
  my $text = @_ ? "\n**** ERROR: " . join("\n",@_) . "\n" : '';

  my $script = basename($0);
  $text .= <<HERE;

Usage: $script [ Options ] data=datafile template=template-file outfile

If you run this script as a CGI you don't specify an output file.

The options are

  from=row      The row number of the first record to display. 1 is first row.
                Note that the row numbers are calculated after matching.
  count=n       Number of records to display.
  match=Re      Regular expression to select records.
  mcol=ColNum   Column number for match 1...
  macchars=yes  If the CSV is from MacOS you can translate to ISO-8859-1

HERE

  die $text;
}

