#!/usr/bin/perl
###############################################################################
#
# name:        cnv2html.pl
#
# usage:       cnv2html.pl [-h] [file]
#
# description: - convert generic text file to standard HTML paragraph format.
#              - convert to standard HTML numeric/named entities.
#              - convert separate grave accents (only "E'" to "&Egrave;" is
#                currently activated).
#              - one or more consecutive blank lines mark the end of a
#                paragraph.
#
# author:      paolo guiducci
#
# disclaimer:  You are free to use this code, being understood that I decline
#              any responsibility, direct or implied, for damages or
#              inconveniences that its use or impossibility of use may cause
#              (directly or indirectly). If your local law does not allow a full
#              decline of responsibility by the author of a software that you
#              use, then you are not allowed to use this code or any part of it.
#
# web:         http://pguiducci.com
###############################################################################

use strict;
use warnings;

use Getopt::Std;

# Lookup table: single input character (Latin-1 / Windows-1252 byte value)
# => HTML entity.  The source is kept pure ASCII on purpose, so the table
# does not depend on the encoding this file is saved in.
my %entityFor = (
    "\x22" => '&quot;',
    "\x26" => '&amp;',
    "\x3C" => '&lt;',
    "\x3E" => '&gt;',
);

# Each row is a starting code followed by the entity names of the
# consecutive codes from there on.  Codes that are not listed
# (0x80-0x81, 0x8D-0x90, 0x9D-0x9E) are passed through untouched.
my @entityRanges = (
    [ 0x82, qw(sbquo fnof bdquo hellip dagger Dagger circ permil
               Scaron lsaquo OElig) ],
    [ 0x91, qw(lsquo rsquo ldquo rdquo bull ndash mdash tilde trade
               scaron rsaquo oelig) ],
    [ 0x9F, qw(Yuml) ],
    [ 0xA0, qw(nbsp   iexcl  cent   pound  curren yen    brvbar sect
               uml    copy   ordf   laquo  not    shy    reg    macr
               deg    plusmn sup2   sup3   acute  micro  para   middot
               cedil  sup1   ordm   raquo  frac14 frac12 frac34 iquest
               Agrave Aacute Acirc  Atilde Auml   Aring  AElig  Ccedil
               Egrave Eacute Ecirc  Euml   Igrave Iacute Icirc  Iuml
               ETH    Ntilde Ograve Oacute Ocirc  Otilde Ouml   times
               Oslash Ugrave Uacute Ucirc  Uuml   Yacute THORN  szlig
               agrave aacute acirc  atilde auml   aring  aelig  ccedil
               egrave eacute ecirc  euml   igrave iacute icirc  iuml
               eth    ntilde ograve oacute ocirc  otilde ouml   divide
               oslash ugrave uacute ucirc  uuml   yacute thorn  yuml) ],
);

for my $range (@entityRanges) {
    my ($code, @names) = @{$range};
    for my $name (@names) {
        $entityFor{ chr($code) } = "&$name;";
        $code++;
    }
}

###################
### Subroutines ###
###################

# Print the usage message and terminate the program.
sub usage {
    print "usage: cnv2html.pl [-h] [file]\n";
    exit;
}

# Convert to standard HTML named entities.
#
# Takes one line of text and returns a converted copy; the argument itself
# is not modified.  All characters are replaced in a single pass, so the
# '&' of an entity that has just been produced is never escaped again.
sub cnvEntities {
    my ($cnvLine) = @_;

    $cnvLine =~ s/([\x22\x26\x3C\x3E\x82-\x8C\x91-\x9C\x9F-\xFF])/$entityFor{$1}/g;

    # substitute separate grave accent (only E' is currently activated)
    # $cnvLine =~ s/A'/&Agrave;/g;
    $cnvLine =~ s/E'/&Egrave;/g;
    # $cnvLine =~ s/I'/&Igrave;/g;
    # $cnvLine =~ s/O'/&Ograve;/g;
    # $cnvLine =~ s/U'/&Ugrave;/g;
    # $cnvLine =~ s/a'/&agrave;/g;
    # $cnvLine =~ s/e'/&egrave;/g;
    # $cnvLine =~ s/i'/&igrave;/g;
    # $cnvLine =~ s/o'/&ograve;/g;
    # $cnvLine =~ s/u'/&ugrave;/g;

    return $cnvLine;
}

# Convert a list of input lines into HTML paragraphs.
#
# Every line is trimmed, its inner runs of whitespace are squeezed to one
# space and its characters are converted with cnvEntities().  One or more
# consecutive blank lines end the current paragraph.  Returns the whole
# HTML text as one string: each paragraph is "<p>\n...\n</p>\n" and
# paragraphs are separated by an empty line.  Input without any text
# yields the empty string (no empty <p></p> pair is produced).
sub cnvParagraphs {
    my @fileIn = @_;

    my @paragraphs;
    my @current;    # converted lines of the paragraph being collected

    # The trailing '' acts as a final blank line that flushes the last
    # paragraph.
    for my $elem (@fileIn, '') {
        my $line = $elem;

        # eliminate end-of-line and leading/trailing spaces
        $line =~ s/\A\s+//;
        $line =~ s/\s+\z//;

        # handling end of paragraph
        if ($line eq '') {
            if (@current) {
                push @paragraphs, "<p>\n" . join(' ', @current) . "\n</p>\n";
                @current = ();
            }
            next;
        }

        # substitute multi spaces with one single space
        $line =~ s/\s+/ /g;
        push @current, cnvEntities($line);
    }

    return join("\n", @paragraphs);
}

# Program entry point: parse the options, read the named file (or STDIN
# when no file is given) and print the converted text on STDOUT.
sub main {
    my %opt;
    getopts('h', \%opt) or usage();
    usage() if $opt{h};

    my $file = $ARGV[0];
    my @fileIn;
    if (!defined $file || $file eq '') {
        @fileIn = <STDIN>;
    }
    else {
        open my $in, '<', $file or die "unable to open $file: $!";
        @fileIn = <$in>;
        close $in;
    }

    print cnvParagraphs(@fileIn);
    return;
}

# Run only when executed as a script, so the subroutines above can also be
# loaded with require/do (e.g. by a test) without reading STDIN.
main() unless caller;

1;