goerz · August 1, 2010 17:43
diff --git a/latex_checkascii.pl b/latex_checkascii.pl
 #!/usr/bin/perl -w
 use strict;
 use utf8;
 ############################################################################
 #    Copyright (C) 2008 by Michael Goerz                                   #
 #    http://www.physik.fu-berlin.de/~mgoerz                                #
 #                                                                          #
 #    This program is free software; you can redistribute it and/or modify  #
 #    it under the terms of the GNU General Public License as published by  #
 #    the Free Software Foundation; either version 3 of the License, or     #
 #    (at your option) any later version.                                   #
 #                                                                          #
 #    This program is distributed in the hope that it will be useful,       #
 #    but WITHOUT ANY WARRANTY; without even the implied warranty of        #
 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         #
 #    GNU General Public License for more details.                          #
 ############################################################################


 # Help text printed when --help, -h or --usage is given.
 my $usage = '
 Usage: latex_checkascii.pl [options] files

 Options are:
    --noreplace    No replacements
    --german       Replacements of the german babel package

 The program can be used to ensure that latex files are in pure ascii.
 It does the following things:
 - a few standard nonascii characters (e.g. em-dash) are replaced by their
  tex representation, unless the --noreplace option is given.
 - if the --german option is given, the conventions of
  \usepackage[german]{babel} are followed. (eg. u-umlaut => "u)
 - if the latex files contain an unescaped quote (") that is not part
  of the known replacements, a warning is issued.
 - if the latex files contain more than one consecutive period, a warning
  is issued (this should probably be a \dots)
 - if after replacements are done there are characters left in the file
  with a code point higher than 127, a warning is issued.
 ';

 # Run-time switches and the list of input files collected from @ARGV.
 my @files = ();
 my $german = 0;
 my $check_for_quotes = 1;

 # Default table: typographic character => plain TeX spelling.
 my %replacements = (
    '–' => '--',
    '—' => '---',
    '“' => '``',
    '”' => "''",
    '„' => '``',
    '’' => "'",
 );


 # Sort every argument into an option or an input file.
 for my $arg (@ARGV){
    if (grep { $arg eq $_ } ('--help', '-h', '--usage')){
        print $usage;
        exit(0);
    }
    if ($arg eq '--german'){
        $german = 1;
        next;
    }
    if ($arg eq '--noreplace'){
        # discard the default table
        %replacements = ();
        next;
    }
    # anything that is not a recognised option is taken as a file name
    push(@files, $arg);
 }


 # At least one file is required.
 @files or die("You didn't provide a latex file to check\n");


 # Shorthands of \usepackage[german]{babel}; merged into %replacements
 # when --german is given.
 my %germanreplacements = (
    'ä' => '"a',
    'ö' => '"o',
    'ü' => '"u',
    'Ä' => '"A',
    'Ö' => '"O',
    'Ü' => '"U',
    'ß' => '"s',
    '„' => '"`',
    '”' => "\"'",
    '«' => '"<',
    '»' => '">',
    # The empty group ends the control word. Without it a letter directly
    # after the quote would be read as part of the command name
    # (e.g. \flqWort), which is an undefined command.
    '‹' => '\flq{}',
    '›' => '\frq{}',
 );

 if ($german){
    # put in german replacements.
    # If a replacement is defined both in the default dict and the german dict,
    # german takes the preference.
    %replacements = (%replacements, %germanreplacements);
 }

 # Return true if $line contains an unescaped double quote (") that does not
 # start one of the known replacement strings.
 #   $line      - one line of the input file
 #   $known_ref - hash ref whose keys are the replacement strings (e.g. '"a')
 # Every quote on the line is examined, not only the first one.
 sub _has_unknown_quote {
    my ($line, $known_ref) = @_;
    # (?<!\\) skips quotes preceded by a backslash. The lookahead captures
    # the following character (if any) without consuming it, so adjacent
    # quotes are each checked on their own.
    while ($line =~ /(?<!\\)"(?=(.?))/g){
        return 1 unless $known_ref->{'"' . $1};
    }
    return 0;
 }

 # The warnings below quote the offending characters, so STDERR needs an
 # encoding layer to print them without "Wide character" warnings.
 binmode(STDERR, ':encoding(UTF-8)');

 # The replacement strings as a lookup table for the quote check.
 my %known_shorthand = map { $_ => 1 } values(%replacements);

 # Process each file: write the converted text to "<file>.asciiout", then
 # keep the original as "<file>~" and move the result into its place.
 foreach my $file (@files){
    my $outfile = "$file.asciiout";

    open(my $in, '<:encoding(UTF-8)', $file)
        or die("Couldn't open $file: $!\n");
    die("$outfile already exists\n") if (-f $outfile);
    open(my $out, '>:encoding(UTF-8)', $outfile)
        or die("Couldn't open $outfile: $!\n");
    print("Analyzing $file\n");

    my $linenumber = 0;
    # read line by line instead of loading the whole file into a list
    while (my $line = <$in>){
        $linenumber++;
        if ($check_for_quotes and _has_unknown_quote($line, \%known_shorthand)){
            warn("  Line $linenumber contains unescaped quotation marks\n");
        }
        if ($line =~ /\.{2,}/){
            warn("  Line $linenumber contains more than one consecutive period.\n");
        }
        for my $letter (split(//, $line)){
            my $output = $letter;
            if (exists($replacements{$letter})){
                warn("  Replacing '$letter' with $replacements{$letter} in line $linenumber.\n");
                $output = $replacements{$letter};
            }
            # ASCII ends at code point 127; anything above is reported
            if ($output =~ /[^\x00-\x7F]/){
                warn("  Non-ascii letter '$output' in line $linenumber\n");
            }
            print {$out} $output;
        }
    }

    close($in);
    # buffered write errors only show up at close, so check it before the
    # result replaces the original
    close($out) or die("Couldn't write $outfile: $!\n");

    # stop if the backup cannot be made, otherwise the second rename would
    # overwrite the only copy of the original
    rename($file, "$file~")
        or die("Couldn't rename $file to $file~: $!\n");
    rename($outfile, $file)
        or die("Couldn't rename $outfile to $file: $!\n");

 }


 print "Done\n";
	#!/usr/bin/perl -w
	use strict;
	use utf8;
	############################################################################
	# Copyright (C) 2008 by Michael Goerz #
	# http://www.physik.fu-berlin.de/~mgoerz #
	# #
	# This program is free software; you can redistribute it and/or modify #
	# it under the terms of the GNU General Public License as published by #
	# the Free Software Foundation; either version 3 of the License, or #
	# (at your option) any later version. #
	# #
	# This program is distributed in the hope that it will be useful, #
	# but WITHOUT ANY WARRANTY; without even the implied warranty of #
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
	# GNU General Public License for more details. #
	############################################################################


	# Help text printed when --help, -h or --usage is given.
	my $usage = '
	Usage: latex_checkascii.pl [options] files

	Options are:
	--noreplace No replacements
	--german Replacements of the german babel package

	The program can be used to ensure that latex files are in pure ascii.
	It does the following things:
	- a few standard nonascii characters (e.g. em-dash) are replaced by their
	tex representation, unless the --noreplace option is given.
	- if the --german option is given, the conventions of
	\usepackage[german]{babel} are followed. (eg. u-umlaut => "u)
	- if the latex files contain an unescaped quote (") that is not part
	of the known replacements, a warning is issued.
	- if the latex files contain more than one consecutive period, a warning
	is issued (this should probably be a \dots)
	- if after replacements are done there are characters left in the file
	with a code point higher than 127, a warning is issued.
	';

	# Run-time switches and the list of input files collected from @ARGV.
	my @files = ();
	my $german = 0;
	my $check_for_quotes = 1;

	# Default table: typographic character => plain TeX spelling.
	my %replacements = (
	    '–' => '--',
	    '—' => '---',
	    '“' => '``',
	    '”' => "''",
	    '„' => '``',
	    '’' => "'",
	);


	# Sort every argument into an option or an input file.
	for my $arg (@ARGV){
	    if (grep { $arg eq $_ } ('--help', '-h', '--usage')){
	        print $usage;
	        exit(0);
	    }
	    if ($arg eq '--german'){
	        $german = 1;
	        next;
	    }
	    if ($arg eq '--noreplace'){
	        # discard the default table
	        %replacements = ();
	        next;
	    }
	    # anything that is not a recognised option is taken as a file name
	    push(@files, $arg);
	}


	# At least one file is required.
	@files or die("You didn't provide a latex file to check\n");


	# Shorthands of \usepackage[german]{babel}; merged into %replacements
	# when --german is given.
	my %germanreplacements = (
	    'ä' => '"a',
	    'ö' => '"o',
	    'ü' => '"u',
	    'Ä' => '"A',
	    'Ö' => '"O',
	    'Ü' => '"U',
	    'ß' => '"s',
	    '„' => '"`',
	    '”' => "\"'",
	    '«' => '"<',
	    '»' => '">',
	    # The empty group ends the control word. Without it a letter directly
	    # after the quote would be read as part of the command name
	    # (e.g. \flqWort), which is an undefined command.
	    '‹' => '\flq{}',
	    '›' => '\frq{}',
	);

	if ($german){
	    # put in german replacements.
	    # If a replacement is defined both in the default dict and the german dict,
	    # german takes the preference.
	    %replacements = (%replacements, %germanreplacements);
	}

	# Return true if $line contains an unescaped double quote (") that does not
	# start one of the known replacement strings.
	#   $line      - one line of the input file
	#   $known_ref - hash ref whose keys are the replacement strings (e.g. '"a')
	# Every quote on the line is examined, not only the first one.
	sub _has_unknown_quote {
	    my ($line, $known_ref) = @_;
	    # (?<!\\) skips quotes preceded by a backslash. The lookahead captures
	    # the following character (if any) without consuming it, so adjacent
	    # quotes are each checked on their own.
	    while ($line =~ /(?<!\\)"(?=(.?))/g){
	        return 1 unless $known_ref->{'"' . $1};
	    }
	    return 0;
	}

	# The warnings below quote the offending characters, so STDERR needs an
	# encoding layer to print them without "Wide character" warnings.
	binmode(STDERR, ':encoding(UTF-8)');

	# The replacement strings as a lookup table for the quote check.
	my %known_shorthand = map { $_ => 1 } values(%replacements);

	# Process each file: write the converted text to "<file>.asciiout", then
	# keep the original as "<file>~" and move the result into its place.
	foreach my $file (@files){
	    my $outfile = "$file.asciiout";

	    open(my $in, '<:encoding(UTF-8)', $file)
	        or die("Couldn't open $file: $!\n");
	    die("$outfile already exists\n") if (-f $outfile);
	    open(my $out, '>:encoding(UTF-8)', $outfile)
	        or die("Couldn't open $outfile: $!\n");
	    print("Analyzing $file\n");

	    my $linenumber = 0;
	    # read line by line instead of loading the whole file into a list
	    while (my $line = <$in>){
	        $linenumber++;
	        if ($check_for_quotes and _has_unknown_quote($line, \%known_shorthand)){
	            warn(" Line $linenumber contains unescaped quotation marks\n");
	        }
	        if ($line =~ /\.{2,}/){
	            warn(" Line $linenumber contains more than one consecutive period.\n");
	        }
	        for my $letter (split(//, $line)){
	            my $output = $letter;
	            if (exists($replacements{$letter})){
	                warn(" Replacing '$letter' with $replacements{$letter} in line $linenumber.\n");
	                $output = $replacements{$letter};
	            }
	            # ASCII ends at code point 127; anything above is reported
	            if ($output =~ /[^\x00-\x7F]/){
	                warn(" Non-ascii letter '$output' in line $linenumber\n");
	            }
	            print {$out} $output;
	        }
	    }

	    close($in);
	    # buffered write errors only show up at close, so check it before the
	    # result replaces the original
	    close($out) or die("Couldn't write $outfile: $!\n");

	    # stop if the backup cannot be made, otherwise the second rename would
	    # overwrite the only copy of the original
	    rename($file, "$file~")
	        or die("Couldn't rename $file to $file~: $!\n");
	    rename($outfile, $file)
	        or die("Couldn't rename $outfile to $file: $!\n");

	}


	print "Done\n";