Last active
August 29, 2015 14:19
-
-
Save alexrudy/0d04bccd3680c947a42f to your computer and use it in GitHub Desktop.
Prepare LaTeX Manuscripts for Submission
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
# | |
# submit.pl | |
# A simple tool to prepare .tex manuscripts for submission.
# | |
# Copyright (c) 2015, Alexander Rudy | |
# All rights reserved. | |
# | |
# Redistribution and use in source and binary forms, with or without | |
# modification, are permitted provided that the following conditions are | |
# met: | |
# | |
# 1. Redistributions of source code must retain the above copyright | |
# notice, this list of conditions and the following disclaimer. | |
# | |
# 2. Redistributions in binary form must reproduce the above copyright | |
# notice, this list of conditions and the following disclaimer in the | |
# documentation and/or other materials provided with the distribution. | |
# | |
# 3. Neither the name of the copyright holder nor the names of its | |
# contributors may be used to endorse or promote products derived from | |
# this software without specific prior written permission. | |
# | |
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
# THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH | |
# DAMAGE. | |
use strict 'vars'; | |
use File::Copy; | |
use File::Spec::Functions; | |
use File::Path 'make_path'; | |
# This is a PERL program to handle common tasks for submitting .tex | |
# manuscripts to sites like arXiv.org | |
# | |
# Usage: submit.pl paper.tex [outfolder] | |
# | |
# Positional Arguments: | |
# paper.tex The name of the main .tex file to handle. | |
# outfolder [Optional] The name of the folder into which all | |
# compiled output will be placed. Defaults to 'output' | |
# | |
# | |
# This script will work on your file 'paper.tex', placing it and required | |
# auxiliary files (figures, custom .sty files) into the 'outfolder/' directory. | |
# | |
# The MAIN subroutine handles most of the work, and calls 5 functions which do | |
# the important jobs: | |
# | |
# 1. insertBBL replaces \bibliography{} commands with the contents of the | |
# paper.bbl file, so that references are contained in your primary | |
# manuscript file as required by many publications. | |
# | |
# 2. collapseInputs replaces \input{} commands with the contents of the input | |
# file (which is assumed to end with .tex). | |
# | |
# 3. removeComments strips any '%' comments or \begin{comment}/\end{comment} | |
# environments from your latex document, but should leave '\%' intact when | |
# the percent symbol is required. | |
# | |
# 4. collectFigures renames all of your figures in sequential, increasing order | |
# ('figure01', 'figure02', ...) in your .tex source, and moves those renamed | |
# files to your 'outfolder'. This function does not handle subfigures | |
# differently from figures. | |
# | |
# 5. collectStyles finds any style file referenced by a \usepackage command in | |
# your .tex source, and copies that file to your 'outfolder' for inclusion | |
# with your submission. | |
# | |
# You can disable any of these features by commenting out the relevant line in | |
# the MAIN subroutine below. | |
# | |
# This program does nothing sophisticated about argument parsing or management | |
# at this point. | |
# | |
# You should always try compiling your output paper to ensure that nothing has | |
# been messed up. | |
# | |
# This script was based on one for stripping .tex comments, from here: | |
# http://tex.stackexchange.com/questions/83663/utility-to-strip-comments-from-latex-source | |
# | |
# Script entry point: hand the command-line arguments to MAIN. A plain call
# is used instead of the legacy '&' form.
MAIN(@ARGV);
sub MAIN {
    # Prepare a .tex manuscript for submission: read it, run the five
    # processing passes over its lines, and write the result into the output
    # directory under the manuscript's own file name.
    #
    # Arguments:
    #   $filehandle: path of the main .tex file (despite the name this is a
    #                file name, not an open handle).
    #   $outputdir:  [optional] directory that receives the processed
    #                manuscript. Defaults to 'output'.
    #
    # Returns:
    #   1 on success. Dies when the input file is missing or unreadable, or
    #   when the output directory or output file cannot be written.
    #
    my ($filehandle, $outputdir) = @_;

    # Handle arguments. The defined() guard covers a call with no arguments.
    unless (defined $filehandle && -e $filehandle) {
        my $shown = defined $filehandle ? $filehandle : '';
        print "Usage: $0 filename.tex outfolder\n";
        die "Can't find input file '$shown'";
    }
    $outputdir = "output" unless (defined $outputdir && length $outputdir);
    make_path($outputdir);
    die "Can't create output directory '$outputdir'" unless (-d $outputdir);

    # Only the file-name part of the input path is used for the output, so
    # that 'dir/paper.tex', '../paper.tex' or an absolute path all end up
    # directly inside the output directory.
    require File::Spec;
    my $basename   = (File::Spec->splitpath($filehandle))[2];
    my $outputfile = catfile($outputdir, $basename);

    # Read the manuscript.
    open(my $in, "<", $filehandle)
        or die "Can't open input file '$filehandle': $!";
    my @doc = <$in>;
    close($in);

    # These 5 subroutines provide the core functionality.
    # Each of them may be commented out. They independently modify
    # @doc inplace.
    insertBBL(\@doc, $filehandle);
    collapseInputs(\@doc);
    removeComments(\@doc);
    collectFigures(\@doc, $outputdir);
    collectStyles(\@doc, $outputdir);

    # Save the result. close() is checked because buffered write errors only
    # surface there.
    open(my $out, ">", $outputfile) or die "Can't open output file $outputfile";
    print {$out} @doc;
    close($out) or die "Can't finish writing $outputfile: $!";
    return 1;
}
sub collectFigures {
    # Collect figures, copying them to the output directory and renaming them
    # figure01, figure02, ... in order of appearance. The \includegraphics
    # references in the document are rewritten to the new names.
    #
    # Every \includegraphics{...} on a line is handled, with or without an
    # [options] block and regardless of what follows it on the line. When the
    # reference carries a file extension it is preserved. When it does not,
    # a few common graphics extensions are probed on disk; the copy gets the
    # extension that was found and the reference stays extensionless.
    # Lines inside {verbatim} environments are left untouched.
    #
    # Arguments:
    #   @docarray:  array of document lines (passed by reference).
    #   $outputdir: output directory name, for the figures.
    #
    # Returns:
    #   1. @docarray is modified in place. A warning is issued for every
    #   figure that could not be copied.
    #
    my ($docarray, $outputdir) = @_;
    my $isVerbatimEnvironment = "no";
    my @newdoc;
    my $figurenum = 1;

    # Extensions probed, in order, for references written without one.
    my @guesses = qw(pdf png jpg jpeg eps ps);

    # Copy one figure and return the text that replaces "prefix + old name".
    my $rename = sub {
        my ($prefix, $figurename) = @_;
        my $source = $figurename;
        # The extension may not contain a path separator, so a dot inside a
        # directory name is not mistaken for one.
        my ($ext) = $figurename =~ /\.([^.\/\\]+)\z/;
        my $hadext = defined $ext;
        unless ($hadext) {
            foreach my $guess (@guesses) {
                next unless (-e "$figurename.$guess");
                ($ext, $source) = ($guess, "$figurename.$guess");
                last;
            }
        }
        my $newname = sprintf("figure%02d", $figurenum);
        $figurenum++;
        my $target = defined $ext ? "$newname.$ext" : $newname;
        copy($source, catfile($outputdir, $target))
            or warn "Can't copy figure '$source' to '$outputdir': $!\n";
        return $prefix . ($hadext ? $target : $newname);
    };

    foreach my $line ( @{$docarray} ) {
        $isVerbatimEnvironment = "yes" if ( $line =~ /^\\begin\{verbatim\}/ );
        if ($isVerbatimEnvironment eq "no") {
            # $1 is everything up to and including the opening brace, $2 the
            # figure path; only the path is replaced.
            $line =~ s/(\\includegraphics\*?\s*(?:\[[^\]]*\])?\s*\{\s*)([^}\s]+)/$rename->($1, $2)/ge;
        }
        push @newdoc, $line;
        $isVerbatimEnvironment = "no" if ( $line =~ /^\\end\{verbatim\}/ );
    }
    @{$docarray} = @newdoc;
    return 1;
}
sub collectStyles {
    # Collect custom style files. Custom style files are identified by a
    # \usepackage command naming a package for which a corresponding .sty file
    # exists relative to the current working directory. Each such file is
    # copied to the output directory.
    #
    # Both \usepackage{a} and \usepackage[options]{a, b} are understood; every
    # package in a comma-separated list is checked. Lines inside {verbatim}
    # environments are ignored.
    #
    # Arguments:
    #   @docarray:  array of document lines for parsing (passed by reference).
    #   $outputdir: output directory name, for the style files.
    #
    # Returns:
    #   1. The document lines themselves are not changed. A warning is issued
    #   for every style file that exists but could not be copied.
    #
    my ($docarray, $outputdir) = @_;
    my $isVerbatimEnvironment = "no";
    my @newdoc;
    my %seen;    # style files already handled, so each is copied only once
    foreach my $line ( @{$docarray} ) {
        $isVerbatimEnvironment = "yes" if ( $line =~ /^\\begin\{verbatim\}/ );
        if ($isVerbatimEnvironment eq "no") {
            while ($line =~ /\\usepackage\s*(?:\[[^\]]*\])?\s*\{([^}]*)\}/g) {
                foreach my $stylename (split /,/, $1) {
                    $stylename =~ s/^\s+//;
                    $stylename =~ s/\s+\z//;
                    next unless (length $stylename);
                    # The dot is escaped so that only a literal '.sty' suffix
                    # counts as already present.
                    $stylename .= ".sty" unless ($stylename =~ /\.sty\z/);
                    next if ($seen{$stylename}++);
                    next unless (-e $stylename);
                    copy($stylename, catfile($outputdir, $stylename))
                        or warn "Can't copy style '$stylename' to '$outputdir': $!\n";
                }
            }
        }
        push @newdoc, $line;
        $isVerbatimEnvironment = "no" if ( $line =~ /^\\end\{verbatim\}/ );
    }
    @{$docarray} = @newdoc;
    return 1;
}
sub insertBBL {
    # Insert the contents of a .bbl file into the text of the manuscript in
    # place of the \bibliography command. Most publications require that the
    # entries in your bibliography be included in a {thebibliography}
    # environment. BibTeX places the contents of that environment in a .bbl
    # file; this function inserts that file into the manuscript where it finds
    # the \bibliography command.
    #
    # The whole line holding \bibliography{...} is replaced by a marker
    # comment, the .bbl lines, and a closing marker comment. Trailing
    # whitespace or a trailing comment on that line does not prevent the
    # match. A \bibliography that sits behind an unescaped '%' is considered
    # commented out and left alone, as are lines inside {verbatim}.
    #
    # Arguments:
    #   @docarray: array of document lines for parsing (passed by reference).
    #   $filename: The file name of the manuscript, used for identifying the
    #              .bbl file that accompanies it ('.tex' is replaced by, or
    #              '.bbl' appended to, this name).
    #
    # Returns:
    #   1. @docarray is modified in place. Dies if a \bibliography command is
    #   found but the .bbl file cannot be opened.
    #
    my ($docarray, $filename) = @_;
    # The dot is escaped so only a literal '.tex' suffix is replaced.
    (my $bibname = $filename) =~ s/(?:\.tex)?\z/.bbl/;
    my $isVerbatimEnvironment = "no";
    my @newdoc;
    foreach my $line ( @{$docarray} ) {
        $isVerbatimEnvironment = "yes" if ( $line =~ /^\\begin\{verbatim\}/ );
        if ( $isVerbatimEnvironment eq "no"
            && $line =~ /^(.*?)\\bibliography\s*\{[^}]*\}/ )
        {
            my $before = $1;
            # Skip commands that are commented out on this line.
            if ( $before !~ /(?<!\\)%/ ) {
                ## Replace the bibliography line.
                open(my $bbl, "<", $bibname) or die "Can't find '$bibname'";
                my @bbl = <$bbl>;
                close($bbl);
                # Keep the end marker on its own line even when the .bbl file
                # lacks a final newline.
                $bbl[-1] .= "\n" if ( @bbl && $bbl[-1] !~ /\n\z/ );
                push @newdoc, "\%Bibliography from $bibname\n";
                push @newdoc, @bbl;
                $line = "\%End of $bibname\n";
            }
        }
        push @newdoc, $line;
        $isVerbatimEnvironment = "no" if ( $line =~ /^\\end\{verbatim\}/ );
    }
    @{$docarray} = @newdoc;
    return 1;
}
sub collapseInputs {
    # Replace \input{file} commands with the contents of the named file, so
    # that the manuscript becomes a single self-contained document.
    #
    # '.tex' is appended to the file name unless it already ends in '.tex'.
    # The file is opened relative to the current working directory. In the
    # output, the original line is kept as a '%' comment, followed by the
    # file's lines and a '%end of <file>' marker comment. Text before or after
    # the \input command on the same line is kept on lines of its own.
    # \input commands found inside an included file are expanded as well; a
    # file that (directly or indirectly) inputs itself is skipped with a
    # warning. Commands behind an unescaped '%' and lines inside {verbatim}
    # are left alone. Only the first \input on a line is expanded.
    #
    # Arguments:
    #   @docarray: array of document lines for parsing (passed by reference).
    #   $active:   [optional, internal] hash reference whose keys are the
    #              files currently being expanded; used to detect cycles.
    #
    # Returns:
    #   1. @docarray is modified in place. A warning is issued for every
    #   input file that cannot be opened; its \input line is left unchanged.
    #
    my ($docarray, $active) = @_;
    $active ||= {};
    my $isVerbatimEnvironment = "no";
    my @newdoc;
    foreach my $line ( @{$docarray} ) {
        $isVerbatimEnvironment = "yes" if ( $line =~ /^\\begin\{verbatim\}/ );
        if ( $isVerbatimEnvironment eq "no"
            && $line =~ /^(.*?)\\input\s*\{\s*([^}\s]+)\s*\}(.*)$/s )
        {
            my ($before, $inputfile, $after) = ($1, $2, $3);
            # An unescaped '%' before the command means it is commented out.
            if ( $before !~ /(?<!\\)%/ ) {
                $inputfile .= ".tex" unless ( $inputfile =~ /\.tex\z/ );
                if ( $active->{$inputfile} ) {
                    warn "Skipping recursive \\input of $inputfile";
                }
                elsif ( open(my $inf, "<", $inputfile) ) {
                    my @inputlines = <$inf>;
                    close($inf);
                    # Keep the end marker on its own line even when the file
                    # lacks a final newline.
                    $inputlines[-1] .= "\n"
                        if ( @inputlines && $inputlines[-1] !~ /\n\z/ );
                    # Mark the file as in progress for the nested expansion;
                    # 'local' clears the mark again when this block is left.
                    local $active->{$inputfile} = 1;
                    collapseInputs(\@inputlines, $active);
                    push @newdoc, "\%$line\n";
                    push @newdoc, "$before\n" if ( $before =~ /\S/ );
                    push @newdoc, @inputlines;
                    $line = "\%end of $inputfile\n";
                    if ( $after =~ /\S/ ) {
                        push @newdoc, $line;
                        $line = $after;
                    }
                }
                else {
                    warn "Can't find $inputfile";
                }
            }
        }
        push @newdoc, $line;
        $isVerbatimEnvironment = "no" if ( $line =~ /^\\end\{verbatim\}/ );
    }
    @{$docarray} = @newdoc;
    return 1;
}
sub removeComments {
    # Remove comments from the manuscript text.
    #
    # Rules applied to each line outside {verbatim} and {comment}:
    #   * A line holding only a comment (optionally indented) is dropped
    #     entirely, so that no blank line - a paragraph break in TeX - is left
    #     behind.
    #   * A comment preceded by whitespace is removed together with that
    #     whitespace.
    #   * A comment glued directly to text ('word%note') is cut back to a
    #     bare '%', because that '%' suppresses the end-of-line space and
    #     removing it would change the typeset output.
    #   * A '%' is treated as escaped only when the character before it is a
    #     backslash that is not itself escaped: '\%' is kept as text, while
    #     in '\\%' the '%' starts a comment.
    # {comment} environments are removed, including their \begin and \end
    # lines. The contents of {verbatim} environments are left untouched.
    # Environment delimiters are only recognised at the start of a line.
    #
    # Arguments:
    #   @docarray: array of document lines for parsing (passed by reference).
    #
    # Returns:
    #   1. @docarray is modified in place.
    #
    my ($docarray) = @_;
    my $isCommentEnvironment = "no";
    my $isVerbatimEnvironment = "no";
    my @newdoc;
    foreach my $line ( @{$docarray} ) {
        # Inside verbatim: copy through unchanged until the environment ends.
        if ( $isVerbatimEnvironment eq "yes" ) {
            push @newdoc, $line;
            $isVerbatimEnvironment = "no" if ( $line =~ /^\\end\{verbatim\}/ );
            next;
        }
        # Inside a comment environment: drop everything, including any
        # verbatim block nested in it, until the environment ends.
        if ( $isCommentEnvironment eq "yes" ) {
            $isCommentEnvironment = "no" if ( $line =~ /^\\end\{comment\}/ );
            next;
        }
        if ( $line =~ /^\\begin\{verbatim\}/ ) {
            $isVerbatimEnvironment = "yes";
            push @newdoc, $line;
            next;
        }
        if ( $line =~ /^\\begin\{comment\}/ ) {
            $isCommentEnvironment = "yes";
            next;
        }

        # Comment-only lines disappear completely.
        next if ( $line =~ /^\s*%/ );

        # $1 is the text before the first unescaped '%'. A backslash always
        # consumes the character after it, so '\%' and '\\' pass as units.
        if ( $line =~ /^((?:[^%\\\n]|\\.)*)%/ ) {
            my $kept = $1;
            my $eol = ( $line =~ /\n\z/ ) ? "\n" : "";
            if ( $kept =~ /\s\z/ ) {
                $kept =~ s/\s+\z//;
                $line = $kept . $eol;
            }
            else {
                $line = $kept . "%" . $eol;
            }
        }
        push @newdoc, $line;
    }
    @{$docarray} = @newdoc;
    return 1;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment