Skip to content

Instantly share code, notes, and snippets.

@alexrudy
Last active August 29, 2015 14:19
Show Gist options
  • Save alexrudy/0d04bccd3680c947a42f to your computer and use it in GitHub Desktop.
Save alexrudy/0d04bccd3680c947a42f to your computer and use it in GitHub Desktop.
Prepare LaTeX Manuscripts for Submission
#!/usr/bin/env perl
#
# submit.pl
# A simple tool to prepare .tex manuscripts for submission.
#
# Copyright (c) 2015, Alexander Rudy
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
use strict 'vars';
use File::Copy;
use File::Spec::Functions;
use File::Path 'make_path';
# This is a PERL program to handle common tasks for submitting .tex
# manuscripts to sites like arXiv.org
#
# Usage: submit.pl paper.tex [outfolder]
#
# Positional Arguments:
# paper.tex The name of the main .tex file to handle.
# outfolder [Optional] The name of the folder into which all
# compiled output will be placed. Defaults to 'output'
#
#
# This script will work on your file 'paper.tex', placing it and required
# auxiliary files (figures, custom .sty files) into the 'outfolder/' directory.
#
# The MAIN subroutine handles most of the work, and calls 5 functions which do
# the important jobs:
#
# 1. insertBBL replaces \bibliography{} commands with the contents of the
# paper.bbl file, so that references are contained in your primary
# manuscript file as required by many publications.
#
# 2. collapseInputs replaces \input{} commands with the contents of the input
# file (which is assumed to end with .tex).
#
# 3. removeComments strips any '%' comments or \begin{comment}/\end{comment}
# environments from your latex document, but should leave '\%' intact when
# the percent symbol is required.
#
# 4. collectFigures renames all of your figures in sequential, increasing order
# ('figure01', 'figure02', ...) in your .tex source, and moves those renamed
# files to your 'outfolder'. This function does not handle subfigures
# differently from figures.
#
# 5. collectStyles finds any style file referenced by a \usepackage command in
# your .tex source, and copies that file to your 'outfolder' for inclusion
# with your submission.
#
# You can disable any of these features by commenting out the relevant line in
# the MAIN subroutine below.
#
# This program does nothing sophisticated about argument parsing or management
# at this point.
#
# You should always try compiling your output paper to ensure that nothing has
# been messed up.
#
# This script was based on one for stripping .tex comments, from here:
# http://tex.stackexchange.com/questions/83663/utility-to-strip-comments-from-latex-source
#
# Script entry point: hand the command-line arguments to MAIN.
&MAIN(@ARGV);
sub MAIN {
    # Drive the whole conversion: read the manuscript, run the five
    # transformation passes over it, and write the result into the output
    # directory under the same file name.
    #
    # Arguments:
    # $filehandle: path of the main .tex file (despite the name, this is a
    #              file name, not an open handle).
    # $outputdir:  [optional] output directory; defaults to "output".
    #
    # Returns:
    # 1 on success. Dies if the input cannot be found or read, or if the
    # output file cannot be opened or completely written.
    #
    my ($filehandle, $outputdir) = @_;

    # Handle arguments.
    unless (defined $filehandle && -e $filehandle) {
        print "Usage: $0 filename.tex outfolder\n";
        $filehandle = '' unless defined $filehandle;
        die "Can't find input file '$filehandle'" ;
    }
    $outputdir = $outputdir || "output";
    make_path($outputdir);
    my $outputfile = catfile($outputdir, $filehandle);

    # Read the manuscript. Three-argument open with a lexical handle: a file
    # that exists but cannot be read must stop the run rather than silently
    # produce an empty manuscript.
    open(my $in, "<", $filehandle)
        or die "Can't open input file '$filehandle': $!";
    my @doc = <$in>;
    close($in);

    # These 5 subroutines provide the core functionality.
    # Each of them may be commented out. They independently modify
    # @doc inplace.
    insertBBL(\@doc, $filehandle);
    collapseInputs(\@doc);
    removeComments(\@doc);
    collectFigures(\@doc, $outputdir);
    collectStyles(\@doc, $outputdir);

    # Save the result. Buffered write errors (e.g. a full disk) only surface
    # at close, so the close is checked as well.
    open(my $out, ">", $outputfile)
        or die "Can't open output file $outputfile: $!";
    print {$out} @doc;
    close($out) or die "Can't finish writing $outputfile: $!";
    return 1;
}
sub collectFigures {
    # Collect figures: every \includegraphics target is copied into the output
    # directory under a sequential name (figure01, figure02, ...) and the
    # reference in the document is rewritten to that new name. The original
    # file extension is preserved.
    #
    # Handles \includegraphics with or without an [options] group, followed by
    # other text on the same line, and several figures on one line. Lines
    # inside a {verbatim} environment are left untouched.
    #
    # Arguments:
    # @docarray: reference to the array of document lines.
    # $outputdir: output directory name, for the figures.
    #
    # Returns:
    # 1. The array behind @docarray is modified in place. A figure that cannot
    # be copied produces a warning; its reference is still renamed.
    #
    my ($docarray, $outputdir) = @_;
    my $isVerbatimEnvironment = "no";
    my @newdoc;
    my $figurenum = 1;
    foreach my $line ( @{$docarray} ){
        $isVerbatimEnvironment = "yes" if ( $line =~ /^\\begin\{verbatim\}/ );
        if ($isVerbatimEnvironment eq "no") {
            # Only the braced file name is replaced; the command, its options
            # and everything else on the line are kept as written.
            $line =~ s/(\\includegraphics\*?\s*(?:\[[^\]]*\])?\s*\{)\s*([^{}\s]+)\s*\}/
                _stageFigure($1, $2, $figurenum++, $outputdir)/ge;
        }
        push @newdoc, $line;
        $isVerbatimEnvironment = "no" if ( $line =~ /^\\end\{verbatim\}/ );
    }
    @{$docarray} = @newdoc;
    return 1;
}
sub _stageFigure {
    # Copy one figure into $outputdir under its sequential name and return the
    # rewritten "\includegraphics...{newname}" text for the document.
    #
    # Arguments:
    # $prefix: the matched text up to and including the opening brace.
    # $figurename: the figure path as written in the document.
    # $figurenum: sequence number used to build the new name.
    # $outputdir: directory receiving the copy.
    #
    my ($prefix, $figurename, $figurenum, $outputdir) = @_;
    my $source = $figurename;
    # The extension is whatever follows the last dot of the final path
    # component; dots in directory names (e.g. "../figs/plot") do not count.
    my ($ext) = $figurename =~ /\.([^.\/\\]+)$/;
    unless (-e $figurename) {
        # The name may have been written without its extension; look for a
        # file carrying one of the usual graphics extensions.
        foreach my $candidate (qw(pdf png jpg jpeg eps)) {
            next unless -e "$figurename.$candidate";
            $ext = $candidate;
            $source = "$figurename.$candidate";
            last;
        }
    }
    my $newname = sprintf("figure%02d", $figurenum);
    $newname .= ".$ext" if defined $ext;
    copy($source, catfile($outputdir, $newname))
        or warn "Can't copy figure '$source' to '$outputdir': $!\n";
    return $prefix . $newname . "}";
}
sub collectStyles {
    # Collect custom style files. Custom style files are identified by a
    # \usepackage command naming a package for which a corresponding .sty file
    # exists relative to the working directory. Those files are copied to the
    # output directory; packages without a local .sty file are skipped.
    #
    # Handles an optional [options] group and comma-separated package lists.
    # Lines inside a {verbatim} environment are ignored.
    #
    # Arguments:
    # @docarray: reference to the array of document lines for parsing.
    # $outputdir: output directory name, for the style files.
    #
    # Returns:
    # 1. The document lines are only read, never changed. A style file that
    # exists but cannot be copied produces a warning.
    #
    my ($docarray, $outputdir) = @_;
    my $isVerbatimEnvironment = "no";
    foreach my $line ( @{$docarray} ){
        $isVerbatimEnvironment = "yes" if ( $line =~ /^\\begin\{verbatim\}/ );
        if ($isVerbatimEnvironment eq "no") {
            while ($line =~ /\\usepackage\s*(?:\[[^\]]*\])?\s*\{([^{}]+)\}/g) {
                my $packagelist = $1;
                foreach my $package (split /,/, $packagelist) {
                    (my $stylename = $package) =~ s/^\s+|\s+$//g;
                    next if $stylename eq "";
                    # Append the extension only when it is really missing; a
                    # name such as "mysty" merely ends in the letters "sty".
                    $stylename .= ".sty" unless $stylename =~ /\.sty$/;
                    next unless -e $stylename;
                    copy($stylename, catfile($outputdir, $stylename))
                        or warn "Can't copy style '$stylename' to '$outputdir': $!\n";
                }
            }
        }
        $isVerbatimEnvironment = "no" if ( $line =~ /^\\end\{verbatim\}/ );
    }
    return 1;
}
sub insertBBL {
    # Insert the contents of a .bbl file into the text of the manuscript in place
    # of the \bibliography command. Most publications require that the entries
    # in your bibliography be included in a {thebibliography} environment.
    # BibTeX writes that environment into a .bbl file; this function inserts
    # the .bbl file into the manuscript where it finds the \bibliography command.
    #
    # The whole line holding the command is replaced. The command must be the
    # last thing on its line apart from whitespace or a trailing comment.
    # Commented-out \bibliography commands and lines inside a {verbatim}
    # environment are left alone.
    #
    # Arguments:
    # @docarray: reference to the array of document lines for parsing.
    # $filename: The file name of the manuscript; a trailing ".tex" is replaced
    #            by ".bbl" (or ".bbl" is appended) to name the .bbl file.
    #
    # Returns:
    # 1. The array behind @docarray is modified in place. Dies if a
    # \bibliography command is found but the .bbl file cannot be opened.
    #
    my ($docarray, $filename) = @_;
    # The dot is escaped so that only a real ".tex" suffix is replaced; a name
    # that merely ends in the letters "tex" keeps them.
    (my $bibname = $filename) =~ s/(?:\.tex)?$/.bbl/;
    my $isVerbatimEnvironment = "no";
    my @newdoc;
    foreach my $line ( @{$docarray} ){
        $isVerbatimEnvironment = "yes" if ( $line =~ /^\\begin\{verbatim\}/ );
        if ( ($isVerbatimEnvironment eq "no") ){
            ## Replace the bibliography line. The leading group only accepts
            ## text free of an unescaped '%', which rules out commands that
            ## sit inside a comment.
            if ($line =~ /^(?:[^%\\]|\\.)*\\bibliography\s*\{[^{}]*\}\s*(?:%.*)?$/) {
                open(my $bblhandle, "<", $bibname)
                    or die "Can't find '$bibname': $!";
                my @bbl = <$bblhandle>;
                close($bblhandle);
                # Keep the end marker on a line of its own even when the .bbl
                # file lacks a final newline.
                $bbl[-1] .= "\n" if @bbl && $bbl[-1] !~ /\n\z/;
                push @newdoc, "\%Bibliography from $bibname\n";
                push @newdoc, @bbl;
                $line = "\%End of $bibname\n";
            }
        }
        push @newdoc, $line;
        $isVerbatimEnvironment = "no" if ( $line =~ /^\\end\{verbatim\}/ );
    }
    @{$docarray} = @newdoc;
    return 1;
}
sub collapseInputs {
    # Replace \input{file} commands with the contents of the named file, so
    # that the manuscript becomes a single self-contained .tex file. ".tex" is
    # appended to the name when needed. The original command is kept as a
    # comment line before the inserted text and an "%end of <file>" comment
    # line follows it.
    #
    # An \input is expanded only when it is alone on its line (surrounding
    # whitespace and a trailing comment are allowed). \input commands inside
    # the inserted files are expanded too. Commented-out commands and lines
    # inside a {verbatim} environment are left alone. A file that cannot be
    # opened produces a warning and the command stays in the document.
    #
    # Arguments:
    # @docarray: reference to the array of document lines for parsing.
    # $depth: [optional] current nesting level, used by the recursive calls to
    #         stop files that include each other; callers normally omit it.
    #
    # Returns:
    # 1. The array behind @docarray is modified in place.
    #
    my ($docarray, $depth) = @_;
    $depth = 0 unless defined $depth;
    my $maxDepth = 16;
    my $isVerbatimEnvironment = "no";
    my @newdoc;
    foreach my $line ( @{$docarray} ){
        $isVerbatimEnvironment = "yes" if ( $line =~ /^\\begin\{verbatim\}/ );
        if ($isVerbatimEnvironment eq "no") {
            if ($line =~ /^\s*\\input\s*\{\s*([^{}]+?)\s*\}\s*(?:%.*)?$/) {
                my $inputfile = $1;
                # Add ".tex" unless the name already ends in it; a name that is
                # itself an existing file (e.g. "table.txt") is used as written
                # when no "<name>.tex" exists.
                unless ($inputfile =~ /\.tex$/) {
                    $inputfile .= ".tex" if -e "$inputfile.tex" || !-f $inputfile;
                }
                if ($depth >= $maxDepth) {
                    warn "Not expanding $inputfile: \\input nested deeper than $maxDepth levels";
                } elsif (open(my $inf, "<", $inputfile)) {
                    my @inputlines = <$inf>;
                    close($inf);
                    # A file without a final newline must not run into the
                    # line that follows it.
                    $inputlines[-1] .= "\n" if @inputlines && $inputlines[-1] !~ /\n\z/;
                    collapseInputs(\@inputlines, $depth + 1);
                    # Exactly one newline after the marker: an empty line would
                    # be a paragraph break in TeX.
                    (my $marker = $line) =~ s/\s+\z//;
                    push @newdoc, "\%$marker\n";
                    push @newdoc, @inputlines;
                    $line = "\%end of $inputfile\n";
                } else {
                    warn "Can't find $inputfile";
                }
            } elsif ($line =~ /^(?:[^%\\]|\\.)*\\input\s*\{/) {
                # Not commented out, but sharing the line with other text.
                warn "Can't inline \\input on a line with other text: $line";
            }
        }
        push @newdoc, $line;
        $isVerbatimEnvironment = "no" if ( $line =~ /^\\end\{verbatim\}/ );
    }
    @{$docarray} = @newdoc;
    return 1;
}
sub removeComments {
    # Remove comments from the manuscript text. A comment runs from an
    # unescaped '%' to the end of the line; an escaped percent sign (\%) is
    # kept, while '%' after a line break (\\%) starts a comment. {comment}
    # environments are removed entirely. The contents of {verbatim}
    # environments are left untouched.
    #
    # To keep the typeset result unchanged:
    #  - a line holding nothing but a comment (possibly indented) is dropped
    #    completely, so no empty line (paragraph break) is left behind;
    #  - a comment separated from the text by whitespace is removed together
    #    with that whitespace;
    #  - a comment that directly follows text is cut down to a bare '%',
    #    because that '%' also hides the line end from TeX and removing it
    #    would introduce a space.
    #
    # NOTE: '%' inside \verb or \url arguments is not recognised and is
    # treated as the start of a comment.
    #
    # Arguments:
    # @docarray: reference to the array of document lines for parsing.
    #
    # Returns:
    # 1. The array behind @docarray is modified in place.
    #
    my ($docarray) = @_;
    my $isCommentEnvironment = "no";
    my $isVerbatimEnvironment = "no";
    my @newdoc;
    foreach my $line ( @{$docarray} ){
        # A {verbatim} can only start outside a {comment} environment.
        if ($isCommentEnvironment eq "no") {
            $isVerbatimEnvironment = "yes" if ( $line =~ /^\\begin\{verbatim\}/ );
        }
        if ($isVerbatimEnvironment eq "yes") {
            # Verbatim text is copied as is; a \begin{comment} shown inside it
            # is content and must not switch comment removal on.
            push @newdoc, $line;
            $isVerbatimEnvironment = "no" if ( $line =~ /^\\end\{verbatim\}/ );
            next;
        }
        $isCommentEnvironment = "yes" if ( $line =~ /^\\begin\{comment\}/ );
        if ($isCommentEnvironment eq "yes") {
            # Drop the line, including the \begin and \end lines themselves.
            $isCommentEnvironment = "no" if ( $line =~ /^\\end\{comment\}/ );
            next;
        }
        # $1 is the text before the first unescaped '%' (backslash pairs are
        # consumed together, so "\%" is text and "\\%" ends in a comment);
        # $2 is the whitespace between that text and the '%'.
        if ($line =~ /^((?:[^\\%]|\\.)*?)(\s*)%/) {
            my ($text, $gap) = ($1, $2);
            # Skip comment-only lines.
            next if $text eq "";
            my $eol = ($line =~ /\n\z/) ? "\n" : "";
            $line = ($gap eq "") ? "$text\%$eol" : "$text$eol";
        }
        push @newdoc, $line;
    }
    @{$docarray} = @newdoc;
    return 1;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment