Created
August 29, 2011 02:05
-
-
Save mohayonao/1177605 to your computer and use it in GitHub Desktop.
PDFを分割する
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
import os, re, optparse | |
import yaml | |
import pyPdf | |
def getprofile(profile):
    """Load a split profile from a YAML file.

    Args:
        profile: Path to the YAML profile file.

    Returns:
        A tuple ``(indexes, lastPage, (fname, fnum))`` where

        * ``indexes`` is the raw ``indexes`` list from the profile
          (``[]`` when absent),
        * ``lastPage`` is the profile's ``lastPage`` value, or ``None``
          when absent,
        * ``fname`` is the output-name template, built by joining
          ``prefix``, ``%(name)s`` and ``suffix`` with single spaces and
          stripping the result,
        * ``fnum`` is the starting file number (``0`` when absent).
    """
    # safe_load is sufficient here: the profile is only read through
    # mapping lookups, so arbitrary object construction is never needed.
    # The ``with`` block closes the file handle deterministically.
    with open(profile) as stream:
        # An empty YAML document loads as None; fall back to an empty
        # mapping so every key simply takes its default.
        data = yaml.safe_load(stream) or {}

    prefix = data.get('prefix', '')
    suffix = data.get('suffix', '')
    fnum = data.get('fnum', 0)
    lastPage = data.get('lastPage')
    indexes = data.get('indexes', [])

    # strip() removes the leading/trailing space left behind when prefix
    # or suffix is empty.
    fname = ' '.join([prefix, '%(name)s', suffix]).strip()
    return indexes, lastPage, (fname, fnum)
def makeindexes(indexes):
    """Parse profile index entries into ``(start, stop, name)`` tuples.

    Each entry must look like one of::

        (N) name      -> (N, N, name)      single page
        (N, M) name   -> (N, M, name)      explicit range
        (N,) name     -> (N, None, name)   open-ended; stop is left as None

    Whitespace inside the parentheses and around the name is ignored.

    Args:
        indexes: Iterable of entry strings.

    Returns:
        A list of ``(start, stop, name)`` tuples in input order. ``start``
        is an int, ``stop`` is an int or ``None``, ``name`` is a stripped
        string.

    Raises:
        SystemExit: With exit code 1, after printing a "Syntax Error"
            message, when an entry does not match the expected syntax.
    """
    pattern = re.compile(r'\(\s*([0-9]+)\s*(?:(,)\s*([0-9]+)?\s*)?\)\s*(.*)$')
    parsed = []
    # Line numbers in the error message are 1-based.
    for lineno, entry in enumerate(indexes, 1):
        m = pattern.match(entry)
        if not m:
            # Single-argument print(...) is valid on both Python 2 and 3.
            print('Syntax Error: line=%d\n - %s' % (lineno, entry))
            raise SystemExit(1)
        start = stop = int(m.group(1))
        if m.group(2):
            # A comma was present: either an explicit end page follows,
            # or the range is open-ended (stop stays None).
            stop = int(m.group(3)) if m.group(3) else None
        name = m.group(4).strip()
        parsed.append((start, stop, name))
    return parsed
def main():
    """Command-line entry point: split a PDF into parts per a YAML profile.

    Usage: ``%prog [Options] src``

    * ``src`` is the source PDF path; ``.pdf`` is appended when the name
      does not already end with ``pdf``.
    * ``-p/--profile`` names the YAML profile; it defaults to ``src`` as
      given on the command line, and ``.yaml`` is appended when the name
      does not already end with ``yaml``.
    * ``-o/--offset`` is an integer added to every profile page number to
      obtain the physical page number in the source PDF.

    One ``<filename>.pdf`` is written to the current directory for each
    profile entry.

    Raises:
        SystemExit: With code 0 after printing help when no ``src``
            argument is given.
    """
    parser = optparse.OptionParser(usage='%prog [Options] src')
    parser.add_option("-o", "--offset", type="int", default=0)
    parser.add_option("-p", "--profile")
    opts, args = parser.parse_args()
    if not args:
        parser.print_help()
        raise SystemExit(0)

    src_path = args[0]
    # The profile default is derived from src *before* '.pdf' is appended.
    profile = opts.profile or src_path
    offset = opts.offset
    if not src_path.lower().endswith('pdf'):
        src_path += '.pdf'
    if not profile.lower().endswith('yaml'):
        profile += '.yaml'

    indexes, lastPage, (fname, fnum) = getprofile(profile)
    indexes = makeindexes(indexes)
    print('Divide PDF: %s (using %s)' % (src_path, profile))

    # Keep the source stream open for the whole loop: pages are pulled
    # from the reader while each output file is being written.
    with open(src_path, 'rb') as stream:
        reader = pyPdf.PdfFileReader(stream)
        num_pages = reader.numPages
        if lastPage is None:
            lastPage = num_pages
        else:
            lastPage = min(lastPage, num_pages)

        for i, (start, stop, name) in enumerate(indexes):
            if stop is None:
                # Open-ended range: run up to the page before the next
                # entry starts, or to the last page for the final entry.
                if i < len(indexes) - 1:
                    stop = indexes[i + 1][0] - 1
                else:
                    stop = lastPage
            filename = fname % dict(fnum=(fnum + i), name=name)
            print(' page(%4d-%4d) => %s.pdf' % (start, stop, filename))

            writer = pyPdf.PdfFileWriter()
            for j in range(start, stop + 1):
                page_no = j + offset  # 1-based physical page number
                # Inclusive upper bound so the final page is not dropped;
                # the lower bound keeps a negative offset from producing a
                # negative index.
                if 1 <= page_no <= lastPage:
                    writer.addPage(reader.getPage(page_no - 1))

            with open('%s.pdf' % filename, 'wb') as out:
                writer.write(out)
# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == '__main__':
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
prefix: 'sub'
suffix: '%(fnum)02d'
fnum: 1
indexes:
  - ( 1 ) foo       # p. 1 only => "sub foo 01.pdf"
  - ( 5, 8) bar     # from p. 5 to p. 8 => "sub bar 02.pdf"
  - ( 9, ) baz      # from p. 9 to p.11 => "sub baz 03.pdf"
  - ( 12, ) hoge    # from p.12 to last => "sub hoge 04.pdf"
lastPage: 9999      # optional (default: last page of the source PDF)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Usage: dpdf.py [Options] src | |
Options: | |
-h, --help show this help message and exit | |
-o OFFSET, --offset=OFFSET | |
-p PROFILE, --profile=PROFILE |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment