-
-
Save alexalouit/a66866fbda3557c472f91a3a95168499 to your computer and use it in GitHub Desktop.
Removes it-ebooks.info links from book footers. Run `python itebooks.py -f /sourcepath/books/thebook.pdf` to clean up your thebook.pdf file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import sys | |
import re | |
import shutil | |
import argparse | |
import binascii | |
# Hex-encoded regular expression describing one it-ebooks.info link
# annotation inside a hexlified PDF. It is kept unchanged for backward
# compatibility with code that imports it; remove_evil_links() matches on
# the raw bytes with _LINK_ANNOTATION_RE instead.
pattern = b'''0a2f54797065202f416e6e6f740a2f53756274797065202f4c696e6b0a2f52656
374205b20.*?205d0a2f426f7264657220.*?\n0a2f41203c3c0a2f54797065202f416374696f6e
0a2f53202f5552490a2f5552492028687474703a2f2f7777772e69742d65626f6f6b732e696e666
f2f290a3e3e'''.replace(b'\n', b'').strip()

# Lazy wildcard that refuses to run into the header of another annotation.
# Without this guard a match could start at an unrelated link annotation and
# extend up to a later it-ebooks one, deleting everything in between.
_ANNOTATION_GAP = rb'(?:(?!\n/Type /Annot\n).)*?'

# Raw-bytes equivalent of ``pattern``: a /Link annotation whose URI action
# points at http://www.it-ebooks.info/. Matching on the raw bytes (rather
# than on a hex dump) keeps every match aligned to byte boundaries.
_LINK_ANNOTATION_RE = re.compile(
    rb'\n/Type /Annot\n/Subtype /Link\n/Rect \[ ' + _ANNOTATION_GAP +
    rb' \]\n/Border ' + _ANNOTATION_GAP +
    rb'\n/A <<\n/Type /Action\n/S /URI'
    rb'\n/URI \(http://www\.it-ebooks\.info/\)\n>>',
    re.DOTALL,  # the wildcards must be able to match any byte value
)


def remove_evil_links(pdf_data):
    """Remove all it-ebooks link annotations and URLs from PDF data.

    Args:
        pdf_data: The complete contents of a PDF file as ``bytes``.

    Returns:
        A new ``bytes`` object in which every link annotation targeting
        ``http://www.it-ebooks.info/`` has been deleted and every remaining
        occurrence of ``www.it-ebooks.info`` has been replaced by nothing.

    NOTE(review): the matched bytes are deleted rather than overwritten, so
    the output is shorter than the input and later byte offsets shift;
    confirm that the PDF readers you target tolerate this.
    """
    # Remove each annotation element inside the PDF file
    # (this removes the "clickable" it-ebooks.info links).
    new_data = _LINK_ANNOTATION_RE.sub(b'', pdf_data)
    # Remove the actual links
    # (link elements which are assigned to the annotations).
    return new_data.replace(b'www.it-ebooks.info', b'')
def main(args):
    """Strip it-ebooks links from every PDF listed in ``args.files``.

    Args:
        args: Parsed command-line namespace providing ``files`` (a list of
            paths), ``no_backup`` (bool) and ``verbose`` (bool).

    Each file is read, cleaned with ``remove_evil_links`` and written back
    to the same path. Unless ``args.no_backup`` is set, the original is first
    moved to ``<path>.old``. Files that cannot be read, backed up or written
    are reported on stderr and skipped. Ctrl-C stops processing quietly.
    """
    try:
        # dict.fromkeys() drops duplicate paths but, unlike set(), keeps the
        # order in which they were given on the command line.
        args.files = list(dict.fromkeys(args.files))
        for file_path in args.files:
            if not file_path:
                continue
            if args.verbose:
                print('Processing: {0}'.format(file_path))
            try:
                with open(file_path, 'rb') as input_file:
                    pdf_data = input_file.read()
            except IOError as e:
                sys.stderr.write('{0}: {1}\n'.format(file_path, e.strerror))
                sys.stderr.flush()
                continue
            # Modify the PDF data *before* touching the file on disk, so a
            # failure here leaves the original file where it was.
            new_pdf_data = remove_evil_links(pdf_data)
            try:
                # Backup the file with a different name
                if not args.no_backup:
                    if args.verbose:
                        print('Creating backup: {0}.old'.format(file_path))
                    shutil.move(file_path, '{0}.old'.format(file_path))
                # Save the new file
                if args.verbose:
                    print('Saving modified file: {0}'.format(file_path))
                with open(file_path, 'wb') as out_file:
                    out_file.write(new_pdf_data)
            except OSError as e:
                # Covers IOError and shutil.Error (both are OSError in
                # Python 3); keep going with the remaining files.
                sys.stderr.write(
                    '{0}: {1}\n'.format(file_path, e.strerror or e))
                sys.stderr.flush()
                continue
    except KeyboardInterrupt:
        # Deliberate: let the user abort the batch without a traceback.
        pass
if __name__ == '__main__':
    # Script entry point: describe the command-line options, parse them and
    # hand the resulting namespace to main().
    cli = argparse.ArgumentParser()
    # -f/--files: the PDF paths to process (the option itself is mandatory).
    cli.add_argument(
        '-f', '--files',
        required=True,
        nargs='*',
        help='One or more PDF files to remove it-ebook watermarks.',
    )
    # -n/--no-backup: skip creating the "<file>.old" copy.
    cli.add_argument(
        '-n', '--no-backup',
        action='store_true',
        help=('Disables the creating of backups for the files '
              'which are being processed.'),
    )
    # -v/--verbose: print progress messages while processing.
    cli.add_argument('-v', '--verbose', action='store_true')
    options = cli.parse_args()
    main(options)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment