Created
March 22, 2016 17:54
-
-
Save mgeeky/15d0e53c133c6e1ec08f to your computer and use it in GitHub Desktop.
Phrack e-zine renaming utility. Given a directory of downloaded Phrack philes, it crawls phrack.org, collects the article titles and authors, and applies them to the file names.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# This script walks the PHRACK e-zines directory and, for each
# issue found there, downloads that issue's table of contents from
# phrack.org. The contents are used to rename every file (phile) in
# the issue directory after its article's author and title.
# | |
# MGeeky, 2012 | |
import os | |
import urllib | |
import string | |
from sys import exit | |
import re | |
import HTMLParser | |
# G L O B A L S

# Directory with the Phrack e-zines; each of its sub-directories is handed
# to RenFiles() by the script entry point.
g_PhrackDir = r"d:\ebooks\SECURITY\MAGAZINES\PHRACK"

# File name format applied to every renamed phile. It is filled from a
# mapping that provides exactly these tokens:
#   %(issue)d, %(phile)d, %(author)s, %(name)s.
g_NameFmt = "p%(issue)02d-0x%(phile)02x - %(author)s - %(name)s.txt"
################################### | |
# Characters replaced with '-' in a new file name before renaming (the set
# that is reserved in Windows file names).
_ILLEGAL_NAME_CHARS = re.compile(r'[<>:"/\\|?*]')


def _downloadIssueIndex(url):
    """Return the body of `url`, or None when it cannot be fetched."""
    try:
        u = urllib.urlopen(url)
        try:
            return u.read()
        finally:
            # The handle has to be released explicitly.
            u.close()
    except IOError:
        return None


def RenFiles(dir):
    """Rename the philes of one Phrack issue directory after their articles.

    The issue number is parsed from the directory name, which must contain
    ``phrack<N>``. The issue's index page is then downloaded from phrack.org
    and every ``<N>.txt`` file found under `dir` is renamed according to
    ``g_NameFmt``, using the title and author listed for that article.

    Problems with the directory or with a single file are reported on stdout
    and skipped. An empty index page terminates the script with status 1.

    Messages are written with single-argument ``print(...)`` so that the
    output is the same whether ``print`` is a statement or a function.

    dir -- path of the issue directory (the name is kept for existing
           callers even though it shadows the builtin).
    """
    found = re.match(r".*phrack(\d{1,2})", dir)
    issue = int(found.group(1)) if found else 0
    if issue <= 0:
        print("\t[!] Error while parsing dir name: %s" % dir)
        return

    url = "http://phrack.org/issues/%d/1.html" % issue
    h = HTMLParser.HTMLParser()
    listing = None  # (names, authors); downloaded once, on first use

    for root, _dirs, files in os.walk(dir):
        print("\n[>] Renaming #%02d issue philes. There are %d of them."
              % (issue, len(files)))

        if listing is None:
            # Open the url resource with the list of philes.
            page = _downloadIssueIndex(url)
            if page is None:
                print("\t[!] Cannot download '%s' resource" % url)
                return
            # Check the page before parsing it, so an empty download is
            # reported as such.
            if not page:
                print("\t[!] Cannot download %d issue!" % issue)
                exit(1)
            listing = parsePage(page)
        names, authors = listing

        for f in files:
            if ".tar.gz" in f:
                continue

            # Anchored at both ends: only plain "<N>.txt" files are philes.
            m = re.match(r"(\d{1,2})\.txt$", f, re.I)
            if not m:
                print("\t[!] Skipping '%s': not a phile of #%d issue"
                      % (f, issue))
                continue

            # File numbers start at 1, the article lists at 0.
            phile = int(m.group(1)) - 1
            if not 0 <= phile < len(names):
                print("\t[!] Cannot find phile: %d of #%d issue"
                      % (phile + 1, issue))
                continue

            name = names[phile]
            author = authors[phile]
            if not name:
                continue

            new = g_NameFmt % {"issue": issue, "phile": phile,
                               "author": author, "name": name}

            # Decode HTML entities, then replace characters that cannot be
            # part of a file name.
            new = h.unescape(new)
            new = _ILLEGAL_NAME_CHARS.sub("-", new)
            if f == new:
                continue

            oldf = os.path.join(root, f)
            newf = os.path.join(root, new)
            try:
                os.rename(oldf, newf)
            except (OSError, UnicodeError) as err:
                print("\t[!] Couldn't rename file: '%s' ! (%s)" % (newf, err))
            else:
                print("\t%s -> %s" % (f, new))
################################### | |
# One row of the article table on an issue page: a link whose target ends in
# "#article" (the title) followed by a right-aligned cell (the author).
# The groups are non-greedy so that several rows sharing one line are still
# matched one by one.
_ARTICLE_ROW_RE = re.compile(
    r'<tr><td align="left"><a href="[^"]+#article">(.+?)</a></td>'
    r'<td align="right">(.+?)</td></tr>',
    re.I)


def parsePage(page):
    """Extract the article titles and authors from an issue index page.

    page -- HTML text of the issue page.

    Returns a ``(names, authors)`` tuple of two equally long lists, in the
    order the articles appear on the page. The texts are returned exactly as
    found in the HTML (entities are not decoded here).

    Raises ValueError when the page contains no article rows. This is an
    explicit check, not an ``assert``, so it also holds under ``python -O``.
    """
    rows = _ARTICLE_ROW_RE.findall(page)
    if not rows:
        raise ValueError("no article rows found in the issue page")

    names = [title for title, _author in rows]
    authors = [author for _title, author in rows]
    return (names, authors)
################################### | |
if __name__ == '__main__':
    print("\n[+] PHRACK magazine files namer")

    # RenFiles() walks every directory it is given recursively by itself, so
    # only the immediate sub-directories of g_PhrackDir are passed to it.
    # Stopping after the first os.walk() result also guarantees that each
    # name is joined with the directory it really lives in.
    for root, dirs, files in os.walk(g_PhrackDir):
        for d in dirs:
            RenFiles(os.path.join(root, d))
        break

    print("\nEnd of script.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment