Created
August 26, 2019 17:51
-
-
Save anatoly-scherbakov/9b532ebf790d91d142f459b54bb310d8 to your computer and use it in GitHub Desktop.
Parse UNRAR output with pyparsing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/env python3 | |
import json | |
from pyparsing import ( | |
ParserElement, Literal, Group, restOfLine, | |
ZeroOrMore, LineEnd, CharsNotIn | |
) | |
# Sample UNRAR listing used as parser input. A blank line separates the
# banner, the archive summary, each per-entry section, and the trailing
# service marker; the grammar in parse() relies on those blank lines to tell
# sections apart.
TEXT = """UNRAR 5.71 freeware Copyright (c) 1993-2019 Alexander Roshal

Archive: test_advarchs_direct_link.rar
Details: RAR 5

Name: PyCharmHotKeys.pdf
Type: File
Size: 350573
Packed size: 301085
Ratio: 85%
mtime: 2017-05-30 06:26:53,000000000
Attributes: ..A....
CRC32: 538D9621
Host OS: Windows
Compression: RAR 5.0(v50) -m3 -md=16M

Name: sublime_text_shortcuts.pdf
Type: File
Size: 67750
Packed size: 63559
Ratio: 93%
mtime: 2017-12-06 01:54:03,000000000
Attributes: ..A....
CRC32: F08BA6D1
Host OS: Windows
Compression: RAR 5.0(v50) -m3 -md=16M

Name: TextInMotion-VideoSample.mp4
Type: File
Size: 10454044
Packed size: 9955216
Ratio: 95%
mtime: 2019-08-06 13:34:27,207165800
Attributes: ..A....
CRC32: C799547F
Host OS: Windows
Compression: RAR 5.0(v50) -m3 -md=16M

Name: bensound-summer.mp3
Type: File
Size: 4349086
Packed size: 4329351
Ratio: 99%
mtime: 2019-08-06 13:32:47,950623500
Attributes: ..A....
CRC32: A6203CE2
Host OS: Windows
Compression: RAR 5.0(v50) -m3 -md=16M

Name: Financial Sample.xlsx
Type: File
Size: 83418
Packed size: 77547
Ratio: 92%
mtime: 2019-08-06 13:23:29,329002900
Attributes: ..A....
CRC32: 84D57D24
Host OS: Windows
Compression: RAR 5.0(v50) -m3 -md=16M

Name: QO
Type: Service
Size: 351
Packed size: 351
Ratio: 100%
Attributes: .B
Host OS: Windows
Compression: RAR 5.0(v50) -m0 -md=128K

Service: EOF"""
def parse(value: str) -> list:
    """Split UNRAR listing text into sections of ``[key, value]`` rows.

    The grammar expects a banner line beginning with ``UNRAR``, followed by
    sections. Each section is introduced by one or two line breaks and
    consists of ``key: value`` lines. The banner and the first section
    (archive info) are suppressed; every remaining section is returned.

    Keys are returned exactly as matched, i.e. every character on the line
    before the first colon, so callers that need clean keys must strip them.

    :param value: complete listing text to parse.
    :return: list of sections, each a list of ``[key, value]`` string pairs.
    :raises pyparsing.ParseException: if the ``UNRAR`` banner cannot be
        matched at the start of ``value``.
    """
    # pyparsing's default whitespace handling is deliberately left untouched:
    # overriding it (ParserElement.setDefaultWhitespaceChars) would affect
    # every grammar built afterwards in this process, and the elements below
    # already treat line breaks as significant.
    key_value_sep = Literal(': ').suppress()
    eol = LineEnd().suppress()

    banner = Literal('UNRAR') + restOfLine

    row = Group(
        CharsNotIn('\n:')  # key: everything before the first colon
        + key_value_sep
        + restOfLine       # value: remainder of the line
        + eol
    )

    # Sections are separated from each other by empty lines.
    section = Group(eol[1, 2] + ZeroOrMore(row))

    grammar = (
        banner.suppress()      # software info
        + section.suppress()   # archive info
        + ZeroOrMore(section)  # files info
    )

    return grammar.parseString(value).asList()
def main():
    """Parse the sample listing in ``TEXT`` into one dict per section.

    :return: list of dicts mapping each stripped key to its value string.
    """
    parsed_sections = parse(value=TEXT)

    # parse() captures every character before the colon as the key, so any
    # surrounding whitespace has to be trimmed here.
    return [
        {name.strip(): content for name, content in rows}
        for rows in parsed_sections
    ]
if __name__ == '__main__':
    # Script entry point: dump the parsed sections as indented JSON.
    parsed = main()
    print(json.dumps(parsed, indent=2))
''' | |
[ | |
{ | |
"Name": "PyCharmHotKeys.pdf", | |
"Type": "File", | |
"Size": "350573", | |
"Packed size": "301085", | |
"Ratio": "85%", | |
"mtime": "2017-05-30 06:26:53,000000000", | |
"Attributes": "..A....", | |
"CRC32": "538D9621", | |
"Host OS": "Windows", | |
"Compression": "RAR 5.0(v50) -m3 -md=16M" | |
}, | |
{ | |
"Name": "sublime_text_shortcuts.pdf", | |
"Type": "File", | |
"Size": "67750", | |
"Packed size": "63559", | |
"Ratio": "93%", | |
"mtime": "2017-12-06 01:54:03,000000000", | |
"Attributes": "..A....", | |
"CRC32": "F08BA6D1", | |
"Host OS": "Windows", | |
"Compression": "RAR 5.0(v50) -m3 -md=16M" | |
}, | |
{ | |
"Name": "TextInMotion-VideoSample.mp4", | |
"Type": "File", | |
"Size": "10454044", | |
"Packed size": "9955216", | |
"Ratio": "95%", | |
"mtime": "2019-08-06 13:34:27,207165800", | |
"Attributes": "..A....", | |
"CRC32": "C799547F", | |
"Host OS": "Windows", | |
"Compression": "RAR 5.0(v50) -m3 -md=16M" | |
}, | |
{ | |
"Name": "bensound-summer.mp3", | |
"Type": "File", | |
"Size": "4349086", | |
"Packed size": "4329351", | |
"Ratio": "99%", | |
"mtime": "2019-08-06 13:32:47,950623500", | |
"Attributes": "..A....", | |
"CRC32": "A6203CE2", | |
"Host OS": "Windows", | |
"Compression": "RAR 5.0(v50) -m3 -md=16M" | |
}, | |
{ | |
"Name": "Financial Sample.xlsx", | |
"Type": "File", | |
"Size": "83418", | |
"Packed size": "77547", | |
"Ratio": "92%", | |
"mtime": "2019-08-06 13:23:29,329002900", | |
"Attributes": "..A....", | |
"CRC32": "84D57D24", | |
"Host OS": "Windows", | |
"Compression": "RAR 5.0(v50) -m3 -md=16M" | |
}, | |
{ | |
"Name": "QO", | |
"Type": "Service", | |
"Size": "351", | |
"Packed size": "351", | |
"Ratio": "100%", | |
"Attributes": ".B", | |
"Host OS": "Windows", | |
"Compression": "RAR 5.0(v50) -m0 -md=128K" | |
}, | |
{ | |
"Service": "EOF" | |
} | |
] | |
''' |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment