Last active
August 29, 2015 14:12
-
-
Save magixx/6e6d0c71866cdb2547d4 to your computer and use it in GitHub Desktop.
String number matching magic for scene releases
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import regex | |
# Sample scene release name used to demonstrate the matching logic.
test_release = 'Release.Name.v1.3.0.DISC.1.Other.Tags.x.264.<RESOLUTION>.WMV'
# Tokens collected since the last flush; they are waiting for the tag they belong to.
memory = []
# Finished output tokens, built in reversed (right-to-left) order.
main = []
# The release is walked from its last token to its first, so that numbers are
# seen before the tag that precedes them in the original string.
split_release = test_release.split('.')
split_release.reverse()
# Working copy that the main loop consumes one token at a time.
split_release_copy = split_release[:]
# Tags that I want to know about.
# Each named group identifies one tag type; the match must end at the end of
# the token.  The codec alternatives are grouped so that BOTH "x" and "h" are
# anchored at the start of the token (a bare `\Ax|h` would let any token that
# merely ends in "h" count as a codec).
# Flags: regex.V1 | regex.IGNORECASE (numerically 258).
number_reg = regex.compile(r'(?:(?P<disc>DISC)|(?P<update>UPDATE)|(?P<build>BUILD)|(?P<version>v\d+)|'
                           r'(?P<codec>\A(?:x|h)))\Z', regex.V1 | regex.IGNORECASE)
# Special functions for those found tags.
# Keys are group names from number_reg; each value receives the collected
# tokens (tag first) and returns the string to store in `main`.  Tags without
# an entry here are joined with '.' by the main loop.
# The 'disc' entry stays a lambda because disc_join_plus is defined further
# down the file and must be looked up at call time.
extra_modifiers = {'codec': lambda memory_list: ''.join(memory_list),
                   'disc': lambda memory_list: disc_join_plus(memory_list)}
def disc_join_plus(memory_list):
    """Report the tokens that follow a DISC tag and return its placeholder.

    :param memory_list: collected tokens in original order, with the DISC tag
        itself as the first element.
    :return: the literal placeholder string ``'<DISC>'``.
    """
    # Skip the tag itself (index 0); only the tokens after it are reported.
    print('DISC: {}'.format(memory_list[1:]))
    return '<DISC>'
def is_numberlike(string):
    """Tell whether *string* is a number or a mostly-digit token.

    A token qualifies when ``float()`` accepts it, or when it starts with a
    digit, ends with a letter and at least half of its characters are digits
    (for example ``'0a'`` or ``'12ab'``).

    :param string: a single token of the release name.
    :return: ``True`` if the token is number-like, ``False`` otherwise
        (including for an empty string).
    """
    if not string:
        # Nothing to inspect; indexing below would fail on an empty token.
        return False
    try:
        float(string)
        return True
    except ValueError:
        pass
    digit_count = sum(1 for char in string if char.isdigit())
    # Compare 2 * digits against the length instead of dividing, so the
    # "at least half" test does not depend on integer vs. true division.
    return (string[0].isdigit() and string[-1].isalpha()
            and 2 * digit_count >= len(string))
# Walk the reversed release one token at a time.  Number-like tokens are held
# in `memory` until the tag they belong to shows up; everything else is
# flushed to `main` straight away.

# The display line is identical on every pass, so build it once.
nice = ''.join(item + '\t' for item in split_release)
# Index of the final token in the loop, i.e. the FIRST token of the original string.
last_index = len(split_release) - 1
for k, v in enumerate(split_release):
    try:
        # prints out what item you're on nicely: the token list with a caret
        # under the current item
        print(nice)
        print('\t' * k + '^')
        # item always put into memory
        memory.append(split_release_copy.pop(0))
        print('Main string is {} | Current memory: {}'.format(main, memory))
        do_special_work = number_reg.search(v)
        # if it's a special known tag type change it + the memory items to whatever
        if do_special_work and len(memory) > 1:
            # reverse the memory to original state
            memory.reverse()
            # see what regex group matched
            tag_name = next(group_name for group_name, group_value in
                            do_special_work.groupdict().items() if group_value)
            # do a regular dot join on the tokens or something special if specified
            if tag_name in extra_modifiers:
                main.append(extra_modifiers[tag_name](memory))
            else:
                main.append('.'.join(memory))
            # clear the memory
            memory = []
        elif is_numberlike(v) and k < last_index:
            # This item is a number (and not first item in original string) so it must belong to the upcoming item.
            # The first original token has no upcoming item, so it falls through to the flush below
            # instead of being left behind in memory.
            continue
        else:
            memory.reverse()
            # I might want to skip the join and directly add all items to memory 'as is' or something different,
            # i.e. this item (main.append(memory.pop())) >> main, then join rest of memory to main, however it seems
            # that numbers and number-like items should always belong to an item
            main.append('.'.join(memory))
            # clear the memory
            memory = []
    except IndexError as e:
        # Presumably guards against tokens that cannot be indexed (e.g. an empty
        # token produced by consecutive dots); report it and carry on.
        print(e)
# Restore the original left-to-right order before reporting the result.
split_release.reverse()
print('{} became:'.format(split_release))
main.reverse()
print(main)
--------------- | |
WMV <RESOLUTION> 264 x Tags Other 1 DISC 0 3 v1 Name Release | |
^ | |
Main string is [] | Current memory: ['WMV'] | |
WMV <RESOLUTION> 264 x Tags Other 1 DISC 0 3 v1 Name Release | |
^ | |
Main string is ['WMV'] | Current memory: ['<RESOLUTION>'] | |
WMV <RESOLUTION> 264 x Tags Other 1 DISC 0 3 v1 Name Release | |
^ | |
Main string is ['WMV', '<RESOLUTION>'] | Current memory: ['264'] | |
WMV <RESOLUTION> 264 x Tags Other 1 DISC 0 3 v1 Name Release | |
^ | |
Main string is ['WMV', '<RESOLUTION>'] | Current memory: ['264', 'x'] | |
WMV <RESOLUTION> 264 x Tags Other 1 DISC 0 3 v1 Name Release | |
^ | |
Main string is ['WMV', '<RESOLUTION>', 'x264'] | Current memory: ['Tags'] | |
WMV <RESOLUTION> 264 x Tags Other 1 DISC 0 3 v1 Name Release | |
^ | |
Main string is ['WMV', '<RESOLUTION>', 'x264', 'Tags'] | Current memory: ['Other'] | |
WMV <RESOLUTION> 264 x Tags Other 1 DISC 0 3 v1 Name Release | |
^ | |
Main string is ['WMV', '<RESOLUTION>', 'x264', 'Tags', 'Other'] | Current memory: ['1'] | |
WMV <RESOLUTION> 264 x Tags Other 1 DISC 0 3 v1 Name Release | |
^ | |
Main string is ['WMV', '<RESOLUTION>', 'x264', 'Tags', 'Other'] | Current memory: ['1', 'DISC'] | |
DISC: ['1'] | |
WMV <RESOLUTION> 264 x Tags Other 1 DISC 0 3 v1 Name Release | |
^ | |
Main string is ['WMV', '<RESOLUTION>', 'x264', 'Tags', 'Other', '<DISC>'] | Current memory: ['0'] | |
WMV <RESOLUTION> 264 x Tags Other 1 DISC 0 3 v1 Name Release | |
^ | |
Main string is ['WMV', '<RESOLUTION>', 'x264', 'Tags', 'Other', '<DISC>'] | Current memory: ['0', '3'] | |
WMV <RESOLUTION> 264 x Tags Other 1 DISC 0 3 v1 Name Release | |
^ | |
Main string is ['WMV', '<RESOLUTION>', 'x264', 'Tags', 'Other', '<DISC>'] | Current memory: ['0', '3', 'v1'] | |
WMV <RESOLUTION> 264 x Tags Other 1 DISC 0 3 v1 Name Release | |
^ | |
Main string is ['WMV', '<RESOLUTION>', 'x264', 'Tags', 'Other', '<DISC>', 'v1.3.0'] | Current memory: ['Name'] | |
WMV <RESOLUTION> 264 x Tags Other 1 DISC 0 3 v1 Name Release | |
^ | |
Main string is ['WMV', '<RESOLUTION>', 'x264', 'Tags', 'Other', '<DISC>', 'v1.3.0', 'Name'] | Current memory: ['Release'] | |
['Release', 'Name', 'v1', '3', '0', 'DISC', '1', 'Other', 'Tags', 'x', '264', '<RESOLUTION>', 'WMV'] became: | |
['Release', 'Name', 'v1.3.0', '<DISC>', 'Other', 'Tags', 'x264', '<RESOLUTION>', 'WMV'] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment