Skip to content

Instantly share code, notes, and snippets.

@magixx
Last active August 29, 2015 14:12
Show Gist options
  • Save magixx/6e6d0c71866cdb2547d4 to your computer and use it in GitHub Desktop.
Save magixx/6e6d0c71866cdb2547d4 to your computer and use it in GitHub Desktop.
String number matching magic for scene releases
import regex
# Sample scene-release name used to exercise the matcher below.
test_release = 'Release.Name.v1.3.0.DISC.1.Other.Tags.x.264.<RESOLUTION>.WMV'
# Items collected since the last flush; they wait here until the item that
# owns them shows up.
memory = []
# Merged items. Built back to front and reversed once the walk is finished.
main = []
# The release is walked from its last segment to its first, so numbers are
# seen before the tag they follow in the name ('264' before 'x', '1' before
# 'DISC').
split_release = test_release.split('.')
split_release.reverse()
split_release_copy = split_release[:]
# Tags that I want to know about. Exactly one named group matches, and its
# name identifies the tag type.
# The codec alternatives are grouped so that \A applies to both of them:
# the previous form `\Ax|h` parsed as `(\Ax)|(h)`, which made every item
# that merely ends in "h" (e.g. "French") look like a codec tag.
# Flags are spelled out instead of the magic number 258 (0x2 | 0x100).
number_reg = regex.compile(
    r'(?:(?P<disc>DISC)|(?P<update>UPDATE)|(?P<build>BUILD)|(?P<version>v\d+)|'
    r'(?P<codec>\A(?:x|h)))\Z',
    regex.IGNORECASE | regex.VERSION1)
# Special functions for those found tags. Each receives the collected items
# in original order (tag first) and returns the merged string. Tags without
# an entry are joined with '.' by the main loop.
# The lambdas use their argument rather than the module-level `memory`, so
# they work on whatever list the caller hands them. `disc_join_plus` is
# defined further down, hence the lambda (looked up at call time).
extra_modifiers = {
    'codec': lambda memory_list: ''.join(memory_list),
    'disc': lambda memory_list: disc_join_plus(memory_list),
}
def disc_join_plus(memory_list):
    """Report the items that follow a disc tag and return its placeholder.

    memory_list -- collected items in original order with the tag first,
                   e.g. ['DISC', '1'].

    Prints every item after the first one and returns the literal '<DISC>'.
    """
    # Use the argument, not the module-level `memory`: the function used to
    # ignore its parameter and only worked because the caller happened to
    # pass that same global list.
    print('DISC: {}'.format(memory_list[1:]))
    return '<DISC>'
def is_numberlike(string):
    """Return True if `string` is a number or looks mostly like one.

    A string qualifies when either
      * float() accepts it ('264', '1.3'), or
      * it starts with a digit, ends with a letter, and at least half of its
        characters are digits ('720p', '5a').

    An empty string is never number-like.
    """
    if not string:
        # float('') raises ValueError and string[0] would then raise
        # IndexError; an empty segment is simply not a number.
        return False
    try:
        float(string)
        return True
    except ValueError:
        pass
    if not (string[0].isdigit() and string[-1].isalpha()):
        return False
    digit_count = sum(1 for char in string if char.isdigit())
    # Compare in integers: `digits / length >= 0.5` truncated to 0 under
    # Python 2 integer division, so this branch could never succeed there.
    return 2 * digit_count >= len(string)
# Index of the final item of the reversed list, i.e. the first segment of the
# original release name. Nothing comes after it, so it can never be held back
# for "the upcoming item".
last_index = len(split_release) - 1
# The header row is the same on every iteration, so build it once.
nice = ''.join(item + '\t' for item in split_release)
for k, v in enumerate(split_release):
    # prints out what item you're on nicely
    print(nice)
    print('\t' * k + '^')
    # item always put into memory (split_release_copy is consumed in step
    # with the loop, so the popped item is the current one)
    memory.append(split_release_copy.pop(0))
    print('Main string is {} | Current memory: {}'.format(main, memory))
    do_special_work = number_reg.search(v)
    # if it's a special known tag type change it + the memory items to whatever
    if do_special_work and len(memory) > 1:
        # reverse the memory to original state
        memory.reverse()
        # see what regex group matched
        tag_name = next(group_name for group_name, group_value in
                        do_special_work.groupdict().items() if group_value)
        # do a regular dot join on the numbers or something special if specified
        if tag_name in extra_modifiers:
            main.append(extra_modifiers[tag_name](memory))
        else:
            main.append('.'.join(memory))
        # clear the memory
        memory = []
        continue
    try:
        # A number belongs to the upcoming item, unless it is the first item
        # of the original string, in which case there is no upcoming item.
        # The old test `k < len(split_release)` was always true, so a
        # leading number stayed in memory forever and was lost.
        belongs_to_next = is_numberlike(v) and k < last_index
    except IndexError as e:
        # is_numberlike may index into an empty segment (e.g. from '..').
        # As before, report it and leave the item in memory for the next one.
        print(e)
        continue
    if belongs_to_next:
        continue
    memory.reverse()
    # I might want to skip the join and directly add all items to memory 'as is' or something different,
    # i.e. this item (main.append(memory.pop())) >> main, then join rest of memory to main, however it seems
    # that numbers and number-like items should always belong to an item
    main.append('.'.join(memory))
    # clear the memory
    memory = []
# Restore original order for both the input and the merged result.
split_release.reverse()
print('{} became:'.format(split_release))
main.reverse()
print(main)
---------------
WMV <RESOLUTION> 264 x Tags Other 1 DISC 0 3 v1 Name Release
^
Main string is [] | Current memory: ['WMV']
WMV <RESOLUTION> 264 x Tags Other 1 DISC 0 3 v1 Name Release
^
Main string is ['WMV'] | Current memory: ['<RESOLUTION>']
WMV <RESOLUTION> 264 x Tags Other 1 DISC 0 3 v1 Name Release
^
Main string is ['WMV', '<RESOLUTION>'] | Current memory: ['264']
WMV <RESOLUTION> 264 x Tags Other 1 DISC 0 3 v1 Name Release
^
Main string is ['WMV', '<RESOLUTION>'] | Current memory: ['264', 'x']
WMV <RESOLUTION> 264 x Tags Other 1 DISC 0 3 v1 Name Release
^
Main string is ['WMV', '<RESOLUTION>', 'x264'] | Current memory: ['Tags']
WMV <RESOLUTION> 264 x Tags Other 1 DISC 0 3 v1 Name Release
^
Main string is ['WMV', '<RESOLUTION>', 'x264', 'Tags'] | Current memory: ['Other']
WMV <RESOLUTION> 264 x Tags Other 1 DISC 0 3 v1 Name Release
^
Main string is ['WMV', '<RESOLUTION>', 'x264', 'Tags', 'Other'] | Current memory: ['1']
WMV <RESOLUTION> 264 x Tags Other 1 DISC 0 3 v1 Name Release
^
Main string is ['WMV', '<RESOLUTION>', 'x264', 'Tags', 'Other'] | Current memory: ['1', 'DISC']
DISC: ['1']
WMV <RESOLUTION> 264 x Tags Other 1 DISC 0 3 v1 Name Release
^
Main string is ['WMV', '<RESOLUTION>', 'x264', 'Tags', 'Other', '<DISC>'] | Current memory: ['0']
WMV <RESOLUTION> 264 x Tags Other 1 DISC 0 3 v1 Name Release
^
Main string is ['WMV', '<RESOLUTION>', 'x264', 'Tags', 'Other', '<DISC>'] | Current memory: ['0', '3']
WMV <RESOLUTION> 264 x Tags Other 1 DISC 0 3 v1 Name Release
^
Main string is ['WMV', '<RESOLUTION>', 'x264', 'Tags', 'Other', '<DISC>'] | Current memory: ['0', '3', 'v1']
WMV <RESOLUTION> 264 x Tags Other 1 DISC 0 3 v1 Name Release
^
Main string is ['WMV', '<RESOLUTION>', 'x264', 'Tags', 'Other', '<DISC>', 'v1.3.0'] | Current memory: ['Name']
WMV <RESOLUTION> 264 x Tags Other 1 DISC 0 3 v1 Name Release
^
Main string is ['WMV', '<RESOLUTION>', 'x264', 'Tags', 'Other', '<DISC>', 'v1.3.0', 'Name'] | Current memory: ['Release']
['Release', 'Name', 'v1', '3', '0', 'DISC', '1', 'Other', 'Tags', 'x', '264', '<RESOLUTION>', 'WMV'] became:
['Release', 'Name', 'v1.3.0', '<DISC>', 'Other', 'Tags', 'x264', '<RESOLUTION>', 'WMV']
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment