Skip to content

Instantly share code, notes, and snippets.

@abhididdigi
Created August 4, 2013 16:53
Show Gist options
  • Save abhididdigi/6150984 to your computer and use it in GitHub Desktop.
Save abhididdigi/6150984 to your computer and use it in GitHub Desktop.
import datetime
import re
class ProcessMovieName:
    """Derive a readable movie title from a release-style file name.

    The title is taken to be everything before the release year.  The year
    is located by plain substring search for each entry of ``listOfYears``;
    the right-most hit is used so that a year which is part of the title
    itself (``2001.A.Space.Odyssey.1968``) does not cut the title short.
    The remaining text is then tidied by ``cleanName``.

    Example: ``ProcessMovieName().process('The.Matrix.1999.720p')`` returns
    ``'The Matrix'``.
    """

    # Candidate release years.  The upper bound follows the current year
    # (plus one, for titles tagged with next year's date) and is never
    # smaller than the historical hard-coded bound of 2020.
    listOfYears = range(1900, max(2020, datetime.date.today().year + 2))

    # Result of the most recent processMovieName()/checkYear() call on an
    # instance.  Kept as an attribute for callers that read it directly.
    processedName = ''

    def cleanName(self, name):
        """Return *name* with release-name noise removed.

        Brackets, parentheses and apostrophes are dropped, the word
        "unrated" is removed in any letter case, dots and underscores become
        spaces, and runs of whitespace are collapsed with leading/trailing
        whitespace stripped.
        """
        name = re.sub(r"[()\[\]']", '', name)
        name = re.sub(r'unrated', '', name, flags=re.IGNORECASE)
        name = re.sub(r'[._]', ' ', name)
        # Removing tokens leaves doubled or trailing separators behind.
        return ' '.join(name.split())

    def processMovieName(self, name):
        """Return the cleaned title portion of *name*.

        The text before the right-most year found in *name* is cleaned and
        returned.  When no year is present, or when nothing is left in front
        of the year (the title itself is a year, e.g. ``2012.720p``), the
        whole name is cleaned instead.  The result is also stored in
        ``self.processedName``.

        Raises:
            TypeError: if *name* is not a ``str``.
        """
        if not isinstance(name, str):
            raise TypeError(
                'name must be a str, got {0}'.format(type(name).__name__))

        cut = self._releaseYearIndex(name)
        title = self.cleanName(name[:cut]) if cut != -1 else ''
        if not title:
            # No year, or an empty prefix: fall back to the full name rather
            # than returning an empty or stale result.
            title = self.cleanName(name)

        self.processedName = title
        return title

    def _releaseYearIndex(self, name):
        """Return the index of the right-most known year in *name*, or -1."""
        cut = -1
        for year in self.listOfYears:
            cut = max(cut, name.rfind(str(year)))
        return cut

    def checkYear(self, name, year):
        """Truncate *name* at the first occurrence of *year*, if present.

        When *year* occurs in *name*, the text before it is cleaned and
        stored in ``self.processedName``; otherwise nothing changes.
        Returns ``None``.  ``processMovieName`` no longer calls this method;
        it is retained with its original signature for existing callers.
        """
        index = name.find(str(year))
        if index != -1:
            self.processedName = self.cleanName(name[:index])

    def process(self, name):
        """Convenience alias for ``processMovieName``."""
        return self.processMovieName(name)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment