Skip to content

Instantly share code, notes, and snippets.

@Chitrank-Dixit
Created September 8, 2015 06:28
Show Gist options
  • Save Chitrank-Dixit/6e1e429f98583eddb2db to your computer and use it in GitHub Desktop.
Save Chitrank-Dixit/6e1e429f98583eddb2db to your computer and use it in GitHub Desktop.
Clean a LaTeX file: remove all layout formatting and keep only the mathematical-equation markup.
def get_all_data(datasource):
    r"""Return the text that follows the first ``\textbf`` marker.

    The segment starts immediately after the first ``\textbf`` in
    ``datasource`` and stops just before the next ``\textbf``.  When no
    further marker follows, the segment runs to the end of ``datasource``.

    Returns a ``(data, end_data)`` tuple.  ``end_data`` is the index in
    ``datasource`` where scanning should resume: the start of the next
    marker, or ``len(datasource)`` when this was the last segment.
    Returns ``(None, 0)`` when ``datasource`` contains no marker at all.
    """
    marker = '\\textbf'
    start = datasource.find(marker)
    if start == -1:
        return None, 0

    data_start = start + len(marker)
    end_data = datasource.find(marker, data_start)
    if end_data == -1:
        # Last segment: using -1 as a slice bound would silently drop the
        # final character, so extend to the end of the input instead.
        end_data = len(datasource)
    return datasource[data_start:end_data], end_data
def process_data(seed):
    """Split ``seed`` into segments by calling ``get_all_data`` repeatedly.

    Each call to ``get_all_data`` yields one segment plus the index at
    which to resume; the remaining text is re-scanned from that index until
    nothing is left or no segment is found.

    Returns a new list of the extracted segments, in document order.  The
    list is local to each call, so repeated calls do not accumulate results.
    """
    segments = []
    while seed:
        data, end_data = get_all_data(seed)
        if data is None:
            # Nothing more to extract; do not record the sentinel.
            break
        segments.append(data)
        if end_data <= 0:
            # A non-positive resume index cannot advance the scan; stop
            # here rather than loop forever or re-scan a stray tail.
            break
        seed = seed[end_data:]
    return segments
#################### cleaning the \\ from the substrings ############################
def cleanup(lists):
    r"""Strip LaTeX layout markup from each extracted segment.

    Non-string entries (for example ``None``) are skipped.  For every
    string, a fixed sequence of literal replacements removes centering,
    sizing, spacing and ``\textit`` quoting markup.  If the result contains
    a ``{Q.`` question label, the text is rewritten as the three characters
    after the brace (e.g. ``Q.1``), a space, and everything after the
    character that follows the label; anything before the label is dropped.

    Returns a new list of the cleaned strings, in input order.
    """
    # Applied in this exact order: longer brace runs must be removed before
    # shorter ones, and the specific `\textit{a}' form before the generic one.
    replacements = (
        ("`\\textit{a}'", "'a'"),
        ('\\begin{center}', ''),
        ('{\\uppercase', ''),
        ('\\Large ', ''),
        ('\\end{center}', ''),
        ('\\hspace{15pt}', ''),
        ('}}}', ''),
        ('}}', ''),
        ('{{', ''),
        ("`\\textit{", ''),
        ("}'", ''),
    )

    cleaned = []
    for string in lists:
        if not isinstance(string, str):
            continue
        for old, new in replacements:
            string = string.replace(old, new)

        index = string.find('{Q.')
        if index != -1:
            # Slice relative to where the label was found, so a label that
            # is not at position 0 is still extracted correctly.
            head = string[index + 1:index + 4]
            body = string[index + 5:]
            string = head + ' ' + body
        cleaned.append(string)
    return cleaned
# Script body: read the LaTeX source, split it into segments, clean each
# segment, and write the cleaned text to the output file.
with open('rotatory_motion.txt', 'r') as text_file:
    strnew = text_file.read()

# Bound before the calls in case process_data / cleanup reference these
# module-level names.
lists = []
lists = process_data(strnew)
new_list = []
new_list = cleanup(lists)

# Text mode, because the items written are str; the file is opened only
# after processing so a failure above does not leave a truncated output.
with open('new_text_gen.txt', 'w') as new_file:
    for item in new_list:
        new_file.write(item)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment