Created
September 8, 2015 06:28
-
-
Save Chitrank-Dixit/6e1e429f98583eddb2db to your computer and use it in GitHub Desktop.
Clean a LaTeX file: remove all formatting and keep only the mathematical equation formatting active.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_all_data(datasource):
    r"""Return the text that follows the first ``\textbf`` marker.

    The segment starts right after the marker and runs up to (but not
    including) the next ``\textbf`` marker, or to the end of *datasource*
    when no further marker exists.

    Args:
        datasource: The LaTeX text to scan.

    Returns:
        A ``(data, end_data)`` tuple. ``data`` is the extracted segment and
        ``end_data`` is the index in *datasource* where the segment ends
        (i.e. where the next marker starts, or ``len(datasource)``).
        When *datasource* contains no marker at all, ``(None, 0)``.
    """
    marker = '\\textbf'

    # Search for the whole marker so that look-alike commands such as
    # \textbackslash are not mistaken for the start of a segment.
    start = datasource.find(marker)
    if start == -1:
        return None, 0

    data_start = start + len(marker)
    end_data = datasource.find(marker, data_start)
    if end_data == -1:
        # Last segment: without this clamp the -1 would be used as a slice
        # bound and silently drop the final character of the text.
        end_data = len(datasource)

    return datasource[data_start:end_data], end_data
def process_data(seed):
    r"""Split *seed* into the segments that follow each ``\textbf`` marker.

    Repeatedly calls ``get_all_data`` on the unprocessed remainder of the
    text, collecting every segment it returns, until no marker is left.

    Args:
        seed: The LaTeX text to split.

    Returns:
        A new list of segment strings, in document order. The list is empty
        when *seed* is empty or contains no marker.
    """
    # Accumulate locally instead of appending to a module-level list, so the
    # function works regardless of what globals exist and repeated calls do
    # not pile up results.
    segments = []
    while seed:
        data, end_data = get_all_data(seed)
        # get_all_data signals "no marker found" with None. An empty string
        # is a legitimate (empty) segment and must not stop the scan.
        if data is None:
            break
        segments.append(data)
        # Continue from where the extracted segment ended.
        seed = seed[end_data:]
    return segments
#################### cleaning the \\ from the substrings ############################
def cleanup(lists):
    r"""Strip LaTeX formatting commands from each string in *lists*.

    Non-string items (for example ``None``) are skipped. Each string has a
    fixed series of substrings removed or replaced, and a ``{Q.`` question
    label, if present, is rewritten as ``"<label> <rest of text>"``.

    Args:
        lists: Iterable of text segments; items that are not ``str`` are
            ignored.

    Returns:
        A new list containing the cleaned strings, in input order.
    """
    # (old, new) pairs applied in this order. Order matters: '}}}' has to be
    # removed before '}}', otherwise a stray '}' would be left behind.
    replacements = (
        ("`\\textit{a}'", "'a'"),
        ('\\begin{center}', ''),
        # Backslash escaped explicitly: '{\uppercase' is a truncated \uXXXX
        # escape and a SyntaxError on Python 3.
        ('{\\uppercase', ''),
        ('\\Large ', ''),
        ('\\end{center}', ''),
        ('\\hspace{15pt}', ''),
        ('}}}', ''),
        ('}}', ''),
        ('{{', ''),
        # NOTE(review): this literal looks like a mangled "`\textit{" (the
        # "\t" turned into whitespace). Kept as found - confirm the intent.
        ('` extit{', ''),
        ("}'", ''),
    )

    cleaned = []
    for string in lists:
        if not isinstance(string, str):
            continue

        for old, new in replacements:
            string = string.replace(old, new)

        index = string.find('{Q.')
        if index != -1:
            # The label is the three characters after '{' (e.g. "Q.1"). The
            # slice end is relative to index so the label is still correct
            # when '{Q.' is not at the very start of the string.
            head = string[index + 1:index + 4]
            # Skip the single character following the label. Any text before
            # '{Q.' is dropped, as in the original implementation.
            body = string[index + 5:]
            string = head + ' ' + body

        cleaned.append(string)
    return cleaned
# Script entry: read the LaTeX source, split it into segments with
# process_data(), strip the formatting with cleanup(), and write the cleaned
# segments back-to-back into new_text_gen.txt.

# The context manager closes the input file as soon as it has been read.
with open('rotatory_motion.txt', 'r') as text_file:
    strnew = text_file.read()

# process_data() and cleanup() may append to the module-level names `lists`
# and `new_list`, so both must exist before the respective call.
lists = []
lists = process_data(strnew)

new_list = []
new_list = cleanup(lists)

# Text mode ('w'): the items are str, and writing str to a file opened in
# binary mode ('wb+') raises TypeError on Python 3.
with open('new_text_gen.txt', 'w') as new_file:
    for item in new_list:
        new_file.write(item)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment