DrLulz · November 15, 2015 13:03
diff --git a/raymond-4.0.py b/raymond-4.0.py
 #!/usr/local/bin/python
 # -*- coding: utf-8 -*-

 import re
 import os
 import csv
 import sys
 import glob
 import string
 import linecache
 from Tkinter import Tk
 from cStringIO import StringIO
 from tkFileDialog import askdirectory
 from pyth.plugins.rtf15.reader import Rtf15Reader

 #CSV_FILE = '/Users/NAME/Desktop/file.csv'

 # [ERROR HANDLING]
 #import logging as log
 #log.basicConfig(filename=os.path.expanduser('~/Desktop/log'), level=log.DEBUG)

 class bcolors:
     '''ANSI escape sequences used to colorize the script's terminal output.

     Concatenate one of the codes in front of a message and ENDC after it.
     '''

     # reset: switches off whatever color/attribute was turned on before it
     ENDC = '\033[0m'

     # text attributes
     BOLD = '\033[1m'
     UNDERLINE = '\033[4m'

     # foreground colors (SGR codes 91-95)
     FAIL = '\033[91m'
     OKGREEN = '\033[92m'
     WARNING = '\033[93m'
     OKBLUE = '\033[94m'
     HEADER = '\033[95m'


 def PrintException():
     '''Print a colorized report of the exception being handled, then exit.

     Meant to be called from inside an ``except`` block. The report shows
     the file name, line number and source line taken from the traceback
     object returned by sys.exc_info(), followed by the exception itself.

     When no exception is being handled the function returns without
     printing anything. Otherwise it always raises SystemExit with
     status 1, so the calling shell sees the run as failed.
     '''
     exc_type, exc_obj, tb = sys.exc_info()

     # called outside of an except block: there is nothing to report
     if tb is None:
         return

     frame = tb.tb_frame
     lineno = tb.tb_lineno
     filename = frame.f_code.co_filename

     # refresh the cache so the printed source line matches the file on disk
     linecache.checkcache(filename)
     line = linecache.getline(filename, lineno, frame.f_globals)

     print(bcolors.HEADER + 'EXCEPTION IN: {}'.format(filename) + bcolors.ENDC)
     print(bcolors.HEADER + 'LINE: {}'.format(lineno) + bcolors.ENDC)
     print(bcolors.HEADER + 'CODE: {}'.format(line.strip()) + bcolors.ENDC)
     print(bcolors.WARNING + 'ERROR: {}'.format(exc_obj) + bcolors.ENDC)

     # a failure must not look like success to the calling shell
     sys.exit(1)


 def decode_cell(cell):
     '''Convert a cell that holds escaped RTF markup into plain text.

     cell -- the raw text to convert; it is decoded with 'unicode_escape',
             so it is expected to be a byte string

     Returns the text extracted by pyth with every underscore removed,
     the last character dropped and whitespace runs collapsed to single
     spaces.
     '''

     # interpret the backslash escape sequences, then throw away every
     # character that is not printable so pyth accepts the markup
     unescaped = cell.decode('unicode_escape')
     printable = filter(lambda ch: ch in string.printable, unescaped)
     document = Rtf15Reader.read(StringIO(printable))

     # the text sits three .content levels below the parsed document;
     # flatten them into one list of fragments, keeping their order
     fragments = [
         fragment
         for block in document.content
         for item in block.content
         for fragment in item.content
     ]

     # non-ascii characters in the input were followed by '_', so the
     # underscores are stripped (drop the replace() call to keep them)
     text = ' '.join(fragments).replace('_', '')

     # drop the trailing character (an 'L' in the author's data), then
     # squeeze every run of whitespace down to a single space
     return ' '.join(text[:-1].split())



 def find_rtf(row):
     '''Return a cleaned copy of row with embedded RTF turned into text.

     row -- list of cells as produced by csv.reader

     Cells are copied one by one, reduced to ASCII, until a cell starts
     with the escaped RTF header. That cell and every cell after it are
     joined with spaces, converted by decode_cell() and stored as the
     last cell of the result.
     '''
     cleaned = []

     for index, value in enumerate(row):

         # work on unicode; bytes that are not valid utf-8 are dropped
         if type(value) == str:
             text = unicode(value, "utf-8", errors="ignore")
         else:
             text = unicode(value)

         # ordinary cell: keep it and move on to the next one
         if not re.match(r'^{\\\\rtf', text):
             cleaned.append(text.encode('ascii', 'ignore'))
             continue

         # RTF starts here: the remaining cells of the row belong to it,
         # so merge them, convert once and stop scanning this row
         merged = ' '.join(row[index:])
         cleaned.append(decode_cell(merged).encode('ascii', 'ignore'))
         break

     return cleaned



 def add_suffix(f_ori):
     '''Build the output file name by inserting '-processed' before the extension.

     f_ori -- path of the original file

     Returns the path with the same directory and extension whose base
     name ends in '-processed'.
     '''
     directory, filename = os.path.split(f_ori)
     stem, extension = os.path.splitext(filename)

     # the marker goes between the base name and the extension
     return os.path.join(directory, stem + '-processed' + extension)
    
    

 def open_csv(arg, abspath):
     '''Read the original csv, convert its rows and write them to a new file.

     arg     -- file name or path of the csv to process
     abspath -- True when arg already is a usable path
                (/Users/NAME/Desktop/wut.csv); False when arg is a bare
                file name that is looked up in the current working
                directory

     The result is written next to the original, under the name returned
     by add_suffix(). Any error raised while reading, converting or
     writing is handed to PrintException(), which reports it and
     terminates the script.
     '''
     # the two cases only differ in how the input path is resolved
     if abspath:
         f_ori = arg
     else:
         f_ori = os.path.normpath(os.path.join(os.getcwd(), arg))

     f_new = add_suffix(f_ori)

     # Exception instead of a bare except, so Ctrl-C and sys.exit() are
     # not mistaken for processing errors
     try:
         with open(f_ori, 'rU') as file1, open(f_new, 'wb') as file2:
             reader = csv.reader(file1)
             writer = csv.writer(file2)
             for row in reader:
                 writer.writerow(find_rtf(row))
     except Exception:
         PrintException()



 def main():
     '''Process the csv file named by the first command line argument.

     Prints 'CSV PROCESSED' once open_csv() has returned and
     'FILE IS NOT CSV' when the argument does not end in .csv (in any
     letter case). When no argument is given a message is printed and
     the script exits with status 1.
     '''
     # to hard-code the input instead, assign the path to query here
     if len(sys.argv) < 2:
         print(bcolors.FAIL + 'NO CSV FILE GIVEN' + bcolors.ENDC)
         sys.exit(1)

     query = sys.argv[1]

     if not query.lower().endswith('.csv'):
         print(bcolors.OKBLUE + 'FILE IS NOT CSV' + bcolors.ENDC)
         return

     # a directory component means the argument is a path, not just the
     # name of a file in the current directory
     open_csv(query, bool(os.path.dirname(query)))

     print(bcolors.OKGREEN + 'CSV PROCESSED' + bcolors.ENDC)


 if __name__ == "__main__":
     main()
	#!/usr/local/bin/python
	# -- coding: utf-8 --

	import re
	import os
	import csv
	import sys
	import glob
	import string
	import linecache
	from Tkinter import Tk
	from cStringIO import StringIO
	from tkFileDialog import askdirectory
	from pyth.plugins.rtf15.reader import Rtf15Reader

	#CSV_FILE = '/Users/NAME/Desktop/file.csv'

	# [ERROR HANDLING]
	#import logging as log
	#log.basicConfig(filename=os.path.expanduser('~/Desktop/log'), level=log.DEBUG)

	class bcolors:
	    '''ANSI escape sequences used to colorize the script's terminal output.

	    Concatenate one of the codes in front of a message and ENDC after it.
	    '''

	    # foreground colors (SGR codes 91-95)
	    HEADER = '\033[95m'
	    OKBLUE = '\033[94m'
	    OKGREEN = '\033[92m'
	    WARNING = '\033[93m'
	    FAIL = '\033[91m'

	    # reset: switches off whatever color/attribute was turned on before it
	    ENDC = '\033[0m'

	    # text attributes
	    BOLD = '\033[1m'
	    UNDERLINE = '\033[4m'


	def PrintException():
	    '''Print a colorized report of the exception being handled, then exit.

	    Meant to be called from inside an ``except`` block. The report shows
	    the file name, line number and source line taken from the traceback
	    object returned by sys.exc_info(), followed by the exception itself.

	    When no exception is being handled the function returns without
	    printing anything. Otherwise it always raises SystemExit with
	    status 1, so the calling shell sees the run as failed.
	    '''
	    exc_type, exc_obj, tb = sys.exc_info()

	    # called outside of an except block: there is nothing to report
	    if tb is None:
	        return

	    frame = tb.tb_frame
	    lineno = tb.tb_lineno
	    filename = frame.f_code.co_filename

	    # refresh the cache so the printed source line matches the file on disk
	    linecache.checkcache(filename)
	    line = linecache.getline(filename, lineno, frame.f_globals)

	    print(bcolors.HEADER + 'EXCEPTION IN: {}'.format(filename) + bcolors.ENDC)
	    print(bcolors.HEADER + 'LINE: {}'.format(lineno) + bcolors.ENDC)
	    print(bcolors.HEADER + 'CODE: {}'.format(line.strip()) + bcolors.ENDC)
	    print(bcolors.WARNING + 'ERROR: {}'.format(exc_obj) + bcolors.ENDC)

	    # a failure must not look like success to the calling shell
	    sys.exit(1)


	def decode_cell(cell):
	    '''Convert a cell that holds escaped RTF markup into plain text.

	    cell -- the raw text to convert; it is decoded with 'unicode_escape',
	            so it is expected to be a byte string

	    Returns the text extracted by pyth with every underscore removed,
	    the last character dropped and whitespace runs collapsed to single
	    spaces.
	    '''

	    # interpret the backslash escape sequences, then throw away every
	    # character that is not printable so pyth accepts the markup
	    cell_encode = cell.decode('unicode_escape')
	    cell_encode = filter(lambda x: x in string.printable, cell_encode)
	    cell_rtf = Rtf15Reader.read(StringIO(cell_encode))

	    # the text sits three .content levels below the parsed document;
	    # flatten them into one list of fragments, keeping their order
	    combined = [
	        fragment
	        for block in cell_rtf.content
	        for item in block.content
	        for fragment in item.content
	    ]
	    new_cell = ' '.join(combined)

	    # non-ascii characters in the input were followed by '_', so the
	    # underscores are stripped (comment this line out to keep them)
	    new_cell = re.sub('_', '', new_cell)

	    # drop the trailing character (an 'L' in the author's data), then
	    # squeeze every run of whitespace down to a single space
	    decoded_cell = ' '.join(new_cell[:-1].split())

	    return decoded_cell



	def find_rtf(row):
	    '''Return a cleaned copy of row with embedded RTF turned into text.

	    row -- list of cells as produced by csv.reader

	    Cells are copied one by one, reduced to ASCII, until a cell starts
	    with the escaped RTF header. That cell and every cell after it are
	    joined with spaces, converted by decode_cell() and stored as the
	    last cell of the result.
	    '''
	    temp_row = []

	    for n, cell in enumerate(row):

	        # work on unicode; bytes that are not valid utf-8 are dropped
	        if type(cell) == str:
	            cell = unicode(cell, "utf-8", errors="ignore")
	        else:
	            cell = unicode(cell)

	        if re.match(r'^{\\\\rtf', cell):
	            # RTF starts here: the remaining cells of the row belong
	            # to it, so merge them and convert them in one go
	            cell = ' '.join(row[n:])
	            cell_matched = decode_cell(cell)
	            temp_row.append(cell_matched.encode('ascii', 'ignore'))

	            # everything after this cell is already merged in
	            break

	        else:
	            # ordinary cell: keep it as it is
	            temp_row.append(cell.encode('ascii', 'ignore'))

	    return temp_row



	def add_suffix(f_ori):
	    '''Build the output file name by inserting '-processed' before the extension.

	    f_ori -- path of the original file

	    Returns the path with the same directory and extension whose base
	    name ends in '-processed'.
	    '''
	    suffix = '-processed'

	    # explode full path into path, name, ext
	    path, name = os.path.split(f_ori)
	    name, ext = os.path.splitext(name)

	    # the suffix goes between the base name and the extension
	    return os.path.join(path, '%s%s%s' % (name, suffix, ext))



	def open_csv(arg, abspath):
	    '''Read the original csv, convert its rows and write them to a new file.

	    arg     -- file name or path of the csv to process
	    abspath -- True when arg already is a usable path
	               (/Users/NAME/Desktop/wut.csv); False when arg is a bare
	               file name that is looked up in the current working
	               directory

	    The result is written next to the original, under the name returned
	    by add_suffix(). Any error raised while reading, converting or
	    writing is handed to PrintException(), which reports it and
	    terminates the script.
	    '''
	    # the two cases only differ in how the input path is resolved
	    if abspath:
	        f_ori = arg
	    else:
	        f_ori = os.path.normpath(os.path.join(os.getcwd(), arg))

	    f_new = add_suffix(f_ori)

	    # Exception instead of a bare except, so Ctrl-C and sys.exit() are
	    # not mistaken for processing errors
	    try:
	        with open(f_ori, 'rU') as file1, open(f_new, 'wb') as file2:
	            reader = csv.reader(file1)
	            writer = csv.writer(file2)
	            for row in reader:
	                writer.writerow(find_rtf(row))
	    except Exception:
	        PrintException()



	def main():
	    '''Process the csv file named by the first command line argument.

	    Prints 'CSV PROCESSED' once open_csv() has returned and
	    'FILE IS NOT CSV' when the argument does not end in .csv (in any
	    letter case). When no argument is given a message is printed and
	    the script exits with status 1.
	    '''
	    # to hard-code the input instead, assign the path to query here
	    if len(sys.argv) < 2:
	        print(bcolors.FAIL + 'NO CSV FILE GIVEN' + bcolors.ENDC)
	        sys.exit(1)

	    query = sys.argv[1]

	    if not query.lower().endswith('.csv'):
	        print(bcolors.OKBLUE + 'FILE IS NOT CSV' + bcolors.ENDC)
	        return

	    # a directory component means the argument is a path, not just the
	    # name of a file in the current directory
	    open_csv(query, bool(os.path.dirname(query)))

	    print(bcolors.OKGREEN + 'CSV PROCESSED' + bcolors.ENDC)


	if __name__ == "__main__":
	    main()