Skip to content

Instantly share code, notes, and snippets.

@tokejepsen
Last active August 29, 2015 14:02
Show Gist options
  • Save tokejepsen/c79d54cb142103d16ea7 to your computer and use it in GitHub Desktop.
Save tokejepsen/c79d54cb142103d16ea7 to your computer and use it in GitHub Desktop.
Companies Contact
import re
import gspread
gc = gspread.login('LOGIN', 'PASSWORD')
wks = gc.open('Companies Contact')
worksheet = wks.worksheet('Input')
def RemoveDuplicates(column):
values = worksheet.col_values(column)
count = len(values)-1
for value in values:
#proceed if not blank
if value:
print 'duplicate processing {0} of {1}'.format(values.index(value), count)
#filter string value
value = value.replace('http://', '').replace('www.', '')
value = value.split('.')[0]
#find all matches
criteria_re = re.compile(r'(.*\.|^)(\b'+ value +r')\..*|\s('+ value +r')\..*')
cells = worksheet.findall(criteria_re)
#proceed if there are multiple
if len(cells)>1:
print cells
#find highest column value
highestCell = cells[0]
for cell in cells:
if cell.col > highestCell.col:
highestCell = cell
#delete rest of cells
for cell in cells:
if cell != highestCell:
print 'Remove cell ({0}, {1})'.format(cell.row, cell.col)
worksheet.update_cell(cell.row, cell.col, '')
def RemoveBlankCells(column, startRow=0):
values = worksheet.col_values(column)[startRow:]
lastRow = 0
count = len(values)-1 + startRow
for value in values:
if value:
print 'blank processing {0} of {1}'.format(values.index(value) + startRow, count)
worksheet.update_cell(values.index(value) + 1 + startRow, column, '')
worksheet.update_cell(lastRow + 1 + startRow, column, value)
lastRow += 1
#remove duplicates from columns
columns = [4]
for col in columns:
#RemoveDuplicates(col)
RemoveBlankCells(col, 384)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment