Skip to content

Instantly share code, notes, and snippets.

@JakeConway
Created December 23, 2016 08:25
Show Gist options
  • Save JakeConway/96023c4640b8c2b6728eee5256250f8c to your computer and use it in GitHub Desktop.
Save JakeConway/96023c4640b8c2b6728eee5256250f8c to your computer and use it in GitHub Desktop.
A script that finds all tables in the UCSC DB compatible with TBRdenWeb block annotations
import MySQLdb, itertools, pprint
#establish connection to UCSC DB
connection = MySQLdb.connect(host = 'genome-mysql.cse.ucsc.edu', user = 'genome',
passwd = '', db = 'hg19')
#generate cursor so we can make queries to the DB
cursor = connection.cursor()
#grab all of the tables in the DB
cursor.execute("SHOW TABLES")
tables = cursor.fetchall()
#number of tables in the DB
num_tables = len(tables)
#the columns we are looking for considering the structure of the block annotations
required_annot_cols = ['chrom', 'chromStart', 'chromEnd', 'name', 'score']
#list to store the tables that have the required columns
tables_to_keep = []
#loop through tables
for i in range(0, num_tables, 1):
print("On table ", i, " out of ", num_tables)
#get columns from table, and convert them into a flattened list
command = 'SHOW COLUMNS FROM {table}'
cursor.execute(command.format(table=str(tables[i][0])))
cols = cursor.fetchall()
cols = list(map(list, cols))
cols = list(itertools.chain.from_iterable(cols))
#variable that says if we have all required columns: 1 = yes, 0 = no
has_all_cols = 1
#loop through required column names
for required in required_annot_cols:
#check if column exists in current table
try:
cols.index(required)
#if not, set has_all_cols to 0 and break the loop
except:
has_all_cols = 0
break
#if we have all of the necessary columns for a block annotation, store the name of table in the tables_to_keep list
if(has_all_cols == 1):
tables_to_keep.append(tables[i][0])
#pretty print list of tables we can keep, so that we can copy and paste it as a list easily
pprint.pprint(tables_to_keep) #the list contains 5109 tables out of 11048 possible
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment