Created
December 23, 2016 08:25
-
-
Save JakeConway/96023c4640b8c2b6728eee5256250f8c to your computer and use it in GitHub Desktop.
A script that finds all tables in the UCSC DB compatible with TBRdenWeb block annotations
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import MySQLdb, itertools, pprint | |
#establish connection to UCSC DB | |
connection = MySQLdb.connect(host = 'genome-mysql.cse.ucsc.edu', user = 'genome', | |
passwd = '', db = 'hg19') | |
#generate cursor so we can make queries to the DB | |
cursor = connection.cursor() | |
#grab all of the tables in the DB | |
cursor.execute("SHOW TABLES") | |
tables = cursor.fetchall() | |
#number of tables in the DB | |
num_tables = len(tables) | |
#the columns we are looking for considering the structure of the block annotations | |
required_annot_cols = ['chrom', 'chromStart', 'chromEnd', 'name', 'score'] | |
#list to store the tables that have the required columns | |
tables_to_keep = [] | |
#loop through tables | |
for i in range(0, num_tables, 1): | |
print("On table ", i, " out of ", num_tables) | |
#get columns from table, and convert them into a flattened list | |
command = 'SHOW COLUMNS FROM {table}' | |
cursor.execute(command.format(table=str(tables[i][0]))) | |
cols = cursor.fetchall() | |
cols = list(map(list, cols)) | |
cols = list(itertools.chain.from_iterable(cols)) | |
#variable that says if we have all required columns: 1 = yes, 0 = no | |
has_all_cols = 1 | |
#loop through required column names | |
for required in required_annot_cols: | |
#check if column exists in current table | |
try: | |
cols.index(required) | |
#if not, set has_all_cols to 0 and break the loop | |
except: | |
has_all_cols = 0 | |
break | |
#if we have all of the necessary columns for a block annotation, store the name of table in the tables_to_keep list | |
if(has_all_cols == 1): | |
tables_to_keep.append(tables[i][0]) | |
#pretty print list of tables we can keep, so that we can copy and paste it as a list easily | |
pprint.pprint(tables_to_keep) #the list contains 5109 tables out of 11048 possible |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment