Skip to content

Instantly share code, notes, and snippets.

@taoliu
Created September 17, 2013 21:47
Show Gist options
  • Save taoliu/6601121 to your computer and use it in GitHub Desktop.
Save taoliu/6601121 to your computer and use it in GitHub Desktop.
Convert the ENCODE DCC files.txt file from the UCSC site into a tab-delimited file that can be loaded by a spreadsheet or by other scripts.
#!/usr/bin/env python
# Time-stamp: <2013-09-17 17:17:21 Tao Liu>
import os
import sys
# ------------------------------------
# Main function
# ------------------------------------
def _parse_line(line):
    """Turn one files.txt record into a ``{column: value}`` dict.

    A record is ``<filename>\\t<key>=<value>; <key>=<value>; ...``.  The file
    name is stored under the ``"filename"`` key; a record without any
    metadata yields only that key.

    Raises:
        ValueError: if a metadata item has no ``=`` separator.
    """
    # partition() tolerates a record with no tab (file name only).
    fn, _, meta = line.partition("\t")
    record = {"filename": fn}
    for item in meta.split("; "):
        if not item.strip():
            continue
        # Split on the first "=" only, so values may themselves contain "=".
        key, sep, value = item.partition("=")
        if not sep:
            raise ValueError(
                "malformed metadata item %r (expected key=value)" % item)
        record[key] = value
    return record


def main():
    """Convert an ENCODE ``files.txt`` listing to a tab-delimited table.

    Reads the path given as ``sys.argv[1]`` and writes to stdout a header
    row followed by one row per input record.  The first column is
    ``filename``; the remaining columns are every metadata key seen in the
    file, sorted alphabetically so the layout is reproducible between runs.
    A key missing from a record is written as ``.``.

    Exits with status 1 (after a usage message on stderr) when no input
    path is supplied.
    """
    if len(sys.argv) < 2:
        sys.stderr.write("need 1 paras: %s <files.txt>\n" % sys.argv[0])
        sys.exit(1)

    # Parse once and keep the records: no seek(0) second pass is needed, and
    # the file handle is closed as soon as reading finishes.
    records = []
    with open(sys.argv[1], "r") as filesfhd:
        for raw in filesfhd:
            line = raw.rstrip()
            if not line:
                continue  # ignore blank lines
            records.append(_parse_line(line))

    metatypes = set()
    for record in records:
        metatypes.update(record)
    metatypes.discard("filename")
    columns = ["filename"] + sorted(metatypes)

    # sys.stdout.write works unchanged on both Python 2 and Python 3.
    write = sys.stdout.write
    write("\t".join(columns) + "\n")
    for record in records:
        write("\t".join(record.get(col, ".") for col in columns) + "\n")


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment