Skip to content

Instantly share code, notes, and snippets.

@olgabot
Last active August 29, 2015 14:00
Show Gist options
  • Save olgabot/1b4234c28b245e52bfc0 to your computer and use it in GitHub Desktop.
Save olgabot/1b4234c28b245e52bfc0 to your computer and use it in GitHub Desktop.
import io

import pandas as pd
# Example splice-junction table serialized as JSON: one row per
# (sample_id, junction), keyed by the original row label.
# NOTE(review): the name `json` shadows the stdlib module of the same name; it
# is kept so that anything referring to this variable keeps working.
json = u'{"chrom":{"18716":"chrM","28459":"chrM","41509":"chrM","22813":"chrM"},"first_bp_intron":{"18716":728,"28459":728,"41509":728,"22813":728},"last_bp_intron":{"18716":5369,"28459":5295,"41509":5295,"22813":5295},"strand":{"18716":2,"28459":2,"41509":2,"22813":2},"intron_motif":{"18716":"CT\\/AC","28459":"CT\\/GC","41509":"CT\\/GC","22813":"CT\\/GC"},"annotated":{"18716":false,"28459":false,"41509":false,"22813":false},"unique_junction_reads":{"18716":99,"28459":1407,"41509":1,"22813":139},"multimap_junction_reads":{"18716":0,"28459":0,"41509":0,"22813":0},"max_overhang":{"18716":49,"28459":50,"41509":34,"22813":45},"sample_id":{"18716":"M4_9","28459":"M6_4","41509":"M6_6","22813":"M1_08"},"multimap_junction_reads_filtered":{"18716":0,"28459":0,"41509":0,"22813":0},"unique_junction_reads_filtered":{"18716":99,"28459":1407,"41509":0,"22813":139},"total_filtered_reads":{"18716":99,"28459":1407,"41509":0,"22813":139},"psi5":{"18716":1.0,"28459":1.0,"41509":null,"22813":1.0},"psi3":{"18716":1.0,"28459":1.0,"41509":null,"22813":1.0}}'
# Wrap the literal in a file-like object: passing a raw JSON string straight
# to read_json is deprecated in recent pandas releases.
df = pd.read_json(io.StringIO(json))

# Count-like columns that are set to 0 in the rows synthesized for junctions a
# sample did not observe.
replace_cols = ('unique_junction_reads', 'multimap_junction_reads',
                'max_overhang', 'multimap_junction_reads_filtered',
                'unique_junction_reads_filtered', 'total_filtered_reads')
# The psi column is hard-coded to 'psi5' because this example dataframe was
# chosen so that first_bp_intron is the same for every row.
psi_to_replace = 'psi5'

# For every sample, add one zero-filled row for each last_bp_intron that
# appears somewhere in `df` but not in that sample.
all_last_bp_intron = set(df.last_bp_intron)
# Collect the pieces and concatenate once at the end instead of re-copying the
# growing frame on every iteration.
frames = [df]
for sample_id, sample_df in df.groupby('sample_id'):
    unobserved_last_bp_intron = (
        all_last_bp_intron - set(sample_df.last_bp_intron))
    # %-formatting gives the same output under the Python 2 print statement
    # and the Python 3 print function.
    print('unobserved_last_bp_intron %s' % (unobserved_last_bp_intron,))
    if not unobserved_last_bp_intron:
        continue
    # Use the boolean mask itself as the row selector (calling .unique() on
    # it would collapse it to at most two values), then keep a single
    # template row per junction so each missing junction is added once.
    is_unobserved = df.last_bp_intron.isin(unobserved_last_bp_intron)
    unobserved_df = (
        df[is_unobserved].drop_duplicates(subset='last_bp_intron').copy())
    unobserved_df['sample_id'] = sample_id
    # .loc needs a list here: a tuple would be read as one (MultiIndex) key.
    unobserved_df.loc[:, list(replace_cols)] = 0
    unobserved_df[psi_to_replace] = 0
    frames.append(unobserved_df)
# Row labels of the synthesized rows are inherited from their template rows,
# so the result's index contains repeated labels.
growing_df = pd.concat(frames)
growing_df
# Alternative approach: reindex the table onto the full Cartesian grid of
# (sample_id, chrom, first_bp_intron, last_bp_intron) so that combinations
# missing from `df` show up as all-NaN rows.
# sample_id is part of the key because several samples share the same
# junction; without it the index is not unique and cannot be reindexed.
index_cols = ['sample_id', 'chrom', 'first_bp_intron', 'last_bp_intron']
test_df = df
# from_product expects one iterable of unique values per index level.
new_index = pd.MultiIndex.from_product(
    [test_df[col].unique() for col in index_cols], names=index_cols)
test_df = test_df.set_index(index_cols)
# The reindexed frame is not assigned; this line only displays it when run
# interactively.
test_df.reindex(new_index)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment