Skip to content

Instantly share code, notes, and snippets.

@dniku
Last active November 8, 2015 14:48
Show Gist options
  • Save dniku/d7a11dd2d6d3994b192d to your computer and use it in GitHub Desktop.
Save dniku/d7a11dd2d6d3994b192d to your computer and use it in GitHub Desktop.
import os
import pandas as pd
# Root directory that holds the expression table, the gene lists and the output.
data_dir = 'data'
# Tab-separated expression table, read from directly inside data_dir.
expression_filename = 'Our_data_from_expression_console.for.scatterplot.txt'
# Directory whose entries are gene-list files (one gene identifier per line).
# Derived from data_dir so that relocating the data root moves it as well.
genes_dir = os.path.join(data_dir, 'genes')
# Directory that receives one filtered expression table per gene list.
output_dir = os.path.join(data_dir, 'output')
def patch_df(df):
    """Strip the trailing '.S1' from every value of the 'Systematic' column.

    The column is rewritten in place on ``df`` and the same DataFrame object
    is returned, so the result can be used directly in an assignment.

    Args:
        df: pandas DataFrame containing a string column named 'Systematic'.

    Returns:
        The DataFrame that was passed in, with the suffix removed.

    Raises:
        ValueError: if any 'Systematic' value does not end with '.S1'.
            Raised explicitly (rather than through ``assert``) so that the
            check is still performed when Python runs with ``-O``.
    """
    suffix = '.S1'
    systematic = df['Systematic']
    # Refuse to truncate identifiers that do not carry the expected suffix;
    # blindly slicing them would silently corrupt the gene names.
    if not systematic.str.endswith(suffix).all():
        raise ValueError(
            "every 'Systematic' value must end with %r" % suffix)
    df['Systematic'] = systematic.str[:-len(suffix)]
    return df
def filter_genes(df, input_dir, input_filename, output_dir):
    """Save the rows of ``df`` whose 'Systematic' value appears in a gene list.

    The gene list is read from ``input_dir/input_filename``, one identifier
    per line; surrounding whitespace is stripped and blank lines are ignored.
    Matching rows are written as a tab-separated file named
    ``<name>_expression<ext>`` inside ``output_dir``, where ``<name>`` and
    ``<ext>`` are the stem and extension of ``input_filename``.

    Args:
        df: pandas DataFrame with a 'Systematic' column to match against.
        input_dir: directory containing the gene-list file.
        input_filename: file name of the gene list inside ``input_dir``.
        output_dir: directory for the result; created if it does not exist.

    Returns:
        None. The result is written to disk.

    Raises:
        OSError: if ``output_dir`` cannot be created or the gene list cannot
            be opened.
    """
    input_path = os.path.join(input_dir, input_filename)
    name, ext = os.path.splitext(input_filename)
    output_path = os.path.join(output_dir, '%s_expression%s' % (name, ext))

    # Attempt the creation and tolerate "already exists" instead of checking
    # first: a check-then-create sequence can fail if the directory appears
    # between the two calls.
    try:
        os.makedirs(output_dir)
    except OSError:
        if not os.path.isdir(output_dir):
            raise

    with open(input_path, 'r') as f:
        stripped_lines = (line.strip() for line in f)
        genes = [gene for gene in stripped_lines if gene]

    df_filtered = df[df['Systematic'].isin(genes)]
    # The DataFrame index is written as the first column; pass index=False
    # to to_csv to omit it.
    df_filtered.to_csv(output_path, sep='\t')
if __name__ == '__main__':
    # Load the tab-separated expression table and strip the '.S1' suffix
    # from its 'Systematic' column so it can be matched against gene lists.
    df_expression = pd.read_csv(os.path.join(data_dir, expression_filename), sep='\t')
    df_expression = patch_df(df_expression)
    # os.listdir returns entries in arbitrary order and includes
    # sub-directories; sort for reproducible runs and skip anything that is
    # not a regular file, since filter_genes opens each entry for reading.
    for gene_list_filename in sorted(os.listdir(genes_dir)):
        if not os.path.isfile(os.path.join(genes_dir, gene_list_filename)):
            continue
        filter_genes(df_expression, genes_dir, gene_list_filename, output_dir)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment