Find cgMLST alleles unique to a group/cluster
GROUP | LOCUS1 | LOCUS2 | LOCUS3
---|---|---|---
A | 1 | 1 | 2
A | 2 | 1 | 2
B | 1 | 3 | 2
B | 2 | 4 | 2
B | 2 | 4 | 1
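The script below reads this table from a delimited text file; pd.read_csv is called with sep=None so the delimiter is sniffed automatically. As a sketch, a comma-separated version of the dummy data could be saved and passed to the script on the command line (for example, python find_unique.py data.csv, where both file names are only assumptions):

GROUP,LOCUS1,LOCUS2,LOCUS3
A,1,1,2
A,2,1,2
B,1,3,2
B,2,4,2
B,2,4,1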
'''
Find cgMLST alleles unique to a group.
'''
import click
import pandas as pd


def load_data(filename):
    '''
    Load a delimited text file into a pandas.DataFrame
    (sep=None lets pandas sniff the delimiter).
    '''
    return pd.read_csv(filename, engine="python", sep=None)


def find_unique(tab):
    '''
    1. One-hot encode the LOCUS columns.
    2. Group by the "GROUP" variable --- this assumes exactly two groups,
       an IN and an OUT group (here, A and B).
    3. For each group, compute the proportion of individuals that carry each allele.
    4. Taking A as the group of interest, flag the alleles whose proportion
       is 1.0 in group A and 0.0 in group B.
    5. Return a Boolean vector, one entry per allele.
    '''
    locus_cols = list(tab.columns[tab.columns.str.startswith("LOCUS")])
    tab_one = pd.get_dummies(tab, columns=locus_cols)
    # Proportion of isolates in each group carrying each allele.
    props = tab_one.groupby("GROUP").mean()
    # An allele is unique to A if every A isolate has it and no B isolate does.
    return props.apply(lambda col: col["A"] == 1.0 and col["B"] == 0.0)


@click.command()
@click.argument("input_file")
def run_finder(input_file):
    tab = load_data(input_file)
    unq = find_unique(tab)
    print(unq)


if __name__ == "__main__":
    run_finder()
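To make the steps listed in the find_unique docstring concrete, here is a small self-contained sketch (not part of the gist) that builds the dummy table in memory and prints the per-group allele proportions before the final uniqueness test:

import pandas as pd

# Dummy table from above, built in memory instead of being read from a file.
tab = pd.DataFrame({
    "GROUP":  ["A", "A", "B", "B", "B"],
    "LOCUS1": [1, 2, 1, 2, 2],
    "LOCUS2": [1, 1, 3, 4, 4],
    "LOCUS3": [2, 2, 2, 2, 1],
})

# Step 1: one-hot encode the LOCUS columns; LOCUS2 becomes LOCUS2_1, LOCUS2_3, LOCUS2_4.
tab_one = pd.get_dummies(tab, columns=["LOCUS1", "LOCUS2", "LOCUS3"])

# Steps 2-3: proportion of isolates in each group that carry each allele.
props = tab_one.groupby("GROUP").mean()
print(props)

# Steps 4-5: alleles present in every A isolate and absent from every B isolate.
print(props.apply(lambda col: col["A"] == 1.0 and col["B"] == 0.0))

In the proportions table, each row is a group and each column an allele such as LOCUS2_1; an allele counts as unique to group A when its proportion is 1.0 in the A row and 0.0 in the B row.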
When running this script with the dummy data, you get:
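With the cleaned-up script above, the printed Boolean Series looks roughly like this (exact formatting varies between pandas versions):

LOCUS1_1    False
LOCUS1_2    False
LOCUS2_1     True
LOCUS2_3    False
LOCUS2_4    False
LOCUS3_1    False
LOCUS3_2    False
dtype: bool

That is, allele 1 at LOCUS2 is the only allele carried by every group A isolate and by no group B isolate.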