Stephen Fordham StephenFordham

Interested in Bioinformatics, web scraping, data visualisation and ML. Tutorials posted via Medium, link below

StephenFordham / pandas_multiple.py

Last active April 22, 2019 20:41

pandas_multiple

	import pandas as pd

	# Wikipedia article for the 2018–19 Premier League season (URL-encoded en dash in the title).
	PREMIER_LEAGUE_URL = 'https://en.wikipedia.org/wiki/2018%E2%80%9319_Premier_League'

	# pd.read_html returns a list of DataFrames, one per table parsed from the page.
	premier_table = pd.read_html(PREMIER_LEAGUE_URL)

	# Report how many tables were found.
	table_count = len(premier_table)
	print(table_count)

	#Output
	20

StephenFordham / prem_head.py

Created April 22, 2019 20:42

prem_head

	# Take the DataFrame at index 4 of premier_table.
	# NOTE(review): presumably the league-standings table; the index depends on the
	# layout of the source page at scrape time — confirm.
	prem = premier_table[4]
	# Preview the first rows of the selected table.
	print(prem.head())

StephenFordham / series_convert_error.py

Created April 22, 2019 20:45

series_convert_error

# NOTE(review): pd.to_numeric is given the literal one-element list ['GD'], not the
# column prem['GD']. Presumably prem['GD'] was intended — confirm; this snippet is
# titled "series_convert_error", so the failure may be deliberate.
# errors='coerce' makes pd.to_numeric yield NaN for values it cannot parse.
prem['GD'] = pd.to_numeric(['GD'], errors='coerce')

StephenFordham / removing_prefix.py

Last active April 22, 2019 21:42

removing_prefix

	def remove_pos_neg(goal_diff):
	    """Return *goal_diff* without a single leading '+' or '-' sign.

	    Args:
	        goal_diff: Goal-difference value as a string, e.g. '+64' or '-16'.

	    Returns:
	        The string with its first character dropped when that character is
	        '+' or '-'; otherwise the string unchanged.
	    """
	    # str.startswith accepts a tuple of prefixes, so one call covers both signs.
	    if goal_diff.startswith(('+', '-')):
	        return goal_diff[1:]
	    return goal_diff

	# Sanity checks: a leading '+' or '-' is stripped; an unsigned value is returned unchanged.
	assert remove_pos_neg('+64') == '64'
	assert remove_pos_neg('-16') == '16'
	assert remove_pos_neg('65') == '65'

StephenFordham / apply_func.py

Created April 22, 2019 20:56

apply_func

	# Run remove_pos_neg on every value of the 'GD' column and store the result back in that column.
	prem['GD'] = prem['GD'].apply(remove_pos_neg)

	# Preview the first rows to inspect the updated column.
	print(prem.head())

StephenFordham / parsing_dataframes.py

Created May 2, 2019 19:42

parsing_dataframes

	import pandas as pd

	# Load the two CSV files into DataFrames A and B.
	A = pd.read_csv('query_sequences.csv')
	B = pd.read_csv('Sequence_reference.csv')

	# Show the column labels of each DataFrame.
	print(A.columns)
	print(B.columns)

	Output:

StephenFordham / creating_lists.py

Created May 3, 2019 08:50

creating_lists

	# Convert the 'Unknown_sample_no' and 'Unknown_sample_seq' columns of A to Python lists.
	my_unknown_id = A['Unknown_sample_no'].tolist()
	my_unknown_seq = A['Unknown_sample_seq'].tolist()

	# Convert the 'Reference_sequences_ID' and 'Reference_sequences' columns of B to Python lists.
	Reference_species = B['Reference_sequences_ID'].tolist()
	Reference_sequences = B['Reference_sequences'].tolist()

StephenFordham / creating_dictionaries.py

Created May 3, 2019 08:52

creating_dictionaries

	# Map each entry of Reference_species to the entry of Reference_sequences at the same position.
	Ref_dict = {
	    species_id: sequence
	    for species_id, sequence in zip(Reference_species, Reference_sequences)
	}
	# Map each entry of my_unknown_id to the entry of my_unknown_seq at the same position.
	Unknown_dict = {
	    sample_id: sequence
	    for sample_id, sequence in zip(my_unknown_id, my_unknown_seq)
	}

	# Display both mappings.
	print(Ref_dict)
	print(Unknown_dict)

	Output:

	{'A': 'AAAAGCGCGAGGGGGGA', 'K': 'GGGAGAGAGGG', 'Y': 'CGGAGCGTTT', 'T': 'TTTTAGAGAGCTCTG', 'P': 'TAGAGAGCGGCC', 'E': 'GAAGGCGCT', 'V': 'TATAGCGCGCG', 'M': 'TAGAGCGCGA', 'N': 'GGCTCCGGGAGA', 'Q': 'GGGGCCCCCATA'}

StephenFordham / seq_comparison.py

Created May 3, 2019 09:22

seq_comparison

	import re
	# Path of the CSV file that will receive the comparison results.
	filename = 'seq_match_compare.csv'
	# Open for writing; mode 'w' truncates an existing file.
	# NOTE(review): no close() or `with` block is visible in this snippet — confirm the handle is closed later.
	f = open(filename, 'w')


	# Header row for the output file.
	# NOTE(review): fields are separated by ', ' (comma + space), so every column
	# name after the first begins with a space when the file is parsed as CSV.
	headers = 'Query_ID, Query_Seq, Ref_species, Ref_seq, Match, Match start Position\n'
	f.write(headers)

	for ID, seq in Unknown_dict.items():
	for species, seq1 in Ref_dict.items():

StephenFordham / multi_list.py

Created May 8, 2019 17:54

multi_list