Skip to content

Instantly share code, notes, and snippets.

View StephenFordham's full-sized avatar

Stephen Fordham StephenFordham

View GitHub Profile
@StephenFordham
StephenFordham / pandas_multiple.py
Last active April 22, 2019 20:41
pandas_multiple
import pandas as pd
# Parse every HTML table found on the 2018-19 Premier League Wikipedia page.
# pd.read_html returns the tables as a list of DataFrames (needs network access).
premier_table = pd.read_html('https://en.wikipedia.org/wiki/2018%E2%80%9319_Premier_League')
# Report how many tables were parsed from the page.
print(len(premier_table))
# Output recorded in the gist for the print above:
20
# Pick the fifth table (index 4).
# NOTE(review): presumably the league standings table; the index depends on the
# page layout at scrape time, so confirm it still points at the intended table.
prem = premier_table[4]
# Preview the first rows of the selected table.
print(prem.head())
@StephenFordham
StephenFordham / series_convert_error.py
Created April 22, 2019 20:45
series_convert_error
# Convert the 'GD' column to a numeric dtype. The column itself (prem['GD'])
# must be handed to pd.to_numeric: the list literal ['GD'] would only attempt
# to parse the single string 'GD' and yield one NaN, which cannot be assigned
# back to the whole column. errors='coerce' turns unparseable entries into NaN.
prem['GD'] = pd.to_numeric(prem['GD'], errors='coerce')
@StephenFordham
StephenFordham / removing_prefix.py
Last active April 22, 2019 21:42
removing_prefix
def remove_pos_neg(goal_diff):
    """Return *goal_diff* without a leading '+' or '-' sign.

    Only a single leading sign character is dropped; a string that does not
    start with '+' or '-' (including the empty string) is returned unchanged.

    Args:
        goal_diff: Goal difference as a string, e.g. '+64', '-16' or '65'.

    Returns:
        The input with its first character removed when that character is
        '+' or '-', otherwise the input itself.
    """
    # str.startswith accepts a tuple, so both signs are checked in one call.
    if goal_diff.startswith(('+', '-')):
        return goal_diff[1:]
    return goal_diff
# Sanity checks: a leading '+' or '-' is dropped, unsigned input is unchanged.
assert remove_pos_neg('+64') == '64'
assert remove_pos_neg('-16') == '16'
assert remove_pos_neg('65') == '65'
# Strip the leading sign from every entry of the 'GD' column; the entries must
# be strings because remove_pos_neg calls str.startswith on each one.
prem['GD'] = prem['GD'].apply(remove_pos_neg)
# Preview the table after the column has been rewritten.
print(prem.head())
import pandas as pd
# Load the two input tables from CSV files in the current working directory.
# A is read from the query-sequences file, B from the reference-sequences file.
A = pd.read_csv('query_sequences.csv')
B = pd.read_csv('Sequence_reference.csv')
# Show the column labels of each table.
print(A.columns)
print(B.columns)
Output:
# Pull the columns of interest out of each DataFrame as plain Python lists.
# From table A: the 'Unknown_sample_no' and 'Unknown_sample_seq' columns.
my_unknown_id = A['Unknown_sample_no'].tolist()
my_unknown_seq = A['Unknown_sample_seq'].tolist()
# From table B: the 'Reference_sequences_ID' and 'Reference_sequences' columns.
Reference_species = B['Reference_sequences_ID'].tolist()
Reference_sequences = B['Reference_sequences'].tolist()
@StephenFordham
StephenFordham / creating_dictionaries.py
Created May 3, 2019 08:52
creating_dictionaries
# Build lookup tables by pairing the parallel lists element by element:
# reference ID -> reference sequence, and unknown sample ID -> sample sequence.
# As with any dict, a repeated ID keeps only its last paired sequence.
Ref_dict = {
    ref_id: ref_seq
    for ref_id, ref_seq in zip(Reference_species, Reference_sequences)
}
Unknown_dict = {
    sample_id: sample_seq
    for sample_id, sample_seq in zip(my_unknown_id, my_unknown_seq)
}
# Display both mappings.
print(Ref_dict)
print(Unknown_dict)
Output:
{'A': 'AAAAGCGCGAGGGGGGA', 'K': 'GGGAGAGAGGG', 'Y': 'CGGAGCGTTT', 'T': 'TTTTAGAGAGCTCTG', 'P': 'TAGAGAGCGGCC', 'E': 'GAAGGCGCT', 'V': 'TATAGCGCGCG', 'M': 'TAGAGCGCGA', 'N': 'GGCTCCGGGAGA', 'Q': 'GGGGCCCCCATA'}
import re
# Path of the CSV report that the comparison results are written to.
filename = 'seq_match_compare.csv'
# Mode 'w' creates the file, or truncates it if it already exists.
# NOTE(review): no matching f.close() is visible in this excerpt — confirm the
# handle is closed (or switch to a `with` block) once all rows are written.
f = open(filename, 'w')
# Header row for the report.
# NOTE(review): every comma is followed by a space, so each column name after
# the first starts with a space when the file is parsed as CSV — confirm intended.
headers = 'Query_ID, Query_Seq, Ref_species, Ref_seq, Match, Match start Position\n'
f.write(headers)
for ID, seq in Unknown_dict.items():
for species, seq1 in Ref_dict.items():
# Read the whole CSV file as text; the context manager guarantees the file
# handle is closed even if reading fails (the original never closed it).
with open('Premier_league.csv') as f:
    fString = f.read()
# Split the text into rows on newlines and each row into fields on commas.
# This is a plain split: quoted fields containing commas are not handled, and
# a trailing newline in the file produces a final [''] row.
fList = [line.split(',') for line in fString.split('\n')]
# NOTE(review): the source's indentation was lost; printing once after all rows
# are parsed is assumed rather than printing inside the loop.
print(fList)
Output: