Created
December 31, 2020 17:58
-
-
Save sanxiyn/fddd1f18074076fb47e04733e6b62865 to your computer and use it in GitHub Desktop.
BNT162b2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import python_codon_tables | |
def read_csv_file(file_path):
    """Read a CSV file and return all of its records.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A list with one entry per CSV record, in file order. Each record is
        a list of the field strings produced by ``csv.reader``. No row is
        skipped here, so a header row (if the file has one) is included.
    """
    # The csv module expects files opened with newline="" so that it can do
    # its own line-ending handling; without it, "\r\n" sequences embedded in
    # quoted fields are rewritten by universal-newline translation.
    with open(file_path, newline="") as f:
        return list(csv.reader(f))
def use_most_frequent(species):
    """Build a codon -> most-frequent-synonymous-codon substitution map.

    The codon usage table for ``species`` is fetched with
    ``python_codon_tables.get_codons_table``. The table is iterated as a
    mapping whose values are themselves mappings from codon to frequency.
    Within each such group, every codon is mapped to the codon with the
    highest frequency.

    Ties are resolved in favour of the alphabetically first codon. If no
    codon in a group has a frequency above zero, all codons of that group
    map to ``None``.

    Args:
        species: Table identifier handed to
            ``python_codon_tables.get_codons_table``.

    Returns:
        A dict mapping each codon to the chosen codon of its group.
    """
    codon_usage = python_codon_tables.get_codons_table(species)
    mapping = {}
    for usage in codon_usage.values():
        ordered = sorted(usage)
        # max() keeps the first maximal element, so iterating in sorted
        # order gives alphabetical tie-breaking; only strictly positive
        # frequencies are eligible.
        winner = max(
            (codon for codon in ordered if usage[codon] > 0),
            key=lambda codon: usage[codon],
            default=None,
        )
        for codon in ordered:
            mapping[codon] = winner
    return mapping
if __name__ == "__main__":
    # Substitution map built from the Homo sapiens codon usage table.
    codon_map = use_most_frequent("h_sapiens_9606")

    # Drop the first CSV row (header); each remaining row is unpacked as
    # (ignored first column, virus codon, vaccine codon).
    rows = read_csv_file("side-by-side.csv")[1:]

    # Count the rows where the vaccine codon equals the most frequent
    # codon substituted for the virus codon.
    hits = sum(
        1
        for _, virus_codon, vaccine_codon in rows
        if codon_map[virus_codon] == vaccine_codon
    )

    # Report the share of matching rows as a percentage.
    print("{:.1f}%".format(100 * hits / len(rows)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I've made some changes to include the proline substitutions and the other ways of calculating the score.
When comparing bases instead of codons, the score goes up to 90.97%, and it is slightly better using Mus musculus, at 91.08%.