Skip to content

Instantly share code, notes, and snippets.

View jamessdixon's full-sized avatar

Jamie Dixon jamessdixon

View GitHub Profile
@jamessdixon
jamessdixon / experiment.ipynb
Created December 16, 2024 16:22
image validation experiment
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@jamessdixon
jamessdixon / image-validation.ipynb
Last active December 16, 2024 16:19
image validation against truth set
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# Write every entry of ``y_clusters`` to 'clusters_<flavor>.csv' as
# tab-separated text: one header line, then one line per cluster.
# NOTE(review): the enclosing ``def`` line is not visible here; ``flavor`` and
# ``y_clusters`` must already be bound when this runs.
file_name = 'clusters_' + flavor + '.csv'
# Column names double as the attribute names read from each cluster.
columns = ('score', 'pid', 'start', 'end', 'mz', 'charge')
# A single handle writes the header and all rows; the file is no longer
# re-opened in append mode for each row.
with open(file_name, 'w') as t:
    t.write('\t'.join(columns) + '\n')
    for y_cluster in y_clusters:
        t.write('\t'.join(str(getattr(y_cluster, name)) for name in columns) + '\n')
# Write ``postprocessed_alignments`` to 'postprocessed_alignments.csv' as
# tab-separated text: one header line, then one line per alignment.
# NOTE(review): the enclosing ``def`` line is not visible here;
# ``postprocessed_alignments`` must already be bound when this runs.
file_name = 'postprocessed_alignments.csv'
columns = (
    'label', 'left_protein', 'right_protein', 'sequence', 'b_score',
    'y_score', 'total_score', 'precursor_distance', 'extended_sequence',
    'alignment',
)
# A single handle writes the header and all rows; the file is no longer
# re-opened in append mode for each row.
with open(file_name, 'w') as t:
    t.write('\t'.join(columns) + '\n')
    for postprocessed_alignment in postprocessed_alignments:
        # Unpacking enforces that every record has exactly ten fields.
        (label, left_protein, right_protein, sequence, b_score, y_score,
         total_score, precursor_distance, extended_sequence,
         alignment) = postprocessed_alignment
        fields = (label, left_protein, right_protein, sequence, b_score,
                  y_score, total_score, precursor_distance,
                  extended_sequence, alignment)
        # Fix: the old row had no tab between sequence and b_score and ended
        # with a stray tab, so its columns did not match the header.
        t.write('\t'.join(map(str, fields)) + '\n')
# Write ``rescored_alignments`` to 'rescored_alignments.csv' as tab-separated
# text: one header line, then one line per record.
# NOTE(review): the enclosing ``def`` line is not visible here;
# ``rescored_alignments`` must already be bound when this runs.
file_name = 'rescored_alignments.csv'
columns = ('score', 'inv_dist', 'comb_seq', 'flavor')
# A single handle writes the header and all rows; the file is no longer
# re-opened in append mode for each row.
with open(file_name, 'w') as t:
    t.write('\t'.join(columns) + '\n')
    for rescored_alignment in rescored_alignments:
        # Unpacking enforces that every record has exactly four fields.
        score, inv_dist, comb_seq, flavor = rescored_alignment
        t.write('\t'.join(map(str, (score, inv_dist, comb_seq, flavor))) + '\n')
def write_rescored_to_disk(rescoreds, flavor):
    """Write rescored records to ``rescored_naturals_<flavor>.csv``.

    The file is created (or truncated) relative to the current working
    directory. It holds one header line followed by one line per record;
    fields are tab-separated despite the ``.csv`` extension.

    Args:
        rescoreds: Iterable of 4-item records
            ``(score, inv_dist, comb_seq, flavor)``. Each field is
            converted with ``str``.
        flavor: String inserted into the output file name.

    Raises:
        ValueError: If a record does not unpack into exactly four fields.
    """
    file_name = 'rescored_naturals_' + flavor + '.csv'
    columns = ('score', 'inv_dist', 'comb_seq', 'flavor')
    # A single handle writes the header and all rows; the file is no longer
    # re-opened in append mode for each row.
    with open(file_name, 'w') as out:
        out.write('\t'.join(columns) + '\n')
        for rescored in rescoreds:
            # Named ``row_flavor`` so the ``flavor`` parameter is not shadowed.
            score, inv_dist, comb_seq, row_flavor = rescored
            fields = (score, inv_dist, comb_seq, row_flavor)
            out.write('\t'.join(map(str, fields)) + '\n')
# Write ``alignments`` to 'alignments_<flavor>.csv' as tab-separated text:
# one header line, then one line per (b, y) pair.
# NOTE(review): the enclosing ``def`` line is not visible here; ``flavor`` and
# ``alignments`` must already be bound when this runs.
file_name = 'alignments_' + flavor + '.csv'
columns = (
    'b_mz', 'b_start', 'b_end', 'b_ion', 'b_charge', 'b_pid', 'b_score',
    'y_mz', 'y_start', 'y_end', 'y_ion', 'y_charge', 'y_pid', 'y_score',
)
with open(file_name, 'w') as t:
    # Fix: the old header ran 'b_pid' and 'b_score' together with no tab.
    t.write('\t'.join(columns) + '\n')
    for alignment in alignments:
        b, y = alignment
        # Unpacking enforces seven fields on each side of the pair.
        b_mass, b_start, b_end, b_ion, b_charge, b_pid, b_score = b
        y_mass, y_start, y_end, y_ion, y_charge, y_pid, y_score = y
        fields = (b_mass, b_start, b_end, b_ion, b_charge, b_pid, b_score,
                  y_mass, y_start, y_end, y_ion, y_charge, y_pid, y_score)
        # Fix: the old row had no tab between b_score and y_mass, and the
        # final append block had no body, so no row was ever written.
        t.write('\t'.join(map(str, fields)) + '\n')
def write_merged_hybrids_to_disk(merged_hybrids):
    """Write merged hybrids to ``merged_hybrids_A.csv``.

    The file is created (or truncated) relative to the current working
    directory. It holds one header line followed by one line per record;
    fields are tab-separated despite the ``.csv`` extension. Only the total
    score and the ``b`` side of each record are written.

    Args:
        merged_hybrids: Iterable of ``(total_score, b, y)`` records, where
            ``b`` unpacks into ``(pid, start, end, score, mz, charge,
            extensions, seq)``. ``y`` is required by the unpacking but is
            not written.

    Raises:
        ValueError: If a record or its ``b`` part has the wrong number of
            fields.
    """
    file_name = 'merged_hybrids_A.csv'
    columns = ('total_score', 'pid', 'start', 'end', 'score', 'mz',
               'charge', 'extensions', 'seq')
    # Fix: the original ended in an append ``with`` that had no body, so the
    # formatted rows were never written. Header and rows now share one handle.
    with open(file_name, 'w') as out:
        out.write('\t'.join(columns) + '\n')
        for merged_hybrid in merged_hybrids:
            total_score, b, _y = merged_hybrid
            b_pid, b_start, b_end, b_score, b_mz, b_charge, b_extensions, b_seq = b
            fields = (total_score, b_pid, b_start, b_end, b_score, b_mz,
                      b_charge, b_extensions, b_seq)
            out.write('\t'.join(map(str, fields)) + '\n')
def write_merged_seqs_to_disk(merged_seqs):
    """Write merged sequences to ``merged_seqs_.csv``.

    The file is created (or truncated) relative to the current working
    directory. It holds one header line followed by one line per record;
    fields are tab-separated despite the ``.csv`` extension. Only the total
    score and the ``b`` side of each record are written.

    Args:
        merged_seqs: Iterable of ``(total_score, b, y)`` records, where
            ``b`` unpacks into ``(pid, start, end, score, mz, charge,
            extensions, seq)``. ``y`` is required by the unpacking but is
            not written.

    Raises:
        ValueError: If a record or its ``b`` part has the wrong number of
            fields.
    """
    file_name = 'merged_seqs_.csv'
    columns = ('total_score', 'pid', 'start', 'end', 'score', 'mz',
               'charge', 'extensions', 'seq')
    # Fix: the original ended in an append ``with`` that had no body, so the
    # formatted rows were never written. Header and rows now share one handle.
    with open(file_name, 'w') as out:
        out.write('\t'.join(columns) + '\n')
        for merged_seq in merged_seqs:
            total_score, b, _y = merged_seq
            b_pid, b_start, b_end, b_score, b_mz, b_charge, b_extensions, b_seq = b
            fields = (total_score, b_pid, b_start, b_end, b_score, b_mz,
                      b_charge, b_extensions, b_seq)
            out.write('\t'.join(map(str, fields)) + '\n')
def write_sorted_clusters_to_disk(sorted_clusters, flavor):
    """Write clusters to ``sorted_clusters_<flavor>.csv``.

    The file is created (or truncated) relative to the current working
    directory. It holds one header line followed by one line per cluster;
    fields are tab-separated despite the ``.csv`` extension.

    Args:
        sorted_clusters: Iterable of objects exposing the attributes
            ``score``, ``pid``, ``start``, ``end``, ``mz``, ``charge``,
            ``extensions`` and ``seq``. Each value is converted with ``str``.
        flavor: String inserted into the output file name.

    Raises:
        AttributeError: If a cluster lacks one of the attributes above.
    """
    file_name = 'sorted_clusters_' + flavor + '.csv'
    # Column names double as the attribute names read from each cluster.
    columns = ('score', 'pid', 'start', 'end', 'mz', 'charge',
               'extensions', 'seq')
    # A single handle writes the header and all rows; the file is no longer
    # re-opened in append mode for each row.
    with open(file_name, 'w') as out:
        out.write('\t'.join(columns) + '\n')
        for sorted_cluster in sorted_clusters:
            values = (str(getattr(sorted_cluster, name)) for name in columns)
            out.write('\t'.join(values) + '\n')