This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <set> | |
#include <map> | |
bool | |
is_num(std::string& string_token, std::set<char>& accepted_characters){ | |
auto accepted_chars = std::map<char,int>(); | |
for(const auto& character : string_token) { | |
if(accepted_characters.find(character) == accepted_characters.end()) { | |
if(character < '0' or character > '9') { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <vector> | |
struct Strings : std::vector<std::string> { | |
std::vector<std::string> tokens; | |
// unfortunately you have to override all of the constructors | |
Strings(std::vector<std::string> input_tokens) : tokens(std::move(input_tokens)) {} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def is_header_file(file_name):
    """Check whether ``file_name`` carries a C++ header extension.

    The text after the last ``.`` in ``file_name`` is compared
    case-insensitively against the extensions ``h``, ``hh``, ``hpp``,
    ``hxx`` and ``h++``.

    Args:
        file_name: File name (or path) as a string.

    Returns:
        True if the name ends in one of the header extensions, False
        otherwise. A name that contains no ``.`` has no extension and is
        therefore never reported as a header, even when the whole name
        happens to equal an extension (e.g. ``"h"`` or ``"hpp"``).
    """
    # rpartition leaves `dot` empty when there is no '.', which lets us tell
    # "no extension at all" apart from "the extension is the whole name".
    _, dot, extension = file_name.rpartition('.')
    return bool(dot) and extension.lower() in ("h", "hh", "hpp", "hxx", "h++")
def is_source_file(file_name): | |
"""Method that checks if the file is a C++ source file""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def eterna_recalculated(row,scale_max=2.3): | |
"""Helper method that recalculates the Eterna score for an entry from a dataframe. It will then put the score back into the row. Please note that there is not a 1:1 correspondence between the actual and recalculated scores""" | |
assert len(row["target_structure"]) == len(row["sequence"]) | |
# sometimes there is a fingerprint sequence at the end of the structure; if that is the case it needs to be removed | |
sequence = re.sub("AAAGAAACAACAACAACAAC$","",row["sequence"]) | |
# data_len is the number of data points that will be reviewed | |
data_len = min( | |
len(row["target_structure"]), | |
len(row["SHAPE_data"]), # can probably get rid of this one | |
len(sequence), |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def blended_eterna_score(row,scale_max=10): | |
"""Method that finds the blended, non-binary eterna score for a structure. Takes the row from a pandas df as input""" | |
assert len(row["target_structure"]) == len(row["sequence"]) | |
# sometimes there is a fingerprint sequence at the end of the structure; if that is the case it needs to be removed | |
sequence = re.sub("AAAGAAACAACAACAACAAC$","",row["sequence"]) | |
# data_len is the number of data points that will be reviewed | |
data_len = min( | |
len(row["target_structure"]), | |
len(row["SHAPE_data"]), # can probably get rid of this one | |
len(sequence), |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import urllib.request | |
from html.parser import HTMLParser | |
from bs4 import BeautifulSoup | |
import re | |
import datetime | |
################################################################################### | |
####################### HELPER FUNCTIONS ########################################## | |
def get_date_from_href(href): | |
"""Helper method that gets date from a hypertext link. Assumes format of MM/DD/YYYY. Raises error if number of matches != 1""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include<tuple> | |
#include<vector> | |
#include<map> | |
#include<iostream> | |
/////////////////////////////////////////////////////////////////// | |
/////////////////////////////////////////////////////////////////// | |
/////////////////////////////////////////////////////////////////// | |
// NOTE: you have to pass the flag "-std=c++17" to the compiler. | |
/////////////////////////////////////////////////////////////////// |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from enum import Enum | |
from abc import ABC, abstractmethod | |
from multipledispatch import dispatch | |
class MotifType(Enum): | |
SINGLESTRAND = 0 | |
HELIX = 1 | |
HAIRPIN = 2 | |
JUNCTION = 3 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scipy.stats import mannwhitneyu | |
# Source: https://www.biorxiv.org/content/10.1101/2020.06.29.178343v2.full.pdf | |
def dsci( sequence, target, dms ): | |
# TODO remember, have to replace the dead nt's with N | |
assert len( sequence ) == len( target ) and len( target ) == len( dms ) | |
# first, gotta do the paired/unpaired | |
paired, unpaired = [], [] | |
for nt, db, val in zip( sequence, target, dms): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
def stat_test( lower_avg, lower_err, upper_avg, upper_err ):
    """Placeholder for a statistical test; currently always raises.

    Not a real function yet. It is intended to return a p-value for the
    hypothesis that ``lower_avg < upper_avg``.

    Args:
        lower_avg: Average of the lower group (unused for now).
        lower_err: Error on ``lower_avg`` (unused for now).
        upper_avg: Average of the upper group (unused for now).
        upper_err: Error on ``upper_avg`` (unused for now).

    Raises:
        TypeError: Always, with the message "not implemented".
    """
    raise TypeError("not implemented")