chrisjurich’s gists

chrisjurich / is_num.cpp

Last active February 26, 2020 02:16

	#include <iostream>
	#include <set>
	#include <map>

	bool
	is_num(std::string& string_token, std::set<char>& accepted_characters){
	auto accepted_chars = std::map<char,int>();
	for(const auto& character : string_token) {
	if(accepted_characters.find(character) == accepted_characters.end()) {
	if(character < '0' or character > '9') {

chrisjurich / AltString.cc

Created April 11, 2020 07:05

	#include <iostream>
	#include <vector>


	struct Strings : std::vector<std::string> {

	std::vector<std::string> tokens;
	// unfortunately you have to override all of the constructors
	Strings(std::vector<std::string> input_tokens) : tokens(std::move(input_tokens)) {}

chrisjurich / file_aggregator.py

Created May 9, 2020 02:42

	def is_header_file(file_name):
	    """Check whether ``file_name`` names a C++ header file.

	    The text after the last '.' in ``file_name`` is compared,
	    case-insensitively, against the header extensions h, hh, hpp, hxx
	    and h++.

	    Args:
	        file_name: File name (or path) as a string.

	    Returns:
	        True if the extension is one of the header extensions, else False.
	        A name containing no '.' has no extension and is never a header,
	        so a file literally called "h" or "hpp" is rejected.
	    """
	    # rpartition leaves `dot` empty when the name has no '.' at all, which
	    # lets us tell "no extension" apart from "extension equals whole name".
	    _stem, dot, extension = file_name.rpartition('.')
	    return bool(dot) and extension.lower() in ("h", "hh", "hpp", "hxx", "h++")

	def is_source_file(file_name):
	"""Method that checks if the file is a C++ source file"""

chrisjurich / eterna_recalculated.py

Last active June 24, 2020 17:58

	def eterna_recalculated(row,scale_max=2.3):
	"""Helper method that recalculates the Eterna score for an entry from a dataframe. It will then put the score back into the row. Please note that there is not a 1:1 correspondence between the actual and recalculated scores"""
	assert len(row["target_structure"]) == len(row["sequence"])
	# Sometimes there is a fingerprint sequence at the end of the structure; if that is the case, it needs to be removed.
	sequence = re.sub("AAAGAAACAACAACAACAAC$","",row["sequence"])
	# data_len is the number of data points that will be reviewed
	data_len = min(
	len(row["target_structure"]),
	len(row["SHAPE_data"]), # can probably get rid of this one
	len(sequence),

chrisjurich / eterna_blended.py

Created June 25, 2020 03:13

	def blended_eterna_score(row,scale_max=10):
	"""Method that finds the blended, non-binary eterna score for a structure. Takes the row from a pandas df as input"""
	assert len(row["target_structure"]) == len(row["sequence"])
	# Sometimes there is a fingerprint sequence at the end of the structure; if that is the case, it needs to be removed.
	sequence = re.sub("AAAGAAACAACAACAACAAC$","",row["sequence"])
	# data_len is the number of data points that will be reviewed
	data_len = min(
	len(row["target_structure"]),
	len(row["SHAPE_data"]), # can probably get rid of this one
	len(sequence),

chrisjurich / rec_checker.py

Created August 14, 2020 20:44

	import urllib.request
	from html.parser import HTMLParser
	from bs4 import BeautifulSoup
	import re
	import datetime

	###################################################################################
	####################### HELPER FUNCTIONS ##########################################
	def get_date_from_href(href):
	"""Helper method that gets date from a hypertext link. Assumes format of MM/DD/YYYY. Raises error if number of matches != 1"""

chrisjurich / structured_bindings.cpp

Created August 23, 2020 17:52

	#include<tuple>
	#include<vector>
	#include<map>
	#include<iostream>

	///////////////////////////////////////////////////////////////////
	///////////////////////////////////////////////////////////////////
	///////////////////////////////////////////////////////////////////
	// NOTE: you have to pass the flag "-std=c++17" to the compiler.
	///////////////////////////////////////////////////////////////////

chrisjurich / motif.py

Created January 8, 2021 09:40

	from enum import Enum
	from abc import ABC, abstractmethod
	from multipledispatch import dispatch

	# Enumeration of motif categories; each member is bound to a distinct
	# integer value (0 through 3).
	# NOTE(review): presumably these are RNA secondary-structure motif kinds --
	# confirm against the code that consumes this enum.
	class MotifType(Enum):
	SINGLESTRAND = 0
	HELIX = 1
	HAIRPIN = 2
	JUNCTION = 3

chrisjurich / dsci.py

Last active April 13, 2021 01:19

	from scipy.stats import mannwhitneyu

	# Source: https://www.biorxiv.org/content/10.1101/2020.06.29.178343v2.full.pdf

	def dsci( sequence, target, dms ):
	# TODO remember, have to replace the dead nt's with N
	assert len( sequence ) == len( target ) and len( target ) == len( dms )
	# first, gotta do the paired/unpaired
	paired, unpaired = [], []
	for nt, db, val in zip( sequence, target, dms):

chrisjurich / score_dms.py

Last active September 13, 2021 18:11

	import math

	def stat_test( lower_avg, lower_err, upper_avg, upper_err ):
	# Placeholder, not a real function yet: it should return a p-value for
	# the hypothesis that lower_avg < upper_avg.
	# Until it is implemented, every call raises TypeError("not implemented").
	raise TypeError("not implemented")