AnnaTSW0609’s gists

AnnaTSW0609 / first_prototype_grammar_checker.py

Last active February 9, 2019 07:23

	import re
	import os.path
	import time

	# start = time. time()


	def MeSH_DB(data_folder):

	# Source: https://www.nlm.nih.gov/mesh/2019/download/2019New_Mesh_Tree_Hierarchy.txt

AnnaTSW0609 / Assignment 1 analysis script.R

Last active February 15, 2019 02:33

	## Assignment 1 analysis script

	library(GEOquery)

	## getting the decompressed GEO series
	gse <- getGEO(filename='data/GSE50697_family.soft') # data already downloaded to local file in the interest of time
	names(GSMList(gse)) # getting the name of the samples in the dataset

	for (gsm in GSMList(gse)) { # this is the condition we are interested in: whether we have miR-203 or not
	print(Meta(gsm)[['characteristics_ch1']])

AnnaTSW0609 / Get_interpro_XML_query

Last active July 30, 2018 02:20

	<?xml version="1.0" encoding="UTF-8"?>
	<!DOCTYPE Query>
	<Query virtualSchemaName = "plants_mart" formatter = "TSV" header = "0" uniqueRows = "0" count = "" datasetConfigVersion = "0.6" >

	<Dataset name = "athaliana_eg_gene" interface = "default" >
	<Filter name = "with_interpro" excluded = "0"/>
	<Attribute name = "ensembl_gene_id" />
	<Attribute name = "ensembl_transcript_id" />
	<Attribute name = "ensembl_peptide_id" />
	<Attribute name = "interpro" />

AnnaTSW0609 / sqlite3.py

Created July 21, 2018 04:15

	import os
	import sqlite3
	import json

	from biothings import config
	from biothings.utils.hub_db import IDatabase
	from biothings.utils.dotfield import parse_dot_fields
	from biothings.utils.dataload import update_dict_recur
	from biothings.utils.common import json_serial

AnnaTSW0609 / Plant_Parser_trial_1.py

Last active July 12, 2018 14:29

Plant_Parser

	import os.path
	import copy
	from biothings.utils.common import SubStr
	from biothings.utils.dataload import tab2dict, tab2list, value_convert, normalized_value, \
	list2dict, dict_nodup, dict_attrmerge, tab2dict_iter


	def _not_LRG(ld):
	return not ld[1].startswith("LRG_")

AnnaTSW0609 / Plant_Dumper_1st_draft.py

Last active July 2, 2018 02:44

Ensembl_Plant

	import sys
	import os
	import time
	from ftplib import FTP
	import requests

	import biothings, config
	biothings.config_for_app(config)

	from biothings.utils.common import timesofar, safewfile, is_int

AnnaTSW0609 / PantherDB_Parser_draft_1.py

Last active June 16, 2018 14:59

The first draft of the PantherDB Parser+test datafile

	Data = [] # Create an empty list
	import re
	with open("testfile_PantherDB.txt", "r+") as testfile:
	for line in testfile:
	# Creating the items for the reference gene
	# The four lines below create 1a (the name of the species-specific database)
	# and 1b (the ID for that gene in the species-specific database)
	split_list = re.split("[\| \t \n]", line)
	ESID= split_list [1]
	ESID_title_break = re.split("[=]", ESID)