Skip to content

Instantly share code, notes, and snippets.

import re
import os.path
import time
# start = time. time()
def MeSH_DB(data_folder):
# Source: https://www.nlm.nih.gov/mesh/2019/download/2019New_Mesh_Tree_Hierarchy.txt
## Assignment 1 analysis script
library(GEOquery)
## getting the decompressed GEO series
gse <- getGEO(filename='data/GSE50697_family.soft') # data already downloaded to local file in the interest of time
names(GSMList(gse)) # getting the name of the samples in the dataset
for (gsm in GSMList(gse)) { # this is the condition we are interested in: whether we have miR-203 or not
print(Meta(gsm)[['characteristics_ch1']])
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE Query>
<Query virtualSchemaName = "plants_mart" formatter = "TSV" header = "0" uniqueRows = "0" count = "" datasetConfigVersion = "0.6" >
<Dataset name = "athaliana_eg_gene" interface = "default" >
<Filter name = "with_interpro" excluded = "0"/>
<Attribute name = "ensembl_gene_id" />
<Attribute name = "ensembl_transcript_id" />
<Attribute name = "ensembl_peptide_id" />
<Attribute name = "interpro" />
import os
import sqlite3
import json
from biothings import config
from biothings.utils.hub_db import IDatabase
from biothings.utils.dotfield import parse_dot_fields
from biothings.utils.dataload import update_dict_recur
from biothings.utils.common import json_serial
import os.path
import copy
from biothings.utils.common import SubStr
from biothings.utils.dataload import tab2dict, tab2list, value_convert, normalized_value, \
list2dict, dict_nodup, dict_attrmerge, tab2dict_iter
def _not_LRG(ld):
return not ld[1].startswith("LRG_")
import sys
import os
import time
from ftplib import FTP
import requests
import biothings, config
biothings.config_for_app(config)
from biothings.utils.common import timesofar, safewfile, is_int
@AnnaTSW0609
AnnaTSW0609 / PantherDB_Parser_draft_1.py
Last active June 16, 2018 14:59
The first draft of the PantherDB parser, plus a test data file
Data = [] # Create an empty list
import re
with open("testfile_PantherDB.txt", "r+") as testfile:
for line in testfile:
# Creating the items for the reference gene
# The four lines below create 1a (the name of the species-specific database)
# and 1b (the ID for that gene in the species-specific database)
split_list = re.split("[| \t \n]", line)
ESID= split_list [1]
ESID_title_break = re.split("[=]", ESID)