This document contains lessons learned about Databricks programming, as well as some general best practices.
blobname = "miraw"
storageaccount = "rdmidlgen2"
mountname = "/rdmi"
configs = {"fs.azure.account.auth.type": "OAuth",
| # $Id: vim-keys.conf,v 1.2 2010-09-18 09:36:15 nicm Exp $ | |
| # | |
| # vim-keys.conf, v1.2 2010/09/12 | |
| # | |
| # By Daniel Thau. Public domain. | |
| # | |
| # This configuration file binds many vi- and vim-like bindings to the | |
| # appropriate tmux key bindings. Note that for many key bindings there is no | |
| # tmux analogue. This is intended for tmux 1.3, which handles pane selection | |
| # differently from the previous versions |
| # Functions for parallelizing things | |
| def init_spark(nproc=-1, appname="sparksession"): | |
| """Function to start a Spark executor.""" | |
| from pyspark.sql import SparkSession | |
| if nproc == -1: | |
| # Use all CPUs | |
| spark = SparkSession.builder.master( | |
| "local[*]").appName(appname).getOrCreate() | |
| else: |
# Run protein BLAST of the query sequences against the database.
# Output format 6 (tabular): the standard columns plus subject title
# (stitle) and per-query coverage (qcovs); uses 10 threads.
# NOTE(review): the file named "database.fa" is passed as -query and
# "fasta.fa" as -db -- confirm the two filenames are not swapped.
blastp -db fasta.fa -query database.fa \
    -outfmt "6 std stitle qcovs" -num_threads 10 -out out.blast
#!/bin/bash
# Build a BLAST database (blastdb version 5) from a FASTA file.
#
# Usage:  script.sh <fasta-file> [extra makeblastdb options...]
# Env:    TYPE  database type passed to -dbtype (default: prot)
#
# Exits with status 1 when no input file argument is given.
TYPE=${TYPE:-prot}

# Require a non-empty first argument as the input FASTA file.
[[ -n ${1} ]] && INFILE=${1} || exit 1
shift

# Quote expansions so filenames/options with spaces survive word
# splitting; "${@}" forwards any remaining arguments verbatim.
makeblastdb -in "${INFILE}" -dbtype "${TYPE}" -parse_seqids "${@}" -blastdb_version 5
| #!/usr/bin/env python | |
| import pandas as pd | |
| import click | |
| from Bio.Seq import Seq | |
| from Bio.SeqRecord import SeqRecord | |
| from Bio import SeqIO | |
| @click.command() |
# List unique values in a DataFrame column.
pd.unique(df.column_name.ravel())

# Convert Series dtype to numeric, coercing any non-numeric values to NaN.
# NOTE: Series.convert_objects(convert_numeric=True) was deprecated in
# pandas 0.17 and removed in pandas 1.0; pd.to_numeric(errors='coerce')
# is the supported replacement with the same coercing behavior.
df['col'] = pd.to_numeric(df['col'], errors='coerce')

# Keep only the DataFrame rows where the column has one of these values.
valuelist = ['value1', 'value2', 'value3']
df = df[df.column.isin(valuelist)]
| def flatten_json(y): | |
| out = {} | |
| def flatten(x, name=''): | |
| if type(x) is dict: | |
| for a in x: | |
| flatten(x[a], name + a + '_') | |
| elif type(x) is list: | |
| i = 0 | |
| for a in x: |
| #!/usr/bin/env python | |
| # Sequence alignment using PyMOL | |
| # The purpose of this script is to generate a sequence alignment between | |
| # the original crystal structure of the apo and holo models, and the sequence | |
| # of the finalised, ungapped Rosetta models. This allows us to get a 1 to 1 | |
| # correspondence between the residue numberings in both structures. | |
| # USAGE: Run once from the project root. | |
| # "pockets.csv" contains the information about apo holo pairs. |
This document contains lessons learned about Databricks programming, as well as some general best practices.
blobname = "miraw"
storageaccount = "rdmidlgen2"
mountname = "/rdmi"
configs = {"fs.azure.account.auth.type": "OAuth",