This document contains lessons learned about Databricks programming, as well as some general best practices.
blobname = "miraw"
storageaccount = "rdmidlgen2"
mountname = "/rdmi"
configs = {"fs.azure.account.auth.type": "OAuth",
| # $Id: vim-keys.conf,v 1.2 2010-09-18 09:36:15 nicm Exp $ | |
| # | |
| # vim-keys.conf, v1.2 2010/09/12 | |
| # | |
| # By Daniel Thau. Public domain. | |
| # | |
| # This configuration file binds many vi- and vim-like bindings to the | |
| # appropriate tmux key bindings. Note that for many key bindings there is no | |
| # tmux analogue. This is intended for tmux 1.3, which handles pane selection | |
| # differently from the previous versions |
| # Functions for parallelizing things | |
| def init_spark(nproc=-1, appname="sparksession"): | |
| """Function to start a Spark executor.""" | |
| from pyspark.sql import SparkSession | |
| if nproc == -1: | |
| # Use all CPUs | |
| spark = SparkSession.builder.master( | |
| "local[*]").appName(appname).getOrCreate() | |
| else: |
# Run protein BLAST of the query sequences against the database.
# Output format 6 (tabular): the standard columns plus subject title
# (stitle) and per-query coverage (qcovs); uses 10 threads.
# NOTE(review): the file named "database.fa" is passed as -query and
# "fasta.fa" as -db -- confirm the two filenames are not swapped.
blastp -db fasta.fa -query database.fa \
    -outfmt "6 std stitle qcovs" -num_threads 10 -out out.blast
#!/bin/bash
# Build a BLAST database (blastdb version 5) from a FASTA file.
#
# Usage:  script.sh <fasta-file> [extra makeblastdb options...]
# Env:    TYPE  database type passed to -dbtype (default: prot)
#
# Exits with status 1 when no input file argument is given.
TYPE=${TYPE:-prot}

# Require a non-empty first argument as the input FASTA file.
[[ -n ${1} ]] && INFILE=${1} || exit 1
shift

# Quote expansions so filenames/options with spaces survive word
# splitting; "${@}" forwards any remaining arguments verbatim.
makeblastdb -in "${INFILE}" -dbtype "${TYPE}" -parse_seqids "${@}" -blastdb_version 5
| #!/usr/bin/env python | |
| import pandas as pd | |
| import click | |
| from Bio.Seq import Seq | |
| from Bio.SeqRecord import SeqRecord | |
| from Bio import SeqIO | |
| @click.command() |
# List unique values in a DataFrame column.
pd.unique(df.column_name.ravel())

# Convert Series dtype to numeric, coercing any non-numeric values to NaN.
# NOTE: Series.convert_objects(convert_numeric=True) was deprecated in
# pandas 0.17 and removed in pandas 1.0; pd.to_numeric(errors='coerce')
# is the supported replacement with the same coercing behavior.
df['col'] = pd.to_numeric(df['col'], errors='coerce')

# Keep only the DataFrame rows where the column has one of these values.
valuelist = ['value1', 'value2', 'value3']
df = df[df.column.isin(valuelist)]
| def flatten_json(y): | |
| out = {} | |
| def flatten(x, name=''): | |
| if type(x) is dict: | |
| for a in x: | |
| flatten(x[a], name + a + '_') | |
| elif type(x) is list: | |
| i = 0 | |
| for a in x: |
| #!/usr/bin/env python | |
| # Sequence alignment using PyMOL | |
| # The purpose of this script is to generate a sequence alignment between | |
| # the original crystal structure of the apo and holo models, and the sequence | |
| # of the finalised, ungapped Rosetta models. This allows us to get a 1 to 1 | |
| # correspondence between the residue numberings in both structures. | |
| # USAGE: Run once from the project root. | |
| # "pockets.csv" contains the information about apo holo pairs. |
This document contains lessons learned about Databricks programming, as well as some general best practices.
blobname = "miraw"
storageaccount = "rdmidlgen2"
mountname = "/rdmi"
configs = {"fs.azure.account.auth.type": "OAuth",