Skip to content

Instantly share code, notes, and snippets.

View azkalot1's full-sized avatar

Sergey Kolchenko azkalot1

  • Cellarity
  • Chicago
View GitHub Profile
# Left panel: histogram of log10-transformed total UMI counts per cell.
# NOTE(review): summing over axis=1 treats the rows of `mat` as cells. A 10x
# MEX matrix is normally laid out features x barcodes, so confirm that `mat`
# has been transposed before this cell runs -- TODO verify.
f, ax = plt.subplots(1, 2, figsize=(15, 5))
per_cell_sum = mat.sum(axis=1)
# The +1 pseudocount keeps log10 finite for rows whose total count is zero.
ax[0].hist(np.log10(per_cell_sum + 1))
# The "+-" term is the standard deviation itself. It used to be
# np.sqrt(np.std(...)), which is not a meaningful measure of spread.
ax[0].set_title(
    'Distribtion of #UMIs per cell\n min {}, max {}, mean {} +- {}'.format(
        np.min(per_cell_sum),
        np.max(per_cell_sum),
        np.mean(per_cell_sum),
        np.std(per_cell_sum),
    )
)
per_gene_sum = mat.sum(axis=0)
ax[1].hist(np.log10(per_gene_sum+1));
ax[1].set_title('Distribtion of #UMIs per gene\n min {}, max {}, mean {} +- {}'.format(min(per_gene_sum),
max(per_gene_sum), np.mean(per_gene_sum),
# Load a 10x Genomics feature-barcode matrix (Matrix Market files) from
# `matrix_dir`, as described at
# support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/output/matrices
matrix_dir = "filtered_feature_bc_matrix"

# Count matrix: read the sparse matrix and convert it straight to a dense
# ndarray, without going through an intermediate np.matrix.
mat = scipy.io.mmread(os.path.join(matrix_dir, "matrix.mtx")).toarray()

# Feature annotation table: one row per line of features.tsv, three columns.
features_path = os.path.join(matrix_dir, "features.tsv")
annotation = pd.read_csv(features_path, sep='\t', header=None)
annotation.columns = ['feature_ids', 'gene_names', 'feature_types']

# Barcodes: one per line. The context manager closes the file handle, which
# the previous bare open() call left open.
barcodes_path = os.path.join(matrix_dir, "barcodes.tsv")
with open(barcodes_path, 'r') as barcodes_file:
    barcodes = [line.strip() for line in barcodes_file]

print('Matrix dimensionality {}'.format(mat.shape))
@azkalot1
azkalot1 / load_depend.py
Created February 12, 2019 22:28
load_depend.py
import csv
import gzip
import os
import scipy.io
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import umap
from sklearn.cluster import Birch, AffinityPropagation, DBSCAN, MeanShift, SpectralClustering, AgglomerativeClustering, estimate_bandwidth