Skip to content

Instantly share code, notes, and snippets.

@alexlenail
Created January 21, 2019 02:16
Show Gist options
  • Save alexlenail/d340f69b77f5fad695f77452babd23f8 to your computer and use it in GitHub Desktop.
Save alexlenail/d340f69b77f5fad695f77452babd23f8 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Test NetReg\n",
"\n",
"Docs https://dirmeier.github.io/netReg/articles/netReg_commandline.html"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import numpy as np\n",
"import pandas as pd\n",
"import networkx as nx"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### First half is class 1, second half is class 0"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
" 1., 1., 1., 1., 1., 1., 1., -1., -1., -1., -1., -1., -1.,\n",
" -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,\n",
" -1.])"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"NUM_POSITIVES = 20\n",
"NUM_NEGATIVES = 20\n",
"y = np.concatenate((np.ones(NUM_POSITIVES), np.zeros(NUM_NEGATIVES)-1))\n",
"y"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"pd.Series(y).to_csv('./Y.tsv', sep='\\t', index=False, header=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 0th column is identical to the response variable; so a parameter vector of [1, 0...] would fit with 100% accuracy"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" <th>6</th>\n",
" <th>7</th>\n",
" <th>8</th>\n",
" <th>9</th>\n",
" <th>...</th>\n",
" <th>20</th>\n",
" <th>21</th>\n",
" <th>22</th>\n",
" <th>23</th>\n",
" <th>24</th>\n",
" <th>25</th>\n",
" <th>26</th>\n",
" <th>27</th>\n",
" <th>28</th>\n",
" <th>29</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1.0</td>\n",
" <td>0.111089</td>\n",
" <td>-1.323561</td>\n",
" <td>-1.674873</td>\n",
" <td>0.634947</td>\n",
" <td>-1.627697</td>\n",
" <td>-0.227472</td>\n",
" <td>0.699019</td>\n",
" <td>0.830343</td>\n",
" <td>-0.692991</td>\n",
" <td>...</td>\n",
" <td>0.428539</td>\n",
" <td>-2.653763</td>\n",
" <td>0.694787</td>\n",
" <td>-0.057533</td>\n",
" <td>0.888611</td>\n",
" <td>0.501566</td>\n",
" <td>-1.318099</td>\n",
" <td>1.141719</td>\n",
" <td>-1.836204</td>\n",
" <td>-1.041076</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1.0</td>\n",
" <td>-1.110794</td>\n",
" <td>-1.395908</td>\n",
" <td>0.294691</td>\n",
" <td>1.992291</td>\n",
" <td>1.329013</td>\n",
" <td>-0.407797</td>\n",
" <td>0.886473</td>\n",
" <td>1.432863</td>\n",
" <td>2.084924</td>\n",
" <td>...</td>\n",
" <td>0.180092</td>\n",
" <td>1.789783</td>\n",
" <td>0.288190</td>\n",
" <td>1.419874</td>\n",
" <td>-0.851897</td>\n",
" <td>0.319064</td>\n",
" <td>0.140920</td>\n",
" <td>0.961895</td>\n",
" <td>-1.970683</td>\n",
" <td>-0.107190</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1.0</td>\n",
" <td>0.346636</td>\n",
" <td>0.704714</td>\n",
" <td>0.729598</td>\n",
" <td>-0.689270</td>\n",
" <td>-0.441797</td>\n",
" <td>-1.576284</td>\n",
" <td>1.682755</td>\n",
" <td>-1.820408</td>\n",
" <td>0.017732</td>\n",
" <td>...</td>\n",
" <td>1.355737</td>\n",
" <td>2.159160</td>\n",
" <td>-0.252891</td>\n",
" <td>-0.243918</td>\n",
" <td>0.210844</td>\n",
" <td>-1.049625</td>\n",
" <td>1.076635</td>\n",
" <td>1.113430</td>\n",
" <td>0.414070</td>\n",
" <td>-1.883389</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1.0</td>\n",
" <td>-1.010921</td>\n",
" <td>0.728710</td>\n",
" <td>-1.651887</td>\n",
" <td>1.559163</td>\n",
" <td>-0.476617</td>\n",
" <td>2.287806</td>\n",
" <td>-0.163085</td>\n",
" <td>-0.286451</td>\n",
" <td>-0.111877</td>\n",
" <td>...</td>\n",
" <td>-1.777078</td>\n",
" <td>0.997048</td>\n",
" <td>1.896403</td>\n",
" <td>0.694598</td>\n",
" <td>-2.278491</td>\n",
" <td>-0.927568</td>\n",
" <td>-0.157392</td>\n",
" <td>-0.453236</td>\n",
" <td>1.226394</td>\n",
" <td>0.454105</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1.0</td>\n",
" <td>2.259994</td>\n",
" <td>-1.389239</td>\n",
" <td>-0.601431</td>\n",
" <td>0.472016</td>\n",
" <td>-0.470637</td>\n",
" <td>-0.523584</td>\n",
" <td>-0.860012</td>\n",
" <td>0.094330</td>\n",
" <td>-0.106012</td>\n",
" <td>...</td>\n",
" <td>1.637111</td>\n",
" <td>-0.878768</td>\n",
" <td>-2.569540</td>\n",
" <td>-0.388560</td>\n",
" <td>-1.646358</td>\n",
" <td>0.033012</td>\n",
" <td>0.328868</td>\n",
" <td>-0.246306</td>\n",
" <td>-1.815482</td>\n",
" <td>-1.717986</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
" 0 1 2 3 4 5 6 7 \\\n",
"0 1.0 0.111089 -1.323561 -1.674873 0.634947 -1.627697 -0.227472 0.699019 \n",
"1 1.0 -1.110794 -1.395908 0.294691 1.992291 1.329013 -0.407797 0.886473 \n",
"2 1.0 0.346636 0.704714 0.729598 -0.689270 -0.441797 -1.576284 1.682755 \n",
"3 1.0 -1.010921 0.728710 -1.651887 1.559163 -0.476617 2.287806 -0.163085 \n",
"4 1.0 2.259994 -1.389239 -0.601431 0.472016 -0.470637 -0.523584 -0.860012 \n",
"\n",
" 8 9 ... 20 21 22 23 \\\n",
"0 0.830343 -0.692991 ... 0.428539 -2.653763 0.694787 -0.057533 \n",
"1 1.432863 2.084924 ... 0.180092 1.789783 0.288190 1.419874 \n",
"2 -1.820408 0.017732 ... 1.355737 2.159160 -0.252891 -0.243918 \n",
"3 -0.286451 -0.111877 ... -1.777078 0.997048 1.896403 0.694598 \n",
"4 0.094330 -0.106012 ... 1.637111 -0.878768 -2.569540 -0.388560 \n",
"\n",
" 24 25 26 27 28 29 \n",
"0 0.888611 0.501566 -1.318099 1.141719 -1.836204 -1.041076 \n",
"1 -0.851897 0.319064 0.140920 0.961895 -1.970683 -0.107190 \n",
"2 0.210844 -1.049625 1.076635 1.113430 0.414070 -1.883389 \n",
"3 -2.278491 -0.927568 -0.157392 -0.453236 1.226394 0.454105 \n",
"4 -1.646358 0.033012 0.328868 -0.246306 -1.815482 -1.717986 \n",
"\n",
"[5 rows x 30 columns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = pd.DataFrame(np.random.normal(size=(NUM_POSITIVES+NUM_NEGATIVES,30)))\n",
"data.loc[0:NUM_POSITIVES, 0] = 1\n",
"data.loc[NUM_POSITIVES:NUM_POSITIVES+NUM_NEGATIVES, 0] = -1\n",
"\n",
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" <th>6</th>\n",
" <th>7</th>\n",
" <th>8</th>\n",
" <th>9</th>\n",
" <th>...</th>\n",
" <th>20</th>\n",
" <th>21</th>\n",
" <th>22</th>\n",
" <th>23</th>\n",
" <th>24</th>\n",
" <th>25</th>\n",
" <th>26</th>\n",
" <th>27</th>\n",
" <th>28</th>\n",
" <th>29</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>35</th>\n",
" <td>-1.0</td>\n",
" <td>0.968848</td>\n",
" <td>0.535183</td>\n",
" <td>0.494881</td>\n",
" <td>-0.331251</td>\n",
" <td>-1.532249</td>\n",
" <td>-0.183939</td>\n",
" <td>1.597095</td>\n",
" <td>0.646095</td>\n",
" <td>-1.279239</td>\n",
" <td>...</td>\n",
" <td>-0.623700</td>\n",
" <td>-0.252786</td>\n",
" <td>-0.156151</td>\n",
" <td>-0.294547</td>\n",
" <td>1.469122</td>\n",
" <td>0.634574</td>\n",
" <td>-0.976616</td>\n",
" <td>-0.307642</td>\n",
" <td>0.124670</td>\n",
" <td>1.076564</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36</th>\n",
" <td>-1.0</td>\n",
" <td>0.852596</td>\n",
" <td>-1.526027</td>\n",
" <td>-0.000638</td>\n",
" <td>-0.318761</td>\n",
" <td>0.127583</td>\n",
" <td>0.614107</td>\n",
" <td>1.235824</td>\n",
" <td>1.697700</td>\n",
" <td>0.028659</td>\n",
" <td>...</td>\n",
" <td>-0.530474</td>\n",
" <td>-0.392842</td>\n",
" <td>-0.018669</td>\n",
" <td>-2.312604</td>\n",
" <td>-1.300310</td>\n",
" <td>-0.791726</td>\n",
" <td>-0.664507</td>\n",
" <td>0.493303</td>\n",
" <td>0.548115</td>\n",
" <td>-1.072233</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37</th>\n",
" <td>-1.0</td>\n",
" <td>-0.090601</td>\n",
" <td>-0.602560</td>\n",
" <td>-1.081841</td>\n",
" <td>-0.137571</td>\n",
" <td>-0.257802</td>\n",
" <td>-0.136415</td>\n",
" <td>-1.896769</td>\n",
" <td>-0.024068</td>\n",
" <td>-0.654289</td>\n",
" <td>...</td>\n",
" <td>-1.110884</td>\n",
" <td>-0.227361</td>\n",
" <td>0.256517</td>\n",
" <td>-0.416979</td>\n",
" <td>-1.537198</td>\n",
" <td>-0.713519</td>\n",
" <td>-2.207975</td>\n",
" <td>-0.887400</td>\n",
" <td>0.785701</td>\n",
" <td>0.179991</td>\n",
" </tr>\n",
" <tr>\n",
" <th>38</th>\n",
" <td>-1.0</td>\n",
" <td>-0.536596</td>\n",
" <td>1.482457</td>\n",
" <td>0.347636</td>\n",
" <td>0.344189</td>\n",
" <td>-0.754875</td>\n",
" <td>0.242360</td>\n",
" <td>-0.941984</td>\n",
" <td>1.440234</td>\n",
" <td>0.763788</td>\n",
" <td>...</td>\n",
" <td>0.541740</td>\n",
" <td>-0.777222</td>\n",
" <td>0.040652</td>\n",
" <td>0.121212</td>\n",
" <td>-2.039872</td>\n",
" <td>-0.402453</td>\n",
" <td>-0.772498</td>\n",
" <td>0.040955</td>\n",
" <td>-0.077092</td>\n",
" <td>-1.876167</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td>-1.0</td>\n",
" <td>0.713270</td>\n",
" <td>0.130870</td>\n",
" <td>-0.584375</td>\n",
" <td>-0.864478</td>\n",
" <td>0.250767</td>\n",
" <td>-0.993269</td>\n",
" <td>-0.094580</td>\n",
" <td>0.738763</td>\n",
" <td>0.114410</td>\n",
" <td>...</td>\n",
" <td>0.337343</td>\n",
" <td>0.055575</td>\n",
" <td>-0.167884</td>\n",
" <td>-1.945240</td>\n",
" <td>1.063749</td>\n",
" <td>0.387133</td>\n",
" <td>-0.177334</td>\n",
" <td>-1.726598</td>\n",
" <td>1.846579</td>\n",
" <td>-0.080217</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
" 0 1 2 3 4 5 6 7 \\\n",
"35 -1.0 0.968848 0.535183 0.494881 -0.331251 -1.532249 -0.183939 1.597095 \n",
"36 -1.0 0.852596 -1.526027 -0.000638 -0.318761 0.127583 0.614107 1.235824 \n",
"37 -1.0 -0.090601 -0.602560 -1.081841 -0.137571 -0.257802 -0.136415 -1.896769 \n",
"38 -1.0 -0.536596 1.482457 0.347636 0.344189 -0.754875 0.242360 -0.941984 \n",
"39 -1.0 0.713270 0.130870 -0.584375 -0.864478 0.250767 -0.993269 -0.094580 \n",
"\n",
" 8 9 ... 20 21 22 23 \\\n",
"35 0.646095 -1.279239 ... -0.623700 -0.252786 -0.156151 -0.294547 \n",
"36 1.697700 0.028659 ... -0.530474 -0.392842 -0.018669 -2.312604 \n",
"37 -0.024068 -0.654289 ... -1.110884 -0.227361 0.256517 -0.416979 \n",
"38 1.440234 0.763788 ... 0.541740 -0.777222 0.040652 0.121212 \n",
"39 0.738763 0.114410 ... 0.337343 0.055575 -0.167884 -1.945240 \n",
"\n",
" 24 25 26 27 28 29 \n",
"35 1.469122 0.634574 -0.976616 -0.307642 0.124670 1.076564 \n",
"36 -1.300310 -0.791726 -0.664507 0.493303 0.548115 -1.072233 \n",
"37 -1.537198 -0.713519 -2.207975 -0.887400 0.785701 0.179991 \n",
"38 -2.039872 -0.402453 -0.772498 0.040955 -0.077092 -1.876167 \n",
"39 1.063749 0.387133 -0.177334 -1.726598 1.846579 -0.080217 \n",
"\n",
"[5 rows x 30 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.tail()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"data.to_csv('./X.tsv', sep='\\t', header=False, index=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"CLI: \n",
"\n",
"`netReg -d X.tsv -r Y.tsv -l 10 -x 1 --maxit 1000 --threshold 0.0001 -o outfile.tsv`"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## II. Evaluate results"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"coefs = pd.read_csv('./outfile_coefficients.tsv', header=None).values"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"intercept = pd.read_csv('./outfile_intercepts.tsv', header=None).values"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### First half should be `1`, second half should be `0`"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[-0.00869428],\n",
" [ 0.12794866],\n",
" [-0.03503538],\n",
" [ 0.11677996],\n",
" [-0.2490059 ],\n",
" [-0.1015339 ],\n",
" [-0.12168017],\n",
" [ 0.22508531],\n",
" [ 0.04153277],\n",
" [ 0.12429876],\n",
" [ 0.13799052],\n",
" [ 0.14775427],\n",
" [ 0.00776043],\n",
" [ 0.06060751],\n",
" [-0.15687861],\n",
" [-0.04678311],\n",
" [ 0.06633899],\n",
" [ 0.18169799],\n",
" [ 0.03367905],\n",
" [-0.05396966],\n",
" [ 0.04810825],\n",
" [ 0.1486447 ],\n",
" [-0.02597513],\n",
" [ 0.0548803 ],\n",
" [-0.0996878 ],\n",
" [-0.00660804],\n",
" [ 0.00068166],\n",
" [-0.02186722],\n",
" [-0.0470199 ],\n",
" [-0.09402682],\n",
" [ 0.00724804],\n",
" [ 0.06777331],\n",
" [-0.01462901],\n",
" [-0.03576274],\n",
" [-0.10208191],\n",
" [-0.10461726],\n",
" [-0.09161686],\n",
" [ 0.01386069],\n",
" [ 0.06373627],\n",
" [-0.07603602]])"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.dot(data, coefs) + intercept"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 0. ],\n",
" [-0.11182981],\n",
" [ 0. ],\n",
" [ 0. ],\n",
" [ 0. ],\n",
" [ 0. ],\n",
" [ 0. ],\n",
" [ 0. ],\n",
" [ 0. ],\n",
" [ 0. ],\n",
" [ 0. ],\n",
" [ 0. ],\n",
" [ 0. ],\n",
" [ 0. ],\n",
" [ 0. ],\n",
" [ 0. ],\n",
" [ 0. ],\n",
" [ 0. ],\n",
" [ 0. ],\n",
" [ 0. ],\n",
" [ 0. ],\n",
" [ 0. ],\n",
" [ 0. ],\n",
" [ 0. ],\n",
" [ 0. ],\n",
" [ 0. ],\n",
" [ 0. ],\n",
" [ 0. ],\n",
" [ 0. ],\n",
" [ 0. ]])"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"coefs"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment