Created
October 18, 2017 20:34
-
-
Save olgabot/7433f8ad938ba70f6c9fbd36ec4d0c66 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import os\n", | |
"import glob" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Icon? i5_05-i7_06.csv i5_10-i7_11.csv i5_15-i7_16.csv i5_20-i7_01.csv\r\n", | |
"i5_01-i7_02.csv i5_06-i7_07.csv i5_11-i7_12.csv i5_16-i7_17.csv\r\n", | |
"i5_02-i7_03.csv i5_07-i7_08.csv i5_12-i7_13.csv i5_17-i7_18.csv\r\n", | |
"i5_03-i7_04.csv i5_08-i7_09.csv i5_13-i7_14.csv i5_18-i7_19.csv\r\n", | |
"i5_04-i7_05.csv i5_09-i7_10.csv i5_14-i7_15.csv i5_19-i7_20.csv\r\n" | |
] | |
} | |
], | |
"source": [ | |
"input_folder = '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates'\n", | |
"! ls $input_folder" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_01-i7_02.csv',\n", | |
" '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_02-i7_03.csv',\n", | |
" '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_03-i7_04.csv',\n", | |
" '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_04-i7_05.csv',\n", | |
" '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_05-i7_06.csv',\n", | |
" '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_06-i7_07.csv',\n", | |
" '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_07-i7_08.csv',\n", | |
" '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_08-i7_09.csv',\n", | |
" '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_09-i7_10.csv',\n", | |
" '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_10-i7_11.csv',\n", | |
" '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_11-i7_12.csv',\n", | |
" '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_12-i7_13.csv',\n", | |
" '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_13-i7_14.csv',\n", | |
" '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_14-i7_15.csv',\n", | |
" '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_15-i7_16.csv',\n", | |
" '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_16-i7_17.csv',\n", | |
" '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_17-i7_18.csv',\n", | |
" '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_18-i7_19.csv',\n", | |
" '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_19-i7_20.csv',\n", | |
" '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_20-i7_01.csv']" | |
] | |
}, | |
"execution_count": 24, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"csvs = glob.glob(os.path.join(input_folder, 'i5*csv'))\n", | |
"csvs" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"20\n", | |
"(384, 5)\n", | |
"(384, 5)\n", | |
"(384, 5)\n", | |
"(384, 5)\n", | |
"(384, 5)\n", | |
"(384, 5)\n", | |
"(384, 5)\n", | |
"(384, 5)\n", | |
"(384, 5)\n", | |
"(384, 5)\n", | |
"(384, 5)\n", | |
"(384, 5)\n", | |
"(384, 5)\n", | |
"(384, 5)\n", | |
"(384, 5)\n", | |
"(384, 5)\n", | |
"(384, 5)\n", | |
"(384, 5)\n", | |
"(384, 5)\n", | |
"(384, 5)\n", | |
"(7680, 5)\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Sample_ID</th>\n", | |
" <th>Sample_Name</th>\n", | |
" <th>index_name</th>\n", | |
" <th>index</th>\n", | |
" <th>index2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>M2-MAA000377-3_9_M-1</td>\n", | |
" <td>M2-MAA000377-3_9_M-1</td>\n", | |
" <td>CZB-NXT-i7-00385-CZB-NXT-i5-00001</td>\n", | |
" <td>CCACACAAGAGA</td>\n", | |
" <td>TCAATGACTAAA</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>M10-MAA000377-3_9_M-1</td>\n", | |
" <td>M10-MAA000377-3_9_M-1</td>\n", | |
" <td>CZB-NXT-i7-00386-CZB-NXT-i5-00002</td>\n", | |
" <td>AGCCAATGTGGG</td>\n", | |
" <td>GGTGATAGACGC</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>M15-MAA000377-3_9_M-1</td>\n", | |
" <td>M15-MAA000377-3_9_M-1</td>\n", | |
" <td>CZB-NXT-i7-00387-CZB-NXT-i5-00003</td>\n", | |
" <td>GTCAGATACCAC</td>\n", | |
" <td>CTCTTGTATCTT</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>M16-MAA000377-3_9_M-1</td>\n", | |
" <td>M16-MAA000377-3_9_M-1</td>\n", | |
" <td>CZB-NXT-i7-00388-CZB-NXT-i5-00004</td>\n", | |
" <td>TTAGAGGGATGG</td>\n", | |
" <td>CAAATAACAGCA</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>N4-MAA000377-3_9_M-1</td>\n", | |
" <td>N4-MAA000377-3_9_M-1</td>\n", | |
" <td>CZB-NXT-i7-00389-CZB-NXT-i5-00005</td>\n", | |
" <td>ATAAGCCTTCTG</td>\n", | |
" <td>ATGTGTATCCTC</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Sample_ID Sample_Name \\\n", | |
"0 M2-MAA000377-3_9_M-1 M2-MAA000377-3_9_M-1 \n", | |
"1 M10-MAA000377-3_9_M-1 M10-MAA000377-3_9_M-1 \n", | |
"2 M15-MAA000377-3_9_M-1 M15-MAA000377-3_9_M-1 \n", | |
"3 M16-MAA000377-3_9_M-1 M16-MAA000377-3_9_M-1 \n", | |
"4 N4-MAA000377-3_9_M-1 N4-MAA000377-3_9_M-1 \n", | |
"\n", | |
" index_name index index2 \n", | |
"0 CZB-NXT-i7-00385-CZB-NXT-i5-00001 CCACACAAGAGA TCAATGACTAAA \n", | |
"1 CZB-NXT-i7-00386-CZB-NXT-i5-00002 AGCCAATGTGGG GGTGATAGACGC \n", | |
"2 CZB-NXT-i7-00387-CZB-NXT-i5-00003 GTCAGATACCAC CTCTTGTATCTT \n", | |
"3 CZB-NXT-i7-00388-CZB-NXT-i5-00004 TTAGAGGGATGG CAAATAACAGCA \n", | |
"4 CZB-NXT-i7-00389-CZB-NXT-i5-00005 ATAAGCCTTCTG ATGTGTATCCTC " | |
] | |
}, | |
"execution_count": 35, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"print(len(csvs))\n", | |
"\n", | |
"dfs = []\n", | |
"\n", | |
"for csv in sorted(csvs):\n", | |
"# ! tail $csv\n", | |
" df = pd.read_excel(csv, skiprows=1)\n", | |
" print(df.shape)\n", | |
"# print(df.head())\n", | |
" dfs.append(df)\n", | |
"\n", | |
"combined = pd.concat(dfs)\n", | |
"print(combined.shape)\n", | |
"combined.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 45, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/combined.csv\n" | |
] | |
} | |
], | |
"source": [ | |
"csv = os.path.join(input_folder, 'combined.csv')\n", | |
"print(csv)\n", | |
"\n", | |
"# index=False to not write the row numbers on the left\n", | |
"combined.to_csv(csv, index=False)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Prepend the `[Data],,,,` header line to `combined.csv` and save the result as a separate file, `combined_with_header.csv`." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 46, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"! echo '[Data],,,,' | cat - $input_folder/combined.csv > $input_folder/combined_with_header.csv" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 47, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[Data],,,,\r\n", | |
"Sample_ID,Sample_Name,index_name,index,index2\r\n", | |
"M2-MAA000377-3_9_M-1,M2-MAA000377-3_9_M-1,CZB-NXT-i7-00385-CZB-NXT-i5-00001,CCACACAAGAGA,TCAATGACTAAA\r\n", | |
"M10-MAA000377-3_9_M-1,M10-MAA000377-3_9_M-1,CZB-NXT-i7-00386-CZB-NXT-i5-00002,AGCCAATGTGGG,GGTGATAGACGC\r\n", | |
"M15-MAA000377-3_9_M-1,M15-MAA000377-3_9_M-1,CZB-NXT-i7-00387-CZB-NXT-i5-00003,GTCAGATACCAC,CTCTTGTATCTT\r\n", | |
"M16-MAA000377-3_9_M-1,M16-MAA000377-3_9_M-1,CZB-NXT-i7-00388-CZB-NXT-i5-00004,TTAGAGGGATGG,CAAATAACAGCA\r\n", | |
"N4-MAA000377-3_9_M-1,N4-MAA000377-3_9_M-1,CZB-NXT-i7-00389-CZB-NXT-i5-00005,ATAAGCCTTCTG,ATGTGTATCCTC\r\n", | |
"N7-MAA000377-3_9_M-1,N7-MAA000377-3_9_M-1,CZB-NXT-i7-00390-CZB-NXT-i5-00006,GACCTCACGCCT,TTCCGAGTCATA\r\n", | |
"N11-MAA000377-3_9_M-1,N11-MAA000377-3_9_M-1,CZB-NXT-i7-00391-CZB-NXT-i5-00007,TGCTGGTGGCTA,TAGAGCGGGCGG\r\n", | |
"N15-MAA000377-3_9_M-1,N15-MAA000377-3_9_M-1,CZB-NXT-i7-00392-CZB-NXT-i5-00008,GAACACCGAAGA,GGAACCGCAGCG\r\n" | |
] | |
} | |
], | |
"source": [ | |
"! head $input_folder/combined_with_header.csv" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment