Created
October 18, 2017 20:34
-
-
Save olgabot/7433f8ad938ba70f6c9fbd36ec4d0c66 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import pandas as pd\n", | |
| "import os\n", | |
| "import glob" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 23, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Icon? i5_05-i7_06.csv i5_10-i7_11.csv i5_15-i7_16.csv i5_20-i7_01.csv\r\n", | |
| "i5_01-i7_02.csv i5_06-i7_07.csv i5_11-i7_12.csv i5_16-i7_17.csv\r\n", | |
| "i5_02-i7_03.csv i5_07-i7_08.csv i5_12-i7_13.csv i5_17-i7_18.csv\r\n", | |
| "i5_03-i7_04.csv i5_08-i7_09.csv i5_13-i7_14.csv i5_18-i7_19.csv\r\n", | |
| "i5_04-i7_05.csv i5_09-i7_10.csv i5_14-i7_15.csv i5_19-i7_20.csv\r\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "input_folder = '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates'\n", | |
| "! ls $input_folder" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 24, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "['/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_01-i7_02.csv',\n", | |
| " '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_02-i7_03.csv',\n", | |
| " '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_03-i7_04.csv',\n", | |
| " '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_04-i7_05.csv',\n", | |
| " '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_05-i7_06.csv',\n", | |
| " '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_06-i7_07.csv',\n", | |
| " '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_07-i7_08.csv',\n", | |
| " '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_08-i7_09.csv',\n", | |
| " '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_09-i7_10.csv',\n", | |
| " '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_10-i7_11.csv',\n", | |
| " '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_11-i7_12.csv',\n", | |
| " '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_12-i7_13.csv',\n", | |
| " '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_13-i7_14.csv',\n", | |
| " '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_14-i7_15.csv',\n", | |
| " '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_15-i7_16.csv',\n", | |
| " '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_16-i7_17.csv',\n", | |
| " '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_17-i7_18.csv',\n", | |
| " '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_18-i7_19.csv',\n", | |
| " '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_19-i7_20.csv',\n", | |
| " '/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/i5_20-i7_01.csv']" | |
| ] | |
| }, | |
| "execution_count": 24, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "csvs = glob.glob(os.path.join(input_folder, 'i5*csv'))\n", | |
| "csvs" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 35, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "20\n", | |
| "(384, 5)\n", | |
| "(384, 5)\n", | |
| "(384, 5)\n", | |
| "(384, 5)\n", | |
| "(384, 5)\n", | |
| "(384, 5)\n", | |
| "(384, 5)\n", | |
| "(384, 5)\n", | |
| "(384, 5)\n", | |
| "(384, 5)\n", | |
| "(384, 5)\n", | |
| "(384, 5)\n", | |
| "(384, 5)\n", | |
| "(384, 5)\n", | |
| "(384, 5)\n", | |
| "(384, 5)\n", | |
| "(384, 5)\n", | |
| "(384, 5)\n", | |
| "(384, 5)\n", | |
| "(384, 5)\n", | |
| "(7680, 5)\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Sample_ID</th>\n", | |
| " <th>Sample_Name</th>\n", | |
| " <th>index_name</th>\n", | |
| " <th>index</th>\n", | |
| " <th>index2</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>M2-MAA000377-3_9_M-1</td>\n", | |
| " <td>M2-MAA000377-3_9_M-1</td>\n", | |
| " <td>CZB-NXT-i7-00385-CZB-NXT-i5-00001</td>\n", | |
| " <td>CCACACAAGAGA</td>\n", | |
| " <td>TCAATGACTAAA</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>M10-MAA000377-3_9_M-1</td>\n", | |
| " <td>M10-MAA000377-3_9_M-1</td>\n", | |
| " <td>CZB-NXT-i7-00386-CZB-NXT-i5-00002</td>\n", | |
| " <td>AGCCAATGTGGG</td>\n", | |
| " <td>GGTGATAGACGC</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>M15-MAA000377-3_9_M-1</td>\n", | |
| " <td>M15-MAA000377-3_9_M-1</td>\n", | |
| " <td>CZB-NXT-i7-00387-CZB-NXT-i5-00003</td>\n", | |
| " <td>GTCAGATACCAC</td>\n", | |
| " <td>CTCTTGTATCTT</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>M16-MAA000377-3_9_M-1</td>\n", | |
| " <td>M16-MAA000377-3_9_M-1</td>\n", | |
| " <td>CZB-NXT-i7-00388-CZB-NXT-i5-00004</td>\n", | |
| " <td>TTAGAGGGATGG</td>\n", | |
| " <td>CAAATAACAGCA</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>N4-MAA000377-3_9_M-1</td>\n", | |
| " <td>N4-MAA000377-3_9_M-1</td>\n", | |
| " <td>CZB-NXT-i7-00389-CZB-NXT-i5-00005</td>\n", | |
| " <td>ATAAGCCTTCTG</td>\n", | |
| " <td>ATGTGTATCCTC</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Sample_ID Sample_Name \\\n", | |
| "0 M2-MAA000377-3_9_M-1 M2-MAA000377-3_9_M-1 \n", | |
| "1 M10-MAA000377-3_9_M-1 M10-MAA000377-3_9_M-1 \n", | |
| "2 M15-MAA000377-3_9_M-1 M15-MAA000377-3_9_M-1 \n", | |
| "3 M16-MAA000377-3_9_M-1 M16-MAA000377-3_9_M-1 \n", | |
| "4 N4-MAA000377-3_9_M-1 N4-MAA000377-3_9_M-1 \n", | |
| "\n", | |
| " index_name index index2 \n", | |
| "0 CZB-NXT-i7-00385-CZB-NXT-i5-00001 CCACACAAGAGA TCAATGACTAAA \n", | |
| "1 CZB-NXT-i7-00386-CZB-NXT-i5-00002 AGCCAATGTGGG GGTGATAGACGC \n", | |
| "2 CZB-NXT-i7-00387-CZB-NXT-i5-00003 GTCAGATACCAC CTCTTGTATCTT \n", | |
| "3 CZB-NXT-i7-00388-CZB-NXT-i5-00004 TTAGAGGGATGG CAAATAACAGCA \n", | |
| "4 CZB-NXT-i7-00389-CZB-NXT-i5-00005 ATAAGCCTTCTG ATGTGTATCCTC " | |
| ] | |
| }, | |
| "execution_count": 35, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "print(len(csvs))\n", | |
| "\n", | |
| "dfs = []\n", | |
| "\n", | |
| "for csv in sorted(csvs):\n", | |
| "# ! tail $csv\n", | |
| " df = pd.read_excel(csv, skiprows=1)\n", | |
| " print(df.shape)\n", | |
| "# print(df.head())\n", | |
| " dfs.append(df)\n", | |
| "\n", | |
| "combined = pd.concat(dfs)\n", | |
| "print(combined.shape)\n", | |
| "combined.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 45, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "/Users/olgabot/googledrive/MACA/MACA_Optimization_Experiments/NonCombo_IndexPrimers/sample_sheets_novaseq2/index_templates/combined.csv\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "csv = os.path.join(input_folder, 'combined.csv')\n", | |
| "print(csv)\n", | |
| "\n", | |
| "# index=False: do not write the DataFrame row index as an extra first column\n", | |
| "combined.to_csv(csv, index=False)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Prepend the `[Data],,,,` header line to `combined.csv` and write the result to a separate file, `combined_with_header.csv`" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 46, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "! echo '[Data],,,,' | cat - $input_folder/combined.csv > $input_folder/combined_with_header.csv" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 47, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "[Data],,,,\r\n", | |
| "Sample_ID,Sample_Name,index_name,index,index2\r\n", | |
| "M2-MAA000377-3_9_M-1,M2-MAA000377-3_9_M-1,CZB-NXT-i7-00385-CZB-NXT-i5-00001,CCACACAAGAGA,TCAATGACTAAA\r\n", | |
| "M10-MAA000377-3_9_M-1,M10-MAA000377-3_9_M-1,CZB-NXT-i7-00386-CZB-NXT-i5-00002,AGCCAATGTGGG,GGTGATAGACGC\r\n", | |
| "M15-MAA000377-3_9_M-1,M15-MAA000377-3_9_M-1,CZB-NXT-i7-00387-CZB-NXT-i5-00003,GTCAGATACCAC,CTCTTGTATCTT\r\n", | |
| "M16-MAA000377-3_9_M-1,M16-MAA000377-3_9_M-1,CZB-NXT-i7-00388-CZB-NXT-i5-00004,TTAGAGGGATGG,CAAATAACAGCA\r\n", | |
| "N4-MAA000377-3_9_M-1,N4-MAA000377-3_9_M-1,CZB-NXT-i7-00389-CZB-NXT-i5-00005,ATAAGCCTTCTG,ATGTGTATCCTC\r\n", | |
| "N7-MAA000377-3_9_M-1,N7-MAA000377-3_9_M-1,CZB-NXT-i7-00390-CZB-NXT-i5-00006,GACCTCACGCCT,TTCCGAGTCATA\r\n", | |
| "N11-MAA000377-3_9_M-1,N11-MAA000377-3_9_M-1,CZB-NXT-i7-00391-CZB-NXT-i5-00007,TGCTGGTGGCTA,TAGAGCGGGCGG\r\n", | |
| "N15-MAA000377-3_9_M-1,N15-MAA000377-3_9_M-1,CZB-NXT-i7-00392-CZB-NXT-i5-00008,GAACACCGAAGA,GGAACCGCAGCG\r\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "! head $input_folder/combined_with_header.csv" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.6.2" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment