Swarchal · April 15, 2016 11:19
diff --git a/finding_spliced_motif.ipynb b/finding_spliced_motif.ipynb
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Finding a spliced motif\n",
    "\n",
    "### Problem:\n",
    "\n",
    "\n",
    "\n",
    "**input:** 2 DNA strings $s$ and $t$\n",
    "\n",
    "\n",
    "**output:** A collection of 1-based indices of $s$ at which the symbols of $t$ appear as a subsequence of $s$\n",
    "\n",
    "e.g.\n",
    "\n",
    "> $\\textbf{s:}$\n",
    "> `ACGTACGTGACG`\n",
    ">\n",
    "> $\\textbf{t:}$\n",
    "> `GTA`\n",
    ">\n",
    "> `3 8 10`\n",
    "\n",
    "If several solutions exist, any one of them may be returned.  \n",
    "This code returns the indices of the first (leftmost) occurrence of the motif."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "-------------------------------------"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from __future__ import print_function\n",
    "from Bio.SeqIO import to_dict, parse\n",
    "\n",
    "# Location of the Rosalind SSEQ download; adjust for your machine.\n",
    "FASTA_PATH = \"/home/scott/Dropbox/rosalind/rosalind_sseq.txt\"\n",
    "\n",
    "# Read inside a `with` block so the file handle is closed once\n",
    "# to_dict() has built the id -> record dictionary.\n",
    "with open(FASTA_PATH) as data:\n",
    "    fasta_dict = to_dict(parse(data, 'fasta'))\n",
    "\n",
    "# Record ids are specific to this particular download.\n",
    "s = str(fasta_dict['Rosalind_8177'].seq)\n",
    "t = str(fasta_dict['Rosalind_6863'].seq)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2 4 7 12 13 19 21 30 33 36 39 49 53 57 59 60 61 66 71 75 78 82 83 84 86 88 93 94 95 101 102 105 112 117 124 137 143 145 146 153 154 160 162 163 166 170 172 178 180 186 190 192 193 195 196 197 198 200 202 203 213 226 230 238 250 258 259 261 264 267 268 272 275 283 287 290 297 299\n"
     ]
    }
   ],
   "source": [
    "def return_ix(s, m):\n",
    "    \"\"\"Return the 1-based positions of every occurrence of `m` in `s`.\"\"\"\n",
    "    return [pos for pos, symbol in enumerate(s, 1) if symbol == m]\n",
    "\n",
    "def first_motif(my_list):\n",
    "    \"\"\"Print the leftmost spliced motif as space-separated indices.\n",
    "\n",
    "    `my_list` holds one list of positions per motif symbol, in motif\n",
    "    order. For each symbol the smallest position strictly greater than\n",
    "    the previously chosen one is taken (any position for the first).\n",
    "\n",
    "    Raises ValueError when a symbol has no usable position, i.e. the\n",
    "    motif does not occur as a subsequence.\n",
    "    \"\"\"\n",
    "    out = []\n",
    "    lim = None\n",
    "    for n, sublist in enumerate(my_list, 1):\n",
    "        candidates = [x for x in sublist if lim is None or x > lim]\n",
    "        if not candidates:\n",
    "            # min() on an empty sequence raises an opaque ValueError;\n",
    "            # fail with a message that names the offending symbol.\n",
    "            raise ValueError(\n",
    "                \"motif is not a subsequence: no position left for \"\n",
    "                \"symbol %d\" % n)\n",
    "        lim = min(candidates)\n",
    "        out.append(lim)\n",
    "    print(*out, sep = \" \")\n",
    "\n",
    "# One list of 1-based positions per motif symbol, in motif order.\n",
    "x = [return_ix(s, symbol) for symbol in t]\n",
    "first_motif(x)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "--------------------------------------------------\n",
    "\n",
    "## small example:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[[2, 4, 9, 12, 16],\n",
       " [7, 14, 17],\n",
       " [3, 5, 10, 18, 20],\n",
       " [1, 6, 8, 11, 13, 15, 19, 21, 22],\n",
       " [2, 4, 9, 12, 16]]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Toy sequence and motif for checking the approach by eye.\n",
    "s = \"ATGTGACATGATACATCGAGAA\"\n",
    "t = \"TCGAT\"\n",
    "\n",
    "# Position lists, one per motif symbol; the bare name displays them.\n",
    "x = [return_ix(s, symbol) for symbol in t]\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2 7 10 11 12\n"
     ]
    }
   ],
   "source": [
    "# Prints the leftmost match for the small example above.\n",
    "first_motif(x)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
 }
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Finding a spliced motif\n",
	"\n",
	"### Problem:\n",
	"\n",
	"\n",
	"\n",
	"**input:** 2 DNA strings $s$ and $t$\n",
	"\n",
	"\n",
	"**output:** A collection of 1-based indices of $s$ at which the symbols of $t$ appear as a subsequence of $s$\n",
	"\n",
	"e.g.\n",
	"\n",
	"> $\\textbf{s:}$\n",
	"> `ACGTACGTGACG`\n",
	">\n",
	"> $\\textbf{t:}$\n",
	"> `GTA`\n",
	">\n",
	"> `3 8 10`\n",
	"\n",
	"If several solutions exist, any one of them may be returned.  \n",
	"This code returns the indices of the first (leftmost) occurrence of the motif."
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"-------------------------------------"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"from __future__ import print_function\n",
	"from Bio.SeqIO import to_dict, parse\n",
	"\n",
	"# Location of the Rosalind SSEQ download; adjust for your machine.\n",
	"FASTA_PATH = \"/home/scott/Dropbox/rosalind/rosalind_sseq.txt\"\n",
	"\n",
	"# Read inside a `with` block so the file handle is closed once\n",
	"# to_dict() has built the id -> record dictionary.\n",
	"with open(FASTA_PATH) as data:\n",
	"    fasta_dict = to_dict(parse(data, 'fasta'))\n",
	"\n",
	"# Record ids are specific to this particular download.\n",
	"s = str(fasta_dict['Rosalind_8177'].seq)\n",
	"t = str(fasta_dict['Rosalind_6863'].seq)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"2 4 7 12 13 19 21 30 33 36 39 49 53 57 59 60 61 66 71 75 78 82 83 84 86 88 93 94 95 101 102 105 112 117 124 137 143 145 146 153 154 160 162 163 166 170 172 178 180 186 190 192 193 195 196 197 198 200 202 203 213 226 230 238 250 258 259 261 264 267 268 272 275 283 287 290 297 299\n"
	]
	}
	],
	"source": [
	"def return_ix(s, m):\n",
	"    \"\"\"Return the 1-based positions of every occurrence of `m` in `s`.\"\"\"\n",
	"    # Indentation restored: the collapsed one-space body did not parse.\n",
	"    return [pos for pos, symbol in enumerate(s, 1) if symbol == m]\n",
	"\n",
	"def first_motif(my_list):\n",
	"    \"\"\"Print the leftmost spliced motif as space-separated indices.\n",
	"\n",
	"    `my_list` holds one list of positions per motif symbol, in motif\n",
	"    order. For each symbol the smallest position strictly greater than\n",
	"    the previously chosen one is taken (any position for the first).\n",
	"\n",
	"    Raises ValueError when a symbol has no usable position, i.e. the\n",
	"    motif does not occur as a subsequence.\n",
	"    \"\"\"\n",
	"    out = []\n",
	"    lim = None\n",
	"    for n, sublist in enumerate(my_list, 1):\n",
	"        candidates = [x for x in sublist if lim is None or x > lim]\n",
	"        if not candidates:\n",
	"            # min() on an empty sequence raises an opaque ValueError;\n",
	"            # fail with a message that names the offending symbol.\n",
	"            raise ValueError(\n",
	"                \"motif is not a subsequence: no position left for \"\n",
	"                \"symbol %d\" % n)\n",
	"        lim = min(candidates)\n",
	"        out.append(lim)\n",
	"    print(*out, sep = \" \")\n",
	"\n",
	"# One list of 1-based positions per motif symbol, in motif order.\n",
	"x = [return_ix(s, symbol) for symbol in t]\n",
	"first_motif(x)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"--------------------------------------------------\n",
	"\n",
	"## small example:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"[[2, 4, 9, 12, 16],\n",
	" [7, 14, 17],\n",
	" [3, 5, 10, 18, 20],\n",
	" [1, 6, 8, 11, 13, 15, 19, 21, 22],\n",
	" [2, 4, 9, 12, 16]]"
	]
	},
	"execution_count": 3,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Toy sequence and motif for checking the approach by eye.\n",
	"s = \"ATGTGACATGATACATCGAGAA\"\n",
	"t = \"TCGAT\"\n",
	"\n",
	"# Position lists, one per motif symbol; the bare name displays them.\n",
	"x = [return_ix(s, symbol) for symbol in t]\n",
	"x"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"2 7 10 11 12\n"
	]
	}
	],
	"source": [
	"# Prints the leftmost match for the small example above.\n",
	"first_motif(x)"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 2",
	"language": "python",
	"name": "python2"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 2
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython2",
	"version": "2.7.6"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 0
	}