Created
December 15, 2016 16:32
-
-
Save Swarchal/98616a344d4e4b5220bcc1e68169d87b to your computer and use it in GitHub Desktop.
lexv_explanation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Ordering strings of varying length lexicographically" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "from itertools import combinations_with_replacement as combn\n", | |
| "from itertools import permutations as perm\n", | |
| "\n", | |
| "\n", | |
| "def parse_input(file_in):\n", | |
| " s, n = open(file_in, \"r\")\n", | |
| " return [s.split(), int(n)]\n", | |
| "\n", | |
| "string, n = parse_input(\"test.dat\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "['D', 'N', 'A']" | |
| ] | |
| }, | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "string" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "3" | |
| ] | |
| }, | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Generating the permutations\n", | |
| "Probably a much better way to do this. \n", | |
| "**Note**: pad each string with spaces so that it's `n` characters long." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def make_permutations(s, n):\n", | |
| " out = [perm(x, len(x)) for l in range(n+1) for x in combn(s, l)]\n", | |
| " set_list = set([i for s in out for i in s if i is not ()])\n", | |
| " return [\"\".join(chars).ljust(n) for chars in set_list]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "['ADN',\n", | |
| " 'AN ',\n", | |
| " 'AD ',\n", | |
| " 'DA ',\n", | |
| " 'NAN',\n", | |
| " 'DND',\n", | |
| " 'DDA',\n", | |
| " 'ANA',\n", | |
| " 'AAA',\n", | |
| " 'DD ',\n", | |
| " 'AA ',\n", | |
| " 'NND',\n", | |
| " 'N ',\n", | |
| " 'A ',\n", | |
| " 'DDN',\n", | |
| " 'ANN',\n", | |
| " 'AAN',\n", | |
| " 'ND ',\n", | |
| " 'ADA',\n", | |
| " 'NDD',\n", | |
| " 'NAA',\n", | |
| " 'NNN',\n", | |
| " 'DAD',\n", | |
| " 'NA ',\n", | |
| " 'DNN',\n", | |
| " 'D ',\n", | |
| " 'NDA',\n", | |
| " 'NAD',\n", | |
| " 'NNA',\n", | |
| " 'DDD',\n", | |
| " 'AAD',\n", | |
| " 'NDN',\n", | |
| " 'DNA',\n", | |
| " 'DN ',\n", | |
| " 'AND',\n", | |
| " 'NN ',\n", | |
| " 'DAN',\n", | |
| " 'DAA',\n", | |
| " 'ADD']" | |
| ] | |
| }, | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "permutations = make_permutations(string, n)\n", | |
| "permutations" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Scoring each permutation\n", | |
| "\n", | |
| "Create a score for each character in the string, an empty character is two zeros.\n", | |
| "\n", | |
| "```bash\n", | |
| "\" \" D N A\n", | |
| " 00 01 02 03\n", | |
| "```\n", | |
| "\n", | |
| "Then go through each permutation and append the score for each character.\n", | |
| "\n", | |
| "e.g for `ADN`:\n", | |
| "```bash\n", | |
| "A = 03\n", | |
| "D = 01\n", | |
| "N = 02\n", | |
| "\n", | |
| "ADN = 030102\n", | |
| "```\n", | |
| "\n", | |
| "--------\n", | |
| "\n", | |
| "Have to use two characters e.g `02` otherwise it fails on input strings longer than 10 characters." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[('ADN', 30102),\n", | |
| " ('AN ', 30200),\n", | |
| " ('AD ', 30100),\n", | |
| " ('DA ', 10300),\n", | |
| " ('NAN', 20302),\n", | |
| " ('DND', 10201),\n", | |
| " ('DDA', 10103),\n", | |
| " ('ANA', 30203),\n", | |
| " ('AAA', 30303),\n", | |
| " ('DD ', 10100),\n", | |
| " ('AA ', 30300),\n", | |
| " ('NND', 20201),\n", | |
| " ('N ', 20000),\n", | |
| " ('A ', 30000),\n", | |
| " ('DDN', 10102),\n", | |
| " ('ANN', 30202),\n", | |
| " ('AAN', 30302),\n", | |
| " ('ND ', 20100),\n", | |
| " ('ADA', 30103),\n", | |
| " ('NDD', 20101),\n", | |
| " ('NAA', 20303),\n", | |
| " ('NNN', 20202),\n", | |
| " ('DAD', 10301),\n", | |
| " ('NA ', 20300),\n", | |
| " ('DNN', 10202),\n", | |
| " ('D ', 10000),\n", | |
| " ('NDA', 20103),\n", | |
| " ('NAD', 20301),\n", | |
| " ('NNA', 20203),\n", | |
| " ('DDD', 10101),\n", | |
| " ('AAD', 30301),\n", | |
| " ('NDN', 20102),\n", | |
| " ('DNA', 10203),\n", | |
| " ('DN ', 10200),\n", | |
| " ('AND', 30201),\n", | |
| " ('NN ', 20200),\n", | |
| " ('DAN', 10302),\n", | |
| " ('DAA', 10303),\n", | |
| " ('ADD', 30101)]" | |
| ] | |
| }, | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "def score_permutations(permutation_list, s):\n", | |
| " string = \"\".join(s)\n", | |
| " score_dict = {ch : str(i).zfill(2) for i, ch in enumerate(list(\" \" + string))}\n", | |
| " scores = []\n", | |
| " for i in permutation_list:\n", | |
| " string_score = \"\"\n", | |
| " for j in i:\n", | |
| " string_score += score_dict[j]\n", | |
| " scores.append(int(string_score))\n", | |
| " return zip(permutation_list, scores)\n", | |
| "\n", | |
| "scores = score_permutations(permutations, string)\n", | |
| "list(scores)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Now sort the permutations on their score." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[('D ', 10000),\n", | |
| " ('DD ', 10100),\n", | |
| " ('DDD', 10101),\n", | |
| " ('DDN', 10102),\n", | |
| " ('DDA', 10103),\n", | |
| " ('DN ', 10200),\n", | |
| " ('DND', 10201),\n", | |
| " ('DNN', 10202),\n", | |
| " ('DNA', 10203),\n", | |
| " ('DA ', 10300),\n", | |
| " ('DAD', 10301),\n", | |
| " ('DAN', 10302),\n", | |
| " ('DAA', 10303),\n", | |
| " ('N ', 20000),\n", | |
| " ('ND ', 20100),\n", | |
| " ('NDD', 20101),\n", | |
| " ('NDN', 20102),\n", | |
| " ('NDA', 20103),\n", | |
| " ('NN ', 20200),\n", | |
| " ('NND', 20201),\n", | |
| " ('NNN', 20202),\n", | |
| " ('NNA', 20203),\n", | |
| " ('NA ', 20300),\n", | |
| " ('NAD', 20301),\n", | |
| " ('NAN', 20302),\n", | |
| " ('NAA', 20303),\n", | |
| " ('A ', 30000),\n", | |
| " ('AD ', 30100),\n", | |
| " ('ADD', 30101),\n", | |
| " ('ADN', 30102),\n", | |
| " ('ADA', 30103),\n", | |
| " ('AN ', 30200),\n", | |
| " ('AND', 30201),\n", | |
| " ('ANN', 30202),\n", | |
| " ('ANA', 30203),\n", | |
| " ('AA ', 30300),\n", | |
| " ('AAD', 30301),\n", | |
| " ('AAN', 30302),\n", | |
| " ('AAA', 30303)]" | |
| ] | |
| }, | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "sorted(score_permutations(permutations, string), key=lambda x: x[1])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Then just print the strings from the list of tuples." | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.5.2" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 0 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment