Skip to content

Instantly share code, notes, and snippets.

@BenLangmead
Created September 10, 2013 17:26
Show Gist options
Save BenLangmead/6512698 to your computer and use it in GitHub Desktop.
{
"metadata": {
"name": "CG_001_StringBasics1"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": "'A'",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 1,
"text": "'A'"
}
],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": "'ACGT'",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 2,
"text": "'ACGT'"
}
],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": "st = 'ACGT'",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": "len(st) # getting the length of a string",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 6,
"text": "4"
}
],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": "'' # empty string (epsilon)",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 24,
"text": "''"
}
],
"prompt_number": 24
},
{
"cell_type": "code",
"collapsed": false,
"input": "len('')",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 25,
"text": "0"
}
],
"prompt_number": 25
},
{
"cell_type": "code",
"collapsed": false,
"input": "import random\nrandom.choice('ACGT') # generating a random nucleotide",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 7,
"text": "'T'"
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": "random.choice('ACGT') # repeated invocations might yield different nucleotides",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 8,
"text": "'G'"
}
],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": "random.choice('ACGT') # repeated invocations might yield different nucleotides",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 9,
"text": "'G'"
}
],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": "random.choice('ACGT') # repeated invocations might yield different nucleotides",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 10,
"text": "'G'"
}
],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": "random.choice('ACGT') # repeated invocations might yield different nucleotides",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 11,
"text": "'C'"
}
],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": "# now I'll make a random nucleotide string by concatenating random nucleotides\nst = ''.join([random.choice('ACGT') for _ in xrange(40)])\nst",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 13,
"text": "'CTACATACGACAAGTCTTCGAAAGAGCCTATCAATTGCTC'"
}
],
"prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
"input": "st[1:3] # substring, starting at position 1 and extending up to but not including position 3\n# note that the first position is numbered 0",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 15,
"text": "'TA'"
}
],
"prompt_number": 15
},
{
"cell_type": "code",
"collapsed": false,
"input": "st[0:3] # prefix of length 3",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 16,
"text": "'CTA'"
}
],
"prompt_number": 16
},
{
"cell_type": "code",
"collapsed": false,
"input": "st[:3] # another way of getting the prefix of length 3",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 17,
"text": "'CTA'"
}
],
"prompt_number": 17
},
{
"cell_type": "code",
"collapsed": false,
"input": "st[len(st)-3:len(st)] # suffix of length 3",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 18,
"text": "'CTC'"
}
],
"prompt_number": 18
},
{
"cell_type": "code",
"collapsed": false,
"input": "st[-3:] # another way of getting the suffix of length 3",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 19,
"text": "'CTC'"
}
],
"prompt_number": 19
},
{
"cell_type": "code",
"collapsed": false,
"input": "st1, st2 = 'CAT', 'ATAC'",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 20
},
{
"cell_type": "code",
"collapsed": false,
"input": "st1",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 21,
"text": "'CAT'"
}
],
"prompt_number": 21
},
{
"cell_type": "code",
"collapsed": false,
"input": "st2",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 22,
"text": "'ATAC'"
}
],
"prompt_number": 22
},
{
"cell_type": "code",
"collapsed": false,
"input": "st1 + st2 # concatenation of 2 strings",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 23,
"text": "'CATATAC'"
}
],
"prompt_number": 23
},
{
"cell_type": "code",
"collapsed": false,
"input": "",
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment