Skip to content

Instantly share code, notes, and snippets.

@cwood1967
Created August 5, 2022 18:30
Show Gist options
  • Save cwood1967/1e257b131a8dfebfccaf4df0ced8b49d to your computer and use it in GitHub Desktop.
Save cwood1967/1e257b131a8dfebfccaf4df0ced8b49d to your computer and use it in GitHub Desktop.
Searching with regex
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "e87ebdef-05bd-426d-bc6b-ffbff8376e17",
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"import random\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "4160b5f6-aaf2-4b3d-b429-e38e3ceb015c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'A\\nA \\nA\\n\\n\\nA CA\\nAC \\nT \\nGGC\\nTAT\\nT \\nCCAG \\nTGA AATGG\\nCT A GA\\n\\nG CTG TTTGAACC\\nAC\\nACTGA\\nT GA AATGA\\nT ATT TTC AAT\\nGGTCC C TGCGA\\n CT\\nCATGTCAA\\nGGGT \\n T C\\nCCCTT G C TGGAG T\\n CG\\n \\nTC \\n\\nCGA CCA AAAGT\\nTGT CGGCC \\nCC\\nT\\nCT GA\\nGACA G TGC CGAG\\nTCC\\nTTA CC \\nCGTGG\\nACAATATGCT CG\\nGAA TC CGCCGGG\\nGTCC\\nC\\nA CG\\nT \\nAT\\nGGACG\\n\\nGAG\\n AT \\nGC\\nAAACACCAA TACGTCAG GC\\n AGGG\\nG\\nAT\\n\\nGGC A C\\n \\nC A CAT CCATCC\\nGGCC\\nATCC\\nTCA\\nAG C\\nCC\\nA '"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"''' Build a reproducible random test string of bases, spaces and newlines.\n",
"\n",
"SEED : int\n",
"    fixed seed, so that Restart & Run All regenerates the same string\n",
"\n",
"rng : random.Random\n",
"    private generator seeded with SEED; the global random state is untouched\n",
"\n",
"rng.choices : returns a list of k picks (with replacement) from the given characters\n",
"\n",
"_s : list\n",
"    400 single-character strings, each one of G, A, C, T, newline or space\n",
"\n",
"s : str\n",
"    the list _s joined with an empty string\n",
"'''\n",
"\n",
"SEED = 0\n",
"rng = random.Random(SEED)\n",
"\n",
"_s = rng.choices('GACT\\n ', k=400)\n",
"s = \"\".join(_s)\n",
"s"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "bca8eb7e-0808-4862-b9d3-e34161126a8d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'AAAACAACTGGCTATTCCAGTGAAATGGCTAGAGCTGTTTGAACCACACTGATGAAATGATATTTTCAATGGTCCCTGCGACTCATGTCAAGGGTTCCCCTTGCTGGAGTCGTCCGACCAAAAGTTGTCGGCCCCTCTGAGACAGTGCCGAGTCCTTACCCGTGGACAATATGCTCGGAATCCGCCGGGGTCCCACGTATGGACGGAGATGCAAACACCAATACGTCAGGCAGGGGATGGCACCACATCCATCCGGCCATCCTCAAGCCCA'"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"r'''\n",
"Use a regex to delete every run of whitespace from s.\n",
"\n",
"(The docstring is a raw string so that the backslashes below stay literal text.)\n",
"\n",
"re.sub(pattern, repl, string) returns a new string; s itself is not modified.\n",
"\n",
"pattern \\s+ : one or more consecutive whitespace characters, e.g. \" \\n\\t \\n\" is a single match\n",
"repl \"\"     : every match is replaced with the empty string, i.e. removed\n",
"string s    : the text that is searched\n",
"'''\n",
"\n",
"s2 = re.sub(pattern=r\"\\s+\", repl=\"\", string=s)\n",
"s2"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "e6c1acb2-213b-44b0-b208-8f637e9dc13e",
"metadata": {},
"outputs": [],
"source": [
"# Pattern: a G, then A or C, then G or T, then C or A (4 characters per match).\n",
"# re.finditer returns a one-shot iterator of match objects. Store the matches in\n",
"# a list so that every cell that loops over `matches` can be re-run and still\n",
"# see all of them, instead of silently getting nothing on the second pass.\n",
"matches = list(re.finditer(\"G[AC][GT][CA]\", s2))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "352ef7cd-2743-4deb-9bd2-1c7093e88745",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"10 14 GCTA\n",
"27 31 GCTA\n",
"31 35 GAGC\n",
"58 62 GATA\n",
"77 81 GCGA\n",
"138 142 GAGA\n",
"172 176 GCTC\n",
"205 209 GAGA\n"
]
}
],
"source": [
"# One line per match: start index, end index (exclusive) and the matched slice of s2.\n",
"for m in matches:\n",
"    start, end = m.span()\n",
"    print(start, end, s2[start:end])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "31df6f94-4d17-41f8-bd7f-aae029674069",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "maxwell",
"language": "python",
"name": "maxwell"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment