Created
August 5, 2022 18:30
-
-
Save cwood1967/1e257b131a8dfebfccaf4df0ced8b49d to your computer and use it in GitHub Desktop.
Searching with regex
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "e87ebdef-05bd-426d-bc6b-ffbff8376e17", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import re\n", | |
"import random\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "4160b5f6-aaf2-4b3d-b429-e38e3ceb015c", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'A\\nA \\nA\\n\\n\\nA CA\\nAC \\nT \\nGGC\\nTAT\\nT \\nCCAG \\nTGA AATGG\\nCT A GA\\n\\nG CTG TTTGAACC\\nAC\\nACTGA\\nT GA AATGA\\nT ATT TTC AAT\\nGGTCC C TGCGA\\n CT\\nCATGTCAA\\nGGGT \\n T C\\nCCCTT G C TGGAG T\\n CG\\n \\nTC \\n\\nCGA CCA AAAGT\\nTGT CGGCC \\nCC\\nT\\nCT GA\\nGACA G TGC CGAG\\nTCC\\nTTA CC \\nCGTGG\\nACAATATGCT CG\\nGAA TC CGCCGGG\\nGTCC\\nC\\nA CG\\nT \\nAT\\nGGACG\\n\\nGAG\\n AT \\nGC\\nAAACACCAA TACGTCAG GC\\n AGGG\\nG\\nAT\\n\\nGGC A C\\n \\nC A CAT CCATCC\\nGGCC\\nATCC\\nTCA\\nAG C\\nCC\\nA '" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"''' Create a random string with GATC along with spaces and newlines\n", | |
"\n", | |
"random.choices : returns a list of k items drawn at random (with replacement) from the possible choices\n", | |
" \n", | |
"_s : list\n", | |
" list of strings with random choices\n", | |
" \n", | |
"s : str\n", | |
" the list _s joined with an empty string\n", | |
"'''\n", | |
"\n", | |
"_s = random.choices('GACT\\n ', k=400)\n", | |
"s = \"\".join(_s)\n", | |
"s" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "bca8eb7e-0808-4862-b9d3-e34161126a8d", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'AAAACAACTGGCTATTCCAGTGAAATGGCTAGAGCTGTTTGAACCACACTGATGAAATGATATTTTCAATGGTCCCTGCGACTCATGTCAAGGGTTCCCCTTGCTGGAGTCGTCCGACCAAAAGTTGTCGGCCCCTCTGAGACAGTGCCGAGTCCTTACCCGTGGACAATATGCTCGGAATCCGCCGGGGTCCCACGTATGGACGGAGATGCAAACACCAATACGTCAGGCAGGGGATGGCACCACATCCATCCGGCCATCCTCAAGCCCA'" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"'''\n", | |
"use a regex to find all whitespace and replace with an empty string (\"\")\n", | |
"\n", | |
"\\s+ : matches one or more whitespace characters, e.g., \" \\n\\t \\n\" would be a match\n", | |
"re.sub : re.sub(pattern, replacement, string) returns a copy of string with every match of pattern replaced by replacement\n", | |
"'''\n", | |
"\n", | |
"s2 = re.sub(r\"\\s+\", \"\", s)\n", | |
"s2" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "e6c1acb2-213b-44b0-b208-8f637e9dc13e", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"matches = re.finditer(\"G[AC][GT][CA]\", s2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "352ef7cd-2743-4deb-9bd2-1c7093e88745", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"10 14 GCTA\n", | |
"27 31 GCTA\n", | |
"31 35 GAGC\n", | |
"58 62 GATA\n", | |
"77 81 GCGA\n", | |
"138 142 GAGA\n", | |
"172 176 GCTC\n", | |
"205 209 GAGA\n" | |
] | |
} | |
], | |
"source": [ | |
"for m in matches:\n", | |
" print(m.start(), m.end(), s2[m.start():m.end()])\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "31df6f94-4d17-41f8-bd7f-aae029674069", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "maxwell", | |
"language": "python", | |
"name": "maxwell" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.9.12" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment