Last active
March 15, 2021 18:26
-
-
Save fxadecimal/04688745f59827fe21d42fb8e3afe9a3 to your computer and use it in GitHub Desktop.
Find Strings in DF
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import csv\n", | |
"from io import StringIO\n", | |
"import re\n", | |
"\n", | |
"csv_text = \"\"\"id,description,sales\n", | |
"1, red checkered shirt xl, 10\n", | |
"2, green checkered shirt s, 3\n", | |
"3, red checkered shirt xl, 10\n", | |
"4, brown jumper m , 2\n", | |
"5, black t-shirt l, 2\n", | |
"6, white t-shirt xl, 5\n", | |
"\"\"\"\n", | |
"\n", | |
"COLORS = 'red green brown black white'.split(' ')\n", | |
"SIZES = 'xs s m l xl'.split(' ')\n", | |
"TYPES = 'hat t-shirt jumper shirt'.split(' ')\n", | |
"\n", | |
"CATEGORIES = {'colors':COLORS, 'sizes':SIZES, 'types':TYPES }\n", | |
"\n", | |
"\n", | |
"\n", | |
"f = StringIO(csv_text) # allows us to treat a string like a file\n", | |
"df = pd.read_csv(f)\n", | |
"df.index = df['id']\n", | |
"\n", | |
"def categorise_description(row, category_items):\n", | |
" for item in category_items: # loop through the defined \"tokens\" above\n", | |
" if item in row: # substring test: true when the item occurs anywhere in the description (so 'red' also matches 'checkered')\n", | |
" return item # return the first item that matches\n", | |
" else:\n", | |
" continue\n", | |
" \n", | |
"\n", | |
"\n", | |
"# loop over each category name and its list of tokens in CATEGORIES\n", | |
"for category_name, category_items in CATEGORIES.items():\n", | |
" # add a column named after the category, holding the first matching token for each description (None when nothing matches)\n", | |
" \n", | |
" df[category_name] = df['description'].apply(lambda row: categorise_description(row, category_items))\n", | |
"\n", | |
" \n", | |
" \n", | |
"# display df as the cell's output\n", | |
"df\n", | |
"\n", | |
"\n", | |
"\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.9.1" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment