Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save jrjames83/df59457b64d66fb26543437450158ef0 to your computer and use it in GitHub Desktop.
Save jrjames83/df59457b64d66fb26543437450158ef0 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"slideshow": {
"slide_type": "skip"
}
},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"# Get the data\n",
"* Find the most common 500 queries\n",
"* Find all the requests that use the common queries\n",
"* Find all the events clicked on from those requests\n",
"* Find the name and the description of those events"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"slideshow": {
"slide_type": "skip"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>queries</th>\n",
" <th>title</th>\n",
" <th>body</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>44792068</td>\n",
" <td>blockchain</td>\n",
" <td>Blockchain Smart Panels</td>\n",
" <td>&lt;h3 class=\"MsoNormal\"&gt;&lt;br /&gt;&lt;/h3&gt;\\r\\n&lt;h3 class...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>45819106</td>\n",
" <td>makeup classes,beauty</td>\n",
" <td>NARS Summer Mega Event</td>\n",
" <td>&lt;P&gt;&lt;SPAN&gt;Join NARS Cosmetics at Nordstrom Cent...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>45052684</td>\n",
" <td>wine tasting events,wine tasting</td>\n",
" <td>Italian Wine Tasting with Uggiano Winery</td>\n",
" <td>&lt;P CLASS=\"MsoNormal\"&gt;Nestled in the countrysid...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>44801981</td>\n",
" <td>soca</td>\n",
" <td>TURN UP FRIDAYS @ KINANM</td>\n",
" <td>&lt;P&gt;Turn up fridays @kinanm lounge... $100 2-4-...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>45775216</td>\n",
" <td>blockchain</td>\n",
" <td>Raw Haus: Design x Blockchain: Identity Manage...</td>\n",
" <td>&lt;P&gt;In the third of our&lt;SPAN&gt; series,&lt;/SPAN&gt; we...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>45614048</td>\n",
" <td>wine tasting events,wine tasting</td>\n",
" <td>APPELLATION - Wine Tasting, Meet the Makers</td>\n",
" <td>&lt;P&gt;&lt;SPAN&gt;There is really nothing more divisive...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>43253968</td>\n",
" <td>street fairs</td>\n",
" <td>Alma Street Fair 2018-Stall reservations</td>\n",
" <td>&lt;p style=\"text-align: center;\"&gt;&lt;span style=\"fo...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>45610786</td>\n",
" <td>volunteer events</td>\n",
" <td>TPS Cooking for Hope at Ronald McDonald House ...</td>\n",
" <td>&lt;P&gt;&lt;SPAN&gt;Back by popular demand! &lt;/SPAN&gt;&lt;SPAN&gt;...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>45344993</td>\n",
" <td>yoga</td>\n",
" <td>Sunday Yoga</td>\n",
" <td>&lt;P&gt;Join us for a complimentary yoga practice w...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>45639611</td>\n",
" <td>yoga</td>\n",
" <td>Rooftop Yoga benefitting the Alzheimer's Assoc...</td>\n",
" <td>&lt;P&gt;&lt;SPAN&gt;Join members of the Alzheimer's Assoc...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>44994292</td>\n",
" <td>crypto</td>\n",
" <td>Blockchain Conference Melbourne, Australia|Blo...</td>\n",
" <td>&lt;P&gt;A continuously list of growing records, cal...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>44104813</td>\n",
" <td>pop up</td>\n",
" <td>The Confetti Project: June Open Studios!</td>\n",
" <td>&lt;P&gt;Welcome to our JUNE Open Studios! We are op...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>46039495</td>\n",
" <td>party,white party</td>\n",
" <td>Third Annual White Party</td>\n",
" <td>&lt;P&gt;Third Annual WHITE PARTY Powered By Krave P...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>46121664</td>\n",
" <td>blockchain</td>\n",
" <td>Meet Your Business Half : Blockchain Rumble!</td>\n",
" <td>&lt;P&gt;&lt;STRONG&gt;&lt;EM&gt;Blockchain: The next revolution...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>44690920</td>\n",
" <td>trap and paint</td>\n",
" <td>Largest NC Trap &amp; Paint</td>\n",
" <td>&lt;P&gt;The LARGEST Trap &amp;amp; Paint event to ever ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>45355417</td>\n",
" <td>cryptocurrency</td>\n",
" <td>RevRally London</td>\n",
" <td>&lt;P&gt;Join us in London for our monthly RevRally ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>41677359</td>\n",
" <td>wine tasting events</td>\n",
" <td>2nd Annual All-Rosé Wine Tasting!</td>\n",
" <td>After our sold-out event last year, we're back...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>45255502</td>\n",
" <td>iftar</td>\n",
" <td>Iftar Dinner</td>\n",
" <td>&lt;P&gt;Muslims and Jews unite for peace! Please jo...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>45311970</td>\n",
" <td>yoga</td>\n",
" <td>Bender CHI // Sunset Yoga at Ace Hotel</td>\n",
" <td>&lt;P&gt;&lt;SPAN&gt;Friends! We're thrilled to return to ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>41259232</td>\n",
" <td>business</td>\n",
" <td>Workshop: New strategies in Business Analytics...</td>\n",
" <td>&lt;P&gt;The world of data analytics, business intel...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id queries \\\n",
"0 44792068 blockchain \n",
"1 45819106 makeup classes,beauty \n",
"2 45052684 wine tasting events,wine tasting \n",
"3 44801981 soca \n",
"4 45775216 blockchain \n",
"5 45614048 wine tasting events,wine tasting \n",
"6 43253968 street fairs \n",
"7 45610786 volunteer events \n",
"8 45344993 yoga \n",
"9 45639611 yoga \n",
"10 44994292 crypto \n",
"11 44104813 pop up \n",
"12 46039495 party,white party \n",
"13 46121664 blockchain \n",
"14 44690920 trap and paint \n",
"15 45355417 cryptocurrency \n",
"16 41677359 wine tasting events \n",
"17 45255502 iftar \n",
"18 45311970 yoga \n",
"19 41259232 business \n",
"\n",
" title \\\n",
"0 Blockchain Smart Panels \n",
"1 NARS Summer Mega Event \n",
"2 Italian Wine Tasting with Uggiano Winery \n",
"3 TURN UP FRIDAYS @ KINANM \n",
"4 Raw Haus: Design x Blockchain: Identity Manage... \n",
"5 APPELLATION - Wine Tasting, Meet the Makers \n",
"6 Alma Street Fair 2018-Stall reservations \n",
"7 TPS Cooking for Hope at Ronald McDonald House ... \n",
"8 Sunday Yoga \n",
"9 Rooftop Yoga benefitting the Alzheimer's Assoc... \n",
"10 Blockchain Conference Melbourne, Australia|Blo... \n",
"11 The Confetti Project: June Open Studios! \n",
"12 Third Annual White Party \n",
"13 Meet Your Business Half : Blockchain Rumble! \n",
"14 Largest NC Trap & Paint \n",
"15 RevRally London \n",
"16 2nd Annual All-Rosé Wine Tasting! \n",
"17 Iftar Dinner \n",
"18 Bender CHI // Sunset Yoga at Ace Hotel \n",
"19 Workshop: New strategies in Business Analytics... \n",
"\n",
" body \n",
"0 <h3 class=\"MsoNormal\"><br /></h3>\\r\\n<h3 class... \n",
"1 <P><SPAN>Join NARS Cosmetics at Nordstrom Cent... \n",
"2 <P CLASS=\"MsoNormal\">Nestled in the countrysid... \n",
"3 <P>Turn up fridays @kinanm lounge... $100 2-4-... \n",
"4 <P>In the third of our<SPAN> series,</SPAN> we... \n",
"5 <P><SPAN>There is really nothing more divisive... \n",
"6 <p style=\"text-align: center;\"><span style=\"fo... \n",
"7 <P><SPAN>Back by popular demand! </SPAN><SPAN>... \n",
"8 <P>Join us for a complimentary yoga practice w... \n",
"9 <P><SPAN>Join members of the Alzheimer's Assoc... \n",
"10 <P>A continuously list of growing records, cal... \n",
"11 <P>Welcome to our JUNE Open Studios! We are op... \n",
"12 <P>Third Annual WHITE PARTY Powered By Krave P... \n",
"13 <P><STRONG><EM>Blockchain: The next revolution... \n",
"14 <P>The LARGEST Trap &amp; Paint event to ever ... \n",
"15 <P>Join us in London for our monthly RevRally ... \n",
"16 After our sold-out event last year, we're back... \n",
"17 <P>Muslims and Jews unite for peace! Please jo... \n",
"18 <P><SPAN>Friends! We're thrilled to return to ... \n",
"19 <P>The world of data analytics, business intel... "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = pd.read_csv('data/tagging_with_searches_1.tsv', names=['id','queries', 'title', 'body'], sep='\\t')\n",
"data.head(20)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Create training set\n",
"\n",
"* Strip markup tags and named entities from the event bodies, then set aside a test set."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"import numpy as np\n",
"\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Matches markup tags (<...>) and named character entities (&name;).\n",
"XML_RE = re.compile(r'<[^>]+>|&\\w+;')\n",
"\n",
"# pandas parses empty TSV fields as NaN (a float). re.sub only accepts\n",
"# strings, so missing titles/bodies are mapped to '' before any text\n",
"# processing; rows that already hold text are unaffected.\n",
"titles = data['title'].fillna('')\n",
"bodies = data['body'].fillna('').apply(lambda d: XML_RE.sub(' ', d))\n",
"tagses = data['queries']\n",
"\n",
"# Hold out 100 rows as the test set; the fixed seed keeps the split reproducible.\n",
"titles_train, titles_test, bodies_train, bodies_test, tagses_train, tagses_test = train_test_split(\n",
"    titles,\n",
"    bodies,\n",
"    tagses,\n",
"    test_size=100,\n",
"    random_state=42,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prepare the feature vectors, `X`\n",
"* Process `title` and `body` fields into sparse TF*IDF vectors."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.feature_extraction.text import CountVectorizer\n",
"from sklearn.feature_extraction.text import TfidfTransformer\n",
"from sklearn.pipeline import Pipeline\n",
"\n",
"\n",
"def _make_tfidf_pipeline(min_df):\n",
"    \"\"\"Return a word-count -> TF-IDF pipeline with English stop words removed.\n",
"\n",
"    min_df is passed straight to CountVectorizer as its minimum document\n",
"    frequency.\n",
"    \"\"\"\n",
"    return Pipeline([\n",
"        ('vect', CountVectorizer(stop_words='english', min_df=min_df)),\n",
"        ('tfidf', TfidfTransformer(use_idf=True)),\n",
"    ])\n",
"\n",
"\n",
"# Titles and bodies get separate vocabularies with different frequency cut-offs.\n",
"title_processor = _make_tfidf_pipeline(min_df=19)\n",
"body_processor = _make_tfidf_pipeline(min_df=9)\n",
"\n",
"# Fit on the training split only; the test split is transformed with the\n",
"# vocabulary and IDF weights learned from training data.\n",
"tfidf_titles_train = title_processor.fit_transform(titles_train)\n",
"tfidf_bodies_train = body_processor.fit_transform(bodies_train)\n",
"\n",
"tfidf_titles_test = title_processor.transform(titles_test)\n",
"tfidf_bodies_test = body_processor.transform(bodies_test)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"* Convert them into dense vectors suitable for training Neural Networks."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Materialise each sparse TF-IDF result as a dense matrix via .todense().\n",
"# The inspection cells further down unpack row *and* column indices from a\n",
"# single row, so they depend on rows of these objects staying 2-D.\n",
"dense_tfidf_titles_train, dense_tfidf_titles_test = (\n",
"    sparse.todense() for sparse in (tfidf_titles_train, tfidf_titles_test)\n",
")\n",
"dense_tfidf_bodies_train, dense_tfidf_bodies_test = (\n",
"    sparse.todense() for sparse in (tfidf_bodies_train, tfidf_bodies_test)\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"* Concatenate `title` and `body` vectors into a single vector."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Feature matrix: title columns followed by body columns, one row per event.\n",
"X_train = np.concatenate((dense_tfidf_titles_train, dense_tfidf_bodies_train), axis=1)\n",
"X_test = np.concatenate((dense_tfidf_titles_test, dense_tfidf_bodies_test), axis=1)\n",
"\n",
"# Raw test text: title, a newline, then the cleaned body, element-wise.\n",
"# np.char.add is the public name for the same function that used to be\n",
"# reached through np.core.defchararray; np.core is private in NumPy 2.\n",
"X_test_raw = np.char.add(np.char.add(list(titles_test), '\\n'), bodies_test)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's take a look at the results."
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"X_train.shape: (14422, 13877) \n",
"\n",
"num non-zero elements: 54 \n",
"\n",
"non-zero elements:\n",
" [[0.25926518 0.59145486 0.47701804 0.46712164 0.37042393 0.05991176\n",
" 0.10517516 0.12397836 0.10993582 0.11624109 0.09743092 0.1096012\n",
" 0.09387606 0.14281727 0.21403938 0.13223437 0.08779476 0.13270574\n",
" 0.09329427 0.16406408 0.07798627 0.09499815 0.13404607 0.03956563\n",
" 0.11790491 0.0859697 0.0881039 0.13549669 0.16867474 0.1439813\n",
" 0.086571 0.10815557 0.12839434 0.57204539 0.12462782 0.11430594\n",
" 0.12944188 0.17296817 0.17734905 0.06020619 0.06602291 0.09752642\n",
" 0.09932067 0.07767455 0.14521064 0.09534196 0.16971504 0.07612504\n",
" 0.10226218 0.10854766 0.1193775 0.07632421 0.15624812 0.06046931]]\n"
]
}
],
"source": [
"# Inspect the first training row: overall shape, then only its non-zero weights.\n",
"x_row = X_train[0]\n",
"\n",
"print('X_train.shape:', X_train.shape, '\\n')\n",
"print('num non-zero elements:', np.count_nonzero(x_row), '\\n')\n",
"\n",
"# The row is 2-D, so np.nonzero yields both row and column indices.\n",
"row_i, col_i = np.nonzero(x_row)\n",
"print('non-zero elements:\\n', x_row[row_i, col_i])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prepare target vectors, `y`\n",
"* Process the queries into sparse binary vectors.\n",
"* Convert them to dense vectors."
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Each comma-separated search query is one label. binary=True records\n",
"# presence (0/1) instead of a count, so a query that appears twice in the\n",
"# same row still produces 1 and the targets are truly binary.\n",
"tags_processor = CountVectorizer(analyzer=lambda q: q.split(','), binary=True)\n",
"\n",
"# The label vocabulary is learned from the training split only.\n",
"vect_tagses_train = tags_processor.fit_transform(tagses_train)\n",
"vect_tagses_test = tags_processor.transform(tagses_test)\n",
"\n",
"y_train = vect_tagses_train.todense()\n",
"y_test = vect_tagses_test.todense()\n",
"\n",
"# Original comma-separated query strings for the test rows.\n",
"y_test_raw = tagses_test"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's take a look at the results."
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"y_train.shape: (14422, 449) \n",
"\n",
"num non-zero elements: 2 \n",
"\n",
"non-zero elements:\n",
" [[1 1]]\n"
]
}
],
"source": [
"# Inspect the third training target row: overall shape, then its non-zero entries.\n",
"y_row = y_train[2]\n",
"\n",
"print('y_train.shape:', y_train.shape, '\\n')\n",
"print('num non-zero elements:', np.count_nonzero(y_row), '\\n')\n",
"\n",
"# The row is 2-D, so np.nonzero yields both row and column indices.\n",
"row_i, col_i = np.nonzero(y_row)\n",
"print('non-zero elements:\\n', y_row[row_i, col_i])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Build Model"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/anaconda3/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
" from ._conv import register_converters as _register_converters\n",
"Using TensorFlow backend.\n"
]
}
],
"source": [
"import os\n",
"\n",
"from keras.layers import Input, Dense\n",
"from keras.models import load_model, Model\n",
"\n",
"model_file = 'models/tagging_with_searches_1.mdl'\n",
"try:\n",
"    # Reuse a previously saved model when one can be read.\n",
"    model = load_model(model_file)\n",
"except OSError:\n",
"    # No saved model could be loaded: build and train one.\n",
"    # Create the target directory up front so the save at the end does not\n",
"    # fail on a missing directory after 20 epochs of training.\n",
"    model_dir = os.path.dirname(model_file)\n",
"    if model_dir:\n",
"        os.makedirs(model_dir, exist_ok=True)\n",
"\n",
"    inputs = Input(\n",
"        shape=(X_train.shape[1],),\n",
"        dtype='float',\n",
"        name='inputs',\n",
"    )\n",
"    # One ReLU hidden layer as wide as the number of tags, followed by an\n",
"    # output layer of the same width with no activation.\n",
"    middle = Dense(y_train.shape[1], name='middle', activation='relu')(inputs)\n",
"    outputs = Dense(y_train.shape[1], name='outputs')(middle)\n",
"\n",
"    model = Model(inputs=inputs, outputs=outputs)\n",
"    model.compile(optimizer='rmsprop', loss='mean_squared_error')\n",
"\n",
"    model.fit(X_train, y_train, epochs=20)\n",
"    model.save(model_file)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# See how well it works!"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"predicted: ['beauty']\n",
"truth: beauty\n",
"original text:\n",
"Around The World Beauty \"Beauty + Travel Meet-up\"\n",
" L et's meet-up after hours to celebra\n",
"\n",
"----------------------------------------------\n",
"predicted: ['art', 'business', 'meditation', 'mindfulness']\n",
"truth: leadership seminars\n",
"original text:\n",
"The Art of Mindful Leadership\n",
" M indfulness has become a buzz word, and for good reason. T\n",
"\n",
"----------------------------------------------\n",
"predicted: ['vendor', 'vendors needed']\n",
"truth: bridal show\n",
"original text:\n",
"Bakersfield&#39;s Premier Bridal Show \n",
" Join us for Bakersfield's Premier Bridal Show! \n",
" \n",
"\n",
"----------------------------------------------\n",
"predicted: ['5k run']\n",
"truth: 5k run\n",
"original text:\n",
"Flamingo Day 5K -Phoenix\n",
" FLAMINGO DAY 5K \r\n",
" *THIS IS A VIRTUAL RACE! \r\n",
" \r\n",
" \r\n",
" Did you \n",
"\n",
"----------------------------------------------\n",
"predicted: ['memorial day']\n",
"truth: memorial day events\n",
"original text:\n",
"Keepin It Grown & Sexy\n",
" Houston's Ultimate Couples Day  Party!  This event will be at F\n",
"\n",
"----------------------------------------------\n",
"predicted: ['anime']\n",
"truth: comic con\n",
"original text:\n",
"WhedonCon 2018\n",
" WhedonCon 2018  is growing! To keep up with meeting our fans’ needs, GO T\n",
"\n",
"----------------------------------------------\n",
"predicted: ['mothers day']\n",
"truth: mothers day\n",
"original text:\n",
"Mother's Day 5K - Running 4 You Super Mom! -Dallas\n",
" MOTHER S DAY 5K RUNNING 4 YOU SUPER M\n",
"\n",
"----------------------------------------------\n",
"predicted: ['sip and paint', 'trap and paint']\n",
"truth: sip and paint,trap and paint\n",
"original text:\n",
"Trap Paint Sip SOLD OUT\n",
" Paint Sip Socialize is an art studio offering painting classes. Br\n",
"\n",
"----------------------------------------------\n",
"predicted: ['job fair']\n",
"truth: job fair\n",
"original text:\n",
"West Tucson Career Fair\n",
" Find a great job in Tucson on Wednesday, May 30, 2018, from 11 AM\n",
"\n",
"----------------------------------------------\n",
"predicted: ['calligraphy']\n",
"truth: calligraphy\n",
"original text:\n",
"Beginers Modern Calligraphy Class\n",
" Our modern pointed-pen beginner calligraphy workshops pr\n",
"\n",
"----------------------------------------------\n",
"predicted: ['kids']\n",
"truth: kids\n",
"original text:\n",
"Cupcake Decorating Workshop for Kids (under 16)\n",
" Fancy something fun and different to do d\n",
"\n",
"----------------------------------------------\n",
"predicted: ['sip and paint']\n",
"truth: paint and sip,paint,sip and paint\n",
"original text:\n",
"PAINT SIP \n",
" JOIN US FOR PAINT SIP  \n",
" COMPLEMENTARY DRINK  \n",
" ALL MATERIALS INCLUDED \n",
" HAPPY \n",
"\n",
"----------------------------------------------\n",
"predicted: ['business networking', 'networking']\n",
"truth: networking\n",
"original text:\n",
"NetworkNite Los Angeles Speed Networking Event For Business Professionals\n",
" Business Relati\n",
"\n",
"----------------------------------------------\n",
"predicted: ['cannabis', 'memorial day weekend']\n",
"truth: cannabis\n",
"original text:\n",
"Tokyo Smoke Presents: Higher Learning: Cannabis Strains Explained\n",
" With hundreds of cannabi\n",
"\n",
"----------------------------------------------\n",
"predicted: ['black', 'film']\n",
"truth: black\n",
"original text:\n",
"EMPOWERING THE BLACK AND AFRICAN FILMMAKER\n",
" \n",
" \n",
" BLACK TO BUSINESS AND KUUMBA MEDIA INV\n",
"\n",
"----------------------------------------------\n",
"predicted: ['singles party', 'speed dating events']\n",
"truth: speed date\n",
"original text:\n",
"Singles Speed Dating Event In Houston\n",
" A low key, sophisticated approach to  dating in Hous\n",
"\n",
"----------------------------------------------\n",
"predicted: ['day party', 'memorial day', 'memorial day weekend']\n",
"truth: memorial day,memorial day weekend events,memorial day weekend,day party\n",
"original text:\n",
"MEMORIAL DAY DAY PARTY @LEVEL UPTOWN \n",
" •Doors open at NOON \n",
" •$1 Mimosas until 3pm. \n",
" •Bott\n",
"\n",
"----------------------------------------------\n",
"predicted: ['blockchain', 'crypto']\n",
"truth: crypto\n",
"original text:\n",
"Crypto Career Day\n",
" Crypto Career Day is an event for those who are considering a career i\n",
"\n",
"----------------------------------------------\n",
"predicted: ['420']\n",
"truth: cyber security\n",
"original text:\n",
"2018 DC CyberWeek\n",
" Presented by CyberScoop, DC CyberWeek is a SXSW-style, city-wide fest\n",
"\n",
"----------------------------------------------\n",
"predicted: ['business', 'education conference', 'finance']\n",
"truth: finance conference\n",
"original text:\n",
"Conference on Global Business, Economics, Finance and Social Sciences (GVC) \n",
" Ninth Europea\n",
"\n",
"----------------------------------------------\n",
"predicted: ['cannabis']\n",
"truth: cannabis\n",
"original text:\n",
"The Love In Festival\n",
" Celebrate. Connect. Create. Experience The Love In!! \n",
" A celebratio\n",
"\n",
"----------------------------------------------\n",
"predicted: ['crossfit', 'women']\n",
"truth: crossfit\n",
"original text:\n",
"Women's Throwdown 8\n",
" Every year, Women's Throwdown has been an event which women come toge\n",
"\n",
"----------------------------------------------\n",
"predicted: ['business', 'business networking', 'networking']\n",
"truth: business\n",
"original text:\n",
"Circle of Firms: Business to Business Networking | May 2018\n",
" May Edition:  \n",
" Networking Ope\n",
"\n",
"----------------------------------------------\n",
"predicted: ['networking']\n",
"truth: networking\n",
"original text:\n",
"Network After Work Los Angeles at The Phoenix\n",
" Network After Work invites you to an evenin\n",
"\n",
"----------------------------------------------\n",
"predicted: ['career fair', 'job fair']\n",
"truth: job fair,job fairs\n",
"original text:\n",
"Community Career & Resource Fair\n",
" Come and meet potential employers, get information on inf\n",
"\n",
"----------------------------------------------\n",
"predicted: ['property']\n",
"truth: property\n",
"original text:\n",
"Property Question Time \n",
" Auction House London invites you to the pre-auction \n",
"\n",
"----------------------------------------------\n",
"predicted: ['silent party']\n",
"truth: silent party\n",
"original text:\n",
"Silent Xperience Sat. 3 Channels HIP HOP, Top 40 Silent Disco Ladies Night!\n",
" \r\n",
" \"\n",
"\n",
"----------------------------------------------\n",
"predicted: ['gdpr']\n",
"truth: gdpr\n",
"original text:\n",
"GDPR Training\n",
" General Data Protection Regulations (GDPR) is aimed at businesses that rece\n",
"\n",
"----------------------------------------------\n",
"predicted: []\n",
"truth: kids\n",
"original text:\n",
"Wildflower-Arranging Workshop with Ellie Hartley Flowers and Culture Whisper\n",
" We've teamed \n",
"\n",
"----------------------------------------------\n",
"predicted: ['islam', 'muslim', 'networking']\n",
"truth: muslim\n",
"original text:\n",
"Muslims in Pennsylvania - Creating Community\n",
" When Muslim slaves were brought into the U.S\n",
"\n",
"----------------------------------------------\n",
"predicted: ['memorial day weekend', 'soca', 'trap and paint']\n",
"truth: trap and paint\n",
"original text:\n",
"TRAP OUT!! The Werk Out!!\n",
" Lets Dance!! Lets Move That Body!! \n",
" LOCATION CHANGE!! LOCATI\n",
"\n",
"----------------------------------------------\n",
"predicted: ['sex', 'sex party']\n",
"truth: sex party\n",
"original text:\n",
"Temple Dancer Play Party\n",
" Have you ever fantasized about being immersed and inspired by a s\n",
"\n",
"----------------------------------------------\n",
"predicted: ['business networking']\n",
"truth: business networking\n",
"original text:\n",
"HOUSTON BUSINESS NETWORK\n",
" If you're searching for a great networking opportunity, loo\n",
"\n",
"----------------------------------------------\n",
"predicted: ['mothers day']\n",
"truth: mothers day\n",
"original text:\n",
"Party in the Park\n",
" Join us at our 3rd Party in the Park to celebrate the official launchi\n",
"\n",
"----------------------------------------------\n",
"predicted: ['doterra', 'health and wellness']\n",
"truth: doterra\n",
"original text:\n",
"Wellness Summit 2018 - Calgary, AB CAN\n",
" 4:00 - 6:00 PM  Empowered Success Business Traini\n",
"\n",
"----------------------------------------------\n",
"predicted: ['goat yoga', 'yoga']\n",
"truth: goat yoga\n",
"original text:\n",
"Goat Yoga at the Omni Dallas\n",
" Join Dallas Fitness Ambassadors for a special Goat Yoga event\n",
"\n",
"----------------------------------------------\n",
"predicted: ['free', 'memorial day weekend events', 'yacht party']\n",
"truth: july\n",
"original text:\n",
"Grown FolksUnder The Stars Yacht Party\n",
" On July 13, 2018 we're taking the dance floor to t\n",
"\n",
"----------------------------------------------\n",
"predicted: ['wine', 'wine tasting', 'wine tasting events']\n",
"truth: wine\n",
"original text:\n",
"Digital Learning Wine Tasting\n",
" Senior HR Executive Wine Tasting Event. Senior representativ\n",
"\n",
"----------------------------------------------\n",
"predicted: ['free']\n",
"truth: free\n",
"original text:\n",
"Free Open House on Melrose\n",
" Join us at our free open house to see for yourself why we are L\n",
"\n",
"----------------------------------------------\n",
"predicted: ['architecture', 'boat party', 'business']\n",
"truth: architecture\n",
"original text:\n",
"Architecture & Design Film Festival at A’18 presents BIG TIME - For A'18 Attendees\n",
" Accordi\n",
"\n",
"----------------------------------------------\n",
"predicted: ['day party', 'memorial day weekend', 'memorial day weekend events']\n",
"truth: day party\n",
"original text:\n",
"Seersuckers & Sundresses 2018: The Day Party Part III\n",
" Seersuckers Sundresses 2018: The \n",
"\n",
"----------------------------------------------\n",
"predicted: ['boat party', 'day party']\n",
"truth: day party\n",
"original text:\n",
"THE DECK SUNDAYS | A ROSEBAR DAY PARTY\n",
" Every Sunday Funday join us at \"The Deck\" Sundays a\n",
"\n",
"----------------------------------------------\n",
"predicted: ['comedy']\n",
"truth: stand up comedy\n",
"original text:\n",
"Downtown Comedy Night: A San Francisco Stand Up Comedy Show\n",
" Like to laugh? Like prestigiou\n",
"\n",
"----------------------------------------------\n",
"predicted: ['calligraphy']\n",
"truth: calligraphy\n",
"original text:\n",
"Brush Pen Calligraphy Workshop\n",
" Brush pens are easy, fuss-free and a terrific tool to get \n",
"\n",
"----------------------------------------------\n",
"predicted: ['black', 'fashion', 'fashion show', 'food truck festival']\n",
"truth: fashion show,fashion shows\n",
"original text:\n",
"FOOD MEETS FASHION \"The Shape of BLACK Dinner Party\" Atlanta\n",
" FOOD meets FASHION 7 DESI\n",
"\n",
"----------------------------------------------\n",
"predicted: ['singles']\n",
"truth: boat party,singles party\n",
"original text:\n",
"SINGLES CRUISE PARTY \n",
" Join us aboard the 100-foot, two-level party yacht The Cabana, as w\n",
"\n",
"----------------------------------------------\n",
"predicted: ['pool party', 'pride']\n",
"truth: pride,nude\n",
"original text:\n",
"WET DREAMZ LUXURY EDITION - DC BLACK PRIDE 2018\n",
" DARYL WILSON PROMOTION PRESENTS  \n",
" \n",
"\n",
"\n",
"----------------------------------------------\n",
"predicted: ['memorial day events', 'memorial day weekend', 'memorial day weekend events']\n",
"truth: memorial day weekend\n",
"original text:\n",
"Memorial Day Weekend Crab Fest\n",
" Come by and enjoy our Memorial Day Weekend Crab Fest weeken\n",
"\n",
"----------------------------------------------\n",
"predicted: ['day party', 'memorial day weekend events']\n",
"truth: day party\n",
"original text:\n",
"KLASS ANFLANNEUR DAY PARTY\n",
" Come and Enjoy the Day Party in NYC with a Spectacular Performa\n",
"\n",
"----------------------------------------------\n",
"predicted: ['carnival', 'memorial day weekend events']\n",
"truth: afrobeats\n",
"original text:\n",
"Cirque de l'Afrique - Berlin Carnival Special\n",
" Cirque de l'Afrique - Berlin Carnival Edit\n",
"\n",
"----------------------------------------------\n",
"predicted: ['party', 'sex party', 'soca']\n",
"truth: sex party,pool party,silent party\n",
"original text:\n",
"#Hashtag The Party\n",
" #Hashtag the Party is the opening event for Ace Weekend annually held i\n",
"\n",
"----------------------------------------------\n",
"predicted: ['nude']\n",
"truth: nude\n",
"original text:\n",
"PEGASUS - Art Festival\n",
" Nude Photography, Contemporary/ Abstract Art, Dance Performances mi\n",
"\n",
"----------------------------------------------\n",
"predicted: ['mental health']\n",
"truth: mental health\n",
"original text:\n",
"VIBE SESSIONS celebrates Mental Health Awareness Month\n",
" Vibe Sessions  is a sisterhood of \n",
"\n",
"----------------------------------------------\n",
"predicted: ['summer camp']\n",
"truth: football camps\n",
"original text:\n",
"GO Camp Oak Cliff\n",
" Go Camp is a one-of-a-kind summer day camp experience offering safe spa\n",
"\n",
"----------------------------------------------\n",
"predicted: ['pool party']\n",
"truth: pool party\n",
"original text:\n",
"WET Miami Pool Parties Saturdays\n",
" \n",
" Miami Nightlife Pool Parties \n",
" Present.\n",
"\n",
"----------------------------------------------\n",
"predicted: ['mothers day']\n",
"truth: mothers day\n",
"original text:\n",
"A Mother's Gift: An Evening with Jennifer Hudson\n",
" Academy and Grammy Award-winning singer \n",
"\n",
"----------------------------------------------\n",
"predicted: ['fashion']\n",
"truth: fashion\n",
"original text:\n",
"Fashion and Tailoring Open Day\n",
" Newham College’s Fashion and Tailoring department is holdin\n",
"\n",
"----------------------------------------------\n",
"predicted: ['cinco de mayo', 'yoga']\n",
"truth: cinco de mayo\n",
"original text:\n",
"Tipsy Trap Yoga: Cinco de Mayo Edition sponsored by: Tito's Homemade Vodka\n",
" Let's turn up o\n",
"\n",
"----------------------------------------------\n",
"predicted: ['blockchain']\n",
"truth: blockchain\n",
"original text:\n",
"Blockchain for the Automotive Industry Masterclass\n",
" \n",
" \n",
" This is a one-day, instructor led, \n",
"\n",
"----------------------------------------------\n",
"predicted: ['fintech']\n",
"truth: fintech\n",
"original text:\n",
"Startupbootcamp InsurTech 2018 Demo Day - Converge\n",
" \n",
" \n",
" \n",
" Join us for Startupbootcamp I\n",
"\n",
"----------------------------------------------\n",
"predicted: ['blockchain']\n",
"truth: blockchain\n",
"original text:\n",
"Blockchain: in practice\n",
" Blockchain: in practice explores the practical applications of \n",
"\n",
"----------------------------------------------\n",
"predicted: ['memorial day weekend events']\n",
"truth: memorial day weekend events\n",
"original text:\n",
"Coast In Commons Weekend Market Place\n",
" Coast In Commons is Walkers Point newest event space\n",
"\n",
"----------------------------------------------\n",
"predicted: ['4th of july party', 'amazura', 'carnival', 'foam party']\n",
"truth: foam party,amazura\n",
"original text:\n",
"FOAM FETE 2018 - NYC JULY 4TH EDITION\n",
" #FOAMFETE2018 NYC JULY 4TH EDITION \r\n",
" \r\n",
" TUESD\n",
"\n",
"----------------------------------------------\n",
"predicted: ['prom']\n",
"truth: swinger party\n",
"original text:\n",
"Adult Prom \n",
" \n",
"\n",
"----------------------------------------------\n",
"predicted: ['food', 'vendor']\n",
"truth: vendors needed,music festivals,festivals\n",
"original text:\n",
"East Atlanta Village Music & Food Festival \n",
" \n",
" \n",
" \n",
" Attention Atlanta, GA !!! Attention All\n",
"\n",
"----------------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"predicted: ['car show', 'classic car show', 'memorial day weekend events']\n",
"truth: car show\n",
"original text:\n",
"SATURDAY 9th JUNE 2018\n",
"\n",
"The BGS Classic &amp; Custom Show\n",
"The BGS Classic/Custom is a small\n",
"\n",
"----------------------------------------------\n",
"predicted: ['vegan']\n",
"truth: vegan\n",
"original text:\n",
"Jules Aron Book Launch @ Manna Life Food\n",
" Another FREE high-vibe event is upcoming at Man\n",
"\n",
"----------------------------------------------\n",
"predicted: []\n",
"truth: hip hop\n",
"original text:\n",
"TRAVIS SCOTT @ OHM Nightclub 18+ MDW Sunday No Work or School Next Day!\n",
" Memorial Day Weeke\n",
"\n",
"----------------------------------------------\n",
"predicted: ['memorial day weekend', 'party', 'pool party']\n",
"truth: day party\n",
"original text:\n",
"B2B Mansion Party Take 1\n",
" The Profectus Group Presents the #Back2Back Mansion/Pool Party \n",
" \n",
"\n",
"----------------------------------------------\n",
"predicted: ['wine', 'wine tasting events']\n",
"truth: food\n",
"original text:\n",
"International Wine and Fine Food Expo 2018\n",
" International Wine Fine Food Expo is a trade\n",
"\n",
"----------------------------------------------\n",
"predicted: ['food truck festival']\n",
"truth: wrestling\n",
"original text:\n",
"SIR MO ULTIMATE 8 INVITATIONAL TOURNAMENT\n",
" Sir Mo has invited what he considers 8 of the to\n",
"\n",
"----------------------------------------------\n",
"predicted: ['memorial day weekend', 'mothers day', 'rave party']\n",
"truth: harry potter\n",
"original text:\n",
"Trivia Night - Potter Edition\n",
" Who’s the biggest Potter-head in town? Test your wizard wits\n",
"\n",
"----------------------------------------------\n",
"predicted: ['business networking']\n",
"truth: networking\n",
"original text:\n",
"Newcastle Power Business Breakfast Club - August\n",
" Looking to make new connections? Be ready\n",
"\n",
"----------------------------------------------\n",
"predicted: ['career fair', 'job fair']\n",
"truth: career\n",
"original text:\n",
"Student Career Expo\n",
" VIRTUAL REALITY HUB FREE GOODIE BAG WIN A DRONE JOB INTERVIEW MASTERC\n",
"\n",
"----------------------------------------------\n",
"predicted: ['cannabis']\n",
"truth: cannabis\n",
"original text:\n",
"Cultivating Your Cannabis Career Path: A Panel Discussion\n",
" “Cultivating Your Cannabis Caree\n",
"\n",
"----------------------------------------------\n",
"predicted: []\n",
"truth: burlesque show\n",
"original text:\n",
"Rag Bag Cabaret at The Stray Dog \n",
" \n",
" After four sold out shows in Orleans, Almonte and Car\n",
"\n",
"----------------------------------------------\n",
"predicted: ['iftar', 'muslim']\n",
"truth: iftar\n",
"original text:\n",
"Rohingya Iftar\n",
" Join fellow Muslims and the team at Insaaf for an iftar dinner at Little P\n",
"\n",
"----------------------------------------------\n",
"predicted: []\n",
"truth: vendors needed\n",
"original text:\n",
"CHARGE UP SURVIVORS CONFERENCE\n",
"   \n",
" All Roads Lead to Atlanta for the  Inaugural Surviv\n",
"\n",
"----------------------------------------------\n",
"predicted: ['car show', 'car shows']\n",
"truth: car shows\n",
"original text:\n",
"BUY TICKETS TO IMPORTFEST 2018 - CANADA'S BIGGEST & BADDEST CAR SHOW\n",
" BUY ADVANCE TICKETS \n",
"\n",
"----------------------------------------------\n",
"predicted: ['mothers day']\n",
"truth: mothers day\n",
"original text:\n",
"Sip + Celebrate for Mother's Day!\n",
" Armoire, The Buttermilk Company, The Riveter, and Sanaya\n",
"\n",
"----------------------------------------------\n",
"predicted: ['fashion', 'vegan']\n",
"truth: fashion\n",
"original text:\n",
"Bare Fashion\n",
" Award-winning magazine Vegan Food Living is teaming up with the fabulo\n",
"\n",
"----------------------------------------------\n",
"predicted: ['business networking']\n",
"truth: networking\n",
"original text:\n",
"New Tampa Business Networking \n",
" The only thing better than connecting top professionals in\n",
"\n",
"----------------------------------------------\n",
"predicted: ['venture capital']\n",
"truth: venture capital\n",
"original text:\n",
"New York 2018 Venture Capital World Summit\n",
" \r\n",
" These events are  the Venture Capital \n",
"\n",
"----------------------------------------------\n",
"predicted: []\n",
"truth: entrepreneur\n",
"original text:\n",
"Entrepreneurship: The Highs, The Lows (and The Pivots!)\n",
" Thinking about starting your own b\n",
"\n",
"----------------------------------------------\n",
"predicted: ['swinger party', 'swingers party']\n",
"truth: swinger party\n",
"original text:\n",
"Swinger 101 - Couples Only\n",
" New to the lifestyle? Just curious about being a swinger? \r\n",
" S\n",
"\n",
"----------------------------------------------\n",
"predicted: ['agile']\n",
"truth: agile\n",
"original text:\n",
"Scrum & Agile Culture\n",
" SCRUM CULTURE IS A WINING COLLABORATIVE TEAM \n",
" Scrum es un marco de \n",
"\n",
"----------------------------------------------\n",
"predicted: ['cannabis']\n",
"truth: cannabis\n",
"original text:\n",
"HEMP/CBD OIL EDUCATION-GET HEALTHY NOT HIGH!\n",
" There’s been much debate suggesting that CBD \n",
"\n",
"----------------------------------------------\n",
"predicted: ['google']\n",
"truth: google\n",
"original text:\n",
"Google Cloud Summit '18 Paris\n",
" Venez nous rencontrer lors du Google Cloud Summit , le 5 ju\n",
"\n",
"----------------------------------------------\n",
"predicted: ['job fair']\n",
"truth: job fairs,job fair\n",
"original text:\n",
"2019 Chicago Career Fair. \n",
" Join us on April 18, 2019 for the Chicago Career Fair.  \n",
" M\n",
"\n",
"----------------------------------------------\n",
"predicted: []\n",
"truth: car shows\n",
"original text:\n",
"FWHLR Presents: The Escape \n",
" \n",
" \n",
" \n",
" \n",
" THE ESCAPE \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" On SATURDAY, join us for\n",
"\n",
"----------------------------------------------\n",
"predicted: ['meditation', 'yoga']\n",
"truth: yoga retreat\n",
"original text:\n",
"Jul. 6 & 7 Retreat in the Temple: Meditation, Movement & Yoga Community Building\n",
" We are p\n",
"\n",
"----------------------------------------------\n",
"predicted: ['speed dating', 'speed dating events']\n",
"truth: speed dating\n",
"original text:\n",
"**MEN SOLD OUT**Relish Speed Dating | Speed Dating Austin | Saturday Night\n",
" It's all about\n",
"\n",
"----------------------------------------------\n",
"predicted: ['brunch', 'day party']\n",
"truth: day party\n",
"original text:\n",
"Secrets Brunch & Day Party\n",
" Da Culture Ent Presnts  \n",
" Secret Brunch Day Party \n",
" Brunch St\n",
"\n",
"----------------------------------------------\n",
"predicted: ['marketing', 'networking']\n",
"truth: marketing\n",
"original text:\n",
"Content Marketing talk & networking \n",
" This content marketing talk will be led by Finchley's\n",
"\n",
"----------------------------------------------\n",
"predicted: ['car show', 'car shows', 'classic car show']\n",
"truth: car show\n",
"original text:\n",
"Golden Wheel Car Cruse\n",
" All proceeds go to American Foundation for Suicide Prevention \n",
" \n",
"\n",
"----------------------------------------------\n",
"predicted: ['yoga']\n",
"truth: yoga\n",
"original text:\n",
"The Happy Yoga Club\n",
" Experience the soothing benefits of yoga in a welcoming, informal atmo\n",
"\n",
"----------------------------------------------\n",
"predicted: ['memorial day weekend parties', 'pool party']\n",
"truth: pool party,pool\n",
"original text:\n",
"Las Vegas Nightlife & Pool Party Pass by Destination Coupons\n",
" Lit Vegas is the most connec\n",
"\n",
"----------------------------------------------\n",
"predicted: ['vendor', 'vendor events', 'vendors needed']\n",
"truth: food festivals\n",
"original text:\n",
"LAB and LOVE Festival Vending \n",
" Join us for a day full of arts, wellness, and entertainmen\n",
"\n",
"----------------------------------------------\n",
"predicted: ['car show', 'car shows', 'classic car show']\n",
"truth: car show\n",
"original text:\n",
"2018 Vancouver All British Classic Car Show @ VanDusen Garden\n",
" Dubbed “T he Greatest Show o\n",
"\n",
"----------------------------------------------\n",
"predicted: ['salsa', 'singles', 'singles party', 'yoga']\n",
"truth: single party\n",
"original text:\n",
"Singles Salsa\n",
" Dating events in London, huh? Same old, same old? \n",
" Well, start rubbing your\n",
"\n",
"----------------------------------------------\n"
]
}
],
"source": [
"def sample(i, limit=None, top_n=None):\n",
"    \"\"\"Print the predicted tags, true tags and raw text of test example ``i``.\n",
"\n",
"    Args:\n",
"        i: Position of the example in X_test / y_test_raw / X_test_raw.\n",
"        limit: Score threshold; tags scoring strictly above it are predicted.\n",
"            Takes precedence over ``top_n`` when both are given.\n",
"        top_n: Number of highest-scoring tags to predict when ``limit`` is\n",
"            not given.\n",
"\n",
"    Raises:\n",
"        ValueError: If neither ``limit`` nor ``top_n`` is specified.\n",
"    \"\"\"\n",
"    if limit is None and not top_n:\n",
"        raise ValueError('Either limit or top_n must be specified')\n",
"\n",
"    scores = model.predict(X_test[i])\n",
"    if limit is not None:\n",
"        # `is not None` (not truthiness) so an explicit threshold of 0 is honoured.\n",
"        indicator = (scores > limit) + 0.0\n",
"    else:\n",
"        # NOTE(review): assumes scores holds one row per input (hence [0]) - confirm.\n",
"        # Threshold at the n-th highest score; ties there may yield more than top_n tags.\n",
"        cutoff = sorted(scores[0], reverse=True)[:top_n][-1]\n",
"        indicator = (scores >= cutoff) + 0.0\n",
"\n",
"    # inverse_transform maps the 0/1 indicator back to tag names; [0] picks the single example.\n",
"    predicted = sorted(list(tags_processor.inverse_transform(indicator))[0])\n",
"    print('predicted: {}'.format(predicted))\n",
"    print('truth: {}'.format(list(y_test_raw)[i]))\n",
"    print('original text:\\n{}\\n'.format(list(X_test_raw)[i]))\n",
"    print('----------------------------------------------')\n",
" \n",
"# Inspect the first 100 test examples, predicting every tag that scores above 0.2.\n",
"for i in range(0, 100):\n",
"    sample(i, limit=0.2)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment