-
-
Save mullikine/9776fb19871cee0137e115c0f17618b3 to your computer and use it in GitHub Desktop.
Semantic similarity chatbots from plain-text files ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Semantic similarity chatbots from plain-text files\n", | |
"\n", | |
"By [Allison Parrish](http://www.decontextualize.com/)\n", | |
"\n", | |
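"This notebook still needs proper explanatory copy (sorry!), but it works. In short: it reads conversation turns from a plain-text file (one turn per line, with blank lines separating conversations), builds a semantic similarity chatbot from them, and serves it on a small Flask web page." | |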
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Plain-text corpus read by the loading cell below: one conversational turn per line,\n", | |
"# with an empty line marking the end of each conversation.\n", | |
"input_file = \"gutenberg_conversations.txt\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"!pip install https://github.com/aparrish/semanticsimilaritychatbot/archive/master.zip" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"from semanticsimilaritychatbot import SemanticSimilarityChatbot\n", | |
"import spacy\n", | |
"# Load the large English spaCy model; it must already be installed\n", | |
"# (e.g. `python -m spacy download en_core_web_lg`).\n", | |
"nlp = spacy.load('en_core_web_lg')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 36, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# NOTE(review): 300 is presumably the dimensionality of the en_core_web_lg word vectors --\n", | |
"# confirm against the SemanticSimilarityChatbot constructor before changing the model.\n", | |
"chatbot = SemanticSimilarityChatbot(nlp, 300)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 37, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"lastline = None\n", | |
"for line in open(input_file):\n", | |
" line = line.strip()\n", | |
" # empty lines mean \"end of conversation\"\n", | |
" if line == \"\":\n", | |
" lastline = None\n", | |
" continue\n", | |
" if lastline is not None:\n", | |
" chatbot.add_pair(lastline, line)\n", | |
" lastline = line" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 38, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Finalize the chatbot once all pairs have been added.\n", | |
"# NOTE(review): presumably this must run before response_for() is called -- confirm against the library.\n", | |
"chatbot.build()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 41, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'You must not be called Tiny any more. It is an ugly name, and you are so very pretty. We will call you Maia.'" | |
] | |
}, | |
"execution_count": 41, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Smoke test: ask the freshly built chatbot for a reply to a sample sentence.\n", | |
"chatbot.response_for(\"Hello there.\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"!pip install flask" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"chatbot_html = \"\"\"\n", | |
"<style type=\"text/css\">#log p { margin: 5px; font-family: sans-serif; }</style>\n", | |
"<div id=\"log\"\n", | |
" style=\"box-sizing: border-box;\n", | |
" width: 600px;\n", | |
" height: 32em;\n", | |
" border: 1px grey solid;\n", | |
" padding: 2px;\n", | |
" overflow: scroll;\">\n", | |
"</div>\n", | |
"<input type=\"text\" id=\"typehere\" placeholder=\"type here!\"\n", | |
" style=\"box-sizing: border-box;\n", | |
" width: 600px;\n", | |
" margin-top: 5px;\">\n", | |
"<script>\n", | |
"function paraWithText(t) {\n", | |
" let tn = document.createTextNode(t);\n", | |
" let ptag = document.createElement('p');\n", | |
" ptag.appendChild(tn);\n", | |
" return ptag;\n", | |
"}\n", | |
"document.querySelector('#typehere').onchange = async function() {\n", | |
" let inputField = document.querySelector('#typehere');\n", | |
" let val = inputField.value;\n", | |
" inputField.value = \"\";\n", | |
" let resp = await getResp(val);\n", | |
" let objDiv = document.getElementById(\"log\");\n", | |
" objDiv.appendChild(paraWithText('😀: ' + val));\n", | |
" objDiv.appendChild(paraWithText('🤖: ' + resp));\n", | |
" objDiv.scrollTop = objDiv.scrollHeight;\n", | |
"};\n", | |
"async function getResp(val) {\n", | |
" let resp = await fetch(\"/response.json?sentence=\" + \n", | |
" encodeURIComponent(val));\n", | |
" let data = await resp.json();\n", | |
" return data['result'];\n", | |
"}\n", | |
"</script>\n", | |
"\"\"\"\n", | |
"from flask import Flask, request, jsonify\n", | |
"# Minimal Flask app: one route serves the chat page, the other returns replies as JSON.\n", | |
"app = Flask(__name__)\n", | |
"@app.route(\"/response.json\")\n", | |
"def response():\n", | |
" \"\"\"Return the chatbot's reply to the `sentence` query parameter as JSON.\"\"\"\n", | |
" # Indexing request.args directly means a request without `sentence` is\n", | |
" # rejected by Flask with a 400 Bad Request.\n", | |
" sentence = request.args['sentence']\n", | |
" # The 'result' key is the one the page's getResp() script reads.\n", | |
" return jsonify(\n", | |
" {'result': chatbot.response_for(sentence)})\n", | |
"@app.route(\"/\")\n", | |
"def home():\n", | |
" \"\"\"Serve the chat page held in the chatbot_html string.\"\"\"\n", | |
" return chatbot_html\n", | |
"# Starts Flask's development server; this call blocks the cell until the server is stopped.\n", | |
"app.run()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment