Created
July 18, 2018 22:27
-
-
Save aparrish/92c25d4f57eb7186670e03105a991508 to your computer and use it in GitHub Desktop.
Semantic similarity chatbots from plain-text files. Code examples released under CC0 https://creativecommons.org/choose/zero/, other text released under CC BY 4.0 https://creativecommons.org/licenses/by/4.0/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Semantic similarity chatbots from plain-text files\n", | |
"\n", | |
"By [Allison Parrish](http://www.decontextualize.com/)\n", | |
"\n", | |
"This needs copy, sorry :( :( :( But it works!" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"input_file = \"gutenberg_conversations.txt\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"!pip install https://github.com/aparrish/semanticsimilaritychatbot/archive/master.zip" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"from semanticsimilaritychatbot import SemanticSimilarityChatbot\n", | |
"import spacy\n", | |
"nlp = spacy.load('en_core_web_lg')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 36, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"chatbot = SemanticSimilarityChatbot(nlp, 300)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 37, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"lastline = None\n", | |
"for line in open(input_file):\n", | |
" line = line.strip()\n", | |
" # empty lines mean \"end of conversation\"\n", | |
" if line == \"\":\n", | |
" lastline = None\n", | |
" continue\n", | |
" if lastline is not None:\n", | |
" chatbot.add_pair(lastline, line)\n", | |
" lastline = line" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 38, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"chatbot.build()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 41, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'You must not be called Tiny any more. It is an ugly name, and you are so very pretty. We will call you Maia.'" | |
] | |
}, | |
"execution_count": 41, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"chatbot.response_for(\"Hello there.\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"!pip install flask" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"chatbot_html = \"\"\"\n", | |
"<style type=\"text/css\">#log p { margin: 5px; font-family: sans-serif; }</style>\n", | |
"<div id=\"log\"\n", | |
" style=\"box-sizing: border-box;\n", | |
" width: 600px;\n", | |
" height: 32em;\n", | |
" border: 1px grey solid;\n", | |
" padding: 2px;\n", | |
" overflow: scroll;\">\n", | |
"</div>\n", | |
"<input type=\"text\" id=\"typehere\" placeholder=\"type here!\"\n", | |
" style=\"box-sizing: border-box;\n", | |
" width: 600px;\n", | |
" margin-top: 5px;\">\n", | |
"<script>\n", | |
"function paraWithText(t) {\n", | |
" let tn = document.createTextNode(t);\n", | |
" let ptag = document.createElement('p');\n", | |
" ptag.appendChild(tn);\n", | |
" return ptag;\n", | |
"}\n", | |
"document.querySelector('#typehere').onchange = async function() {\n", | |
" let inputField = document.querySelector('#typehere');\n", | |
" let val = inputField.value;\n", | |
" inputField.value = \"\";\n", | |
" let resp = await getResp(val);\n", | |
" let objDiv = document.getElementById(\"log\");\n", | |
" objDiv.appendChild(paraWithText('😀: ' + val));\n", | |
" objDiv.appendChild(paraWithText('🤖: ' + resp));\n", | |
" objDiv.scrollTop = objDiv.scrollHeight;\n", | |
"};\n", | |
"async function getResp(val) {\n", | |
" let resp = await fetch(\"/response.json?sentence=\" + \n", | |
" encodeURIComponent(val));\n", | |
" let data = await resp.json();\n", | |
" return data['result'];\n", | |
"}\n", | |
"</script>\n", | |
"\"\"\"\n", | |
"from flask import Flask, request, jsonify\n", | |
"app = Flask(__name__)\n", | |
"@app.route(\"/response.json\")\n", | |
"def response():\n", | |
" sentence = request.args['sentence']\n", | |
" return jsonify(\n", | |
" {'result': chatbot.response_for(sentence)})\n", | |
"@app.route(\"/\")\n", | |
"def home():\n", | |
" return chatbot_html\n", | |
"app.run()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment