rndmcnlly · February 22, 2021 19:04
diff --git a/README.md b/README.md
diff --git a/apt.txt b/apt.txt
 jq
diff --git a/postBuild b/postBuild
 # Stop at the first failing command (-e) and echo every command as it runs (-x).
 set -e -x

 # Enable the voila server extension inside the environment prefix.
 jupyter serverextension enable voila --sys-prefix

 # Fetch the two Visual Genome archives that the notebook opens by name.
 for archive in image_data.json.zip relationships.json.zip; do
     wget "http://visualgenome.org/static/data/dataset/${archive}"
 done
diff --git a/requirements.txt b/requirements.txt
 numpy
 voila
 ijson
diff --git a/SearchUserInterface.ipynb b/SearchUserInterface.ipynb
 {
 "cells": [
  {
   "cell_type": "markdown",
   "id": "english-reality",
   "metadata": {},
   "source": [
    "# Simple Search Engine for Scenes\n",
    "\n",
    "A query like \"woman petting cat\" below will find scenes in the [VisualGenome dataset](http://visualgenome.org/) where there is a \"petting\" relationship with a subject of type \"woman\" and an object of type \"cat\". The query is split on spaces and must consist of exactly three words, in subject, predicate, object order. Results are ranked by the fraction of these three query slots they match (words must match exactly to count). Because the matching is done at the level of relationships, a single scene may appear in the results multiple times when it contains multiple relevant relationships."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "commercial-bangkok",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import zipfile\n",
    "import ijson\n",
    "import numpy as np\n",
    "import ipywidgets.widgets as ipw"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "urban-group",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the per-image metadata; the search cell later reads each entry's 'url' from it.\n",
    "with zipfile.ZipFile(\"image_data.json.zip\") as archive, archive.open(\"image_data.json\") as member:\n",
    "    image_data_json = json.load(member)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "prime-flexibility",
   "metadata": {},
   "outputs": [],
   "source": [
    "def _entity_name(entity):\n",
    "    \"\"\"Return the entity's 'name', or the first entry of 'names' when 'name' is missing or empty.\"\"\"\n",
    "    return entity.get('name') or entity['names'][0]\n",
    "\n",
    "\n",
    "# One (scene_index, subject, predicate, object) tuple per relationship.\n",
    "# scene_index is the scene's position in relationships.json; the search cell uses it to index\n",
    "# image_data_json, which presumably lists scenes in the same order -- TODO confirm.\n",
    "triples = []\n",
    "\n",
    "with zipfile.ZipFile(\"relationships.json.zip\") as archive:\n",
    "    with archive.open(\"relationships.json\") as stream:\n",
    "        # ijson yields the elements of the top-level array one scene at a time.\n",
    "        for scene_index, scene in enumerate(ijson.items(stream, 'item')):\n",
    "            triples.extend(\n",
    "                (scene_index, _entity_name(rel['subject']), rel['predicate'], _entity_name(rel['object']))\n",
    "                for rel in scene['relationships']\n",
    "            )\n",
    "\n",
    "#f'Extracted {len(triples)} triples from {len(image_data_json)} scenes.'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "voluntary-equality",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Widgets: the query text box, the search button, and the container that receives result thumbnails.\n",
    "query_widget = ipw.Text(value=\"woman petting cat\")\n",
    "search_widget = ipw.Button(description='Search')\n",
    "result_widget = ipw.VBox()\n",
    "\n",
    "\n",
    "def score(q, d):\n",
    "    \"\"\"Return the fraction of the three slots (indexes 1-3) in which triples `q` and `d` are equal.\n",
    "\n",
    "    Index 0 (the scene index) is ignored, so the result is 0, 1/3, 2/3 or 1.\n",
    "    \"\"\"\n",
    "    return ((q[1]==d[1]) + (q[2]==d[2]) + (q[3]==d[3]))/3\n",
    "\n",
    "\n",
    "def clicked_search(_, max_results=10):\n",
    "    \"\"\"Button callback: score every triple against the query and show the best matches.\n",
    "\n",
    "    Args:\n",
    "        _: The widget that fired the click (unused).\n",
    "        max_results: Maximum number of thumbnails to display.\n",
    "    \"\"\"\n",
    "    search_widget.disabled = True\n",
    "    try:\n",
    "        result_widget.children = ()\n",
    "\n",
    "        # split() with no argument ignores leading, trailing and repeated whitespace.\n",
    "        words = query_widget.value.split()\n",
    "        if len(words) != 3:\n",
    "            # Report the problem instead of raising inside the callback.\n",
    "            result_widget.children = (\n",
    "                ipw.HTML(value='Enter exactly three words: subject predicate object.'),\n",
    "            )\n",
    "            return\n",
    "        query = (-1, *words)\n",
    "\n",
    "        triple_scores = np.array([score(query, triple) for triple in triples])\n",
    "\n",
    "        # Negate so that argsort's ascending order puts the highest scores first.\n",
    "        top_indexes = np.argsort(-triple_scores)[:max_results]\n",
    "        result_chunks = []\n",
    "        for top_index in top_indexes:\n",
    "            item_scene_index = triples[top_index][0]\n",
    "            item_url = image_data_json[item_scene_index]['url']\n",
    "            result_chunks.append(\n",
    "                ipw.HTML(value=f'<a href=\"{item_url}\"><img src=\"{item_url}\" width=\"100\"></a>')\n",
    "            )\n",
    "        result_widget.children = tuple(result_chunks)\n",
    "    finally:\n",
    "        # Re-enable the button even when the search fails, so the UI stays usable.\n",
    "        search_widget.disabled = False\n",
    "\n",
    "\n",
    "search_widget.on_click(clicked_search)\n",
    "\n",
    "# Last expression of the cell: the assembled UI, rendered as the cell output.\n",
    "ipw.VBox([\n",
    "    ipw.HBox([query_widget,search_widget]),\n",
    "    result_widget])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }
	# Stop at the first failing command (-e) and echo every command as it runs (-x).
	set -e -x

	# Enable the voila server extension inside the environment prefix.
	jupyter serverextension enable voila --sys-prefix

	# Fetch the two Visual Genome archives that the notebook opens by name.
	for archive in image_data.json.zip relationships.json.zip; do
	    wget "http://visualgenome.org/static/data/dataset/${archive}"
	done
	{
	"cells": [
	{
	"cell_type": "markdown",
	"id": "english-reality",
	"metadata": {},
	"source": [
	"# Simple Search Engine for Scenes\n",
	"\n",
	"A query like \"woman petting cat\" below will find scenes in the [VisualGenome dataset](http://visualgenome.org/) where there is a \"petting\" relationship with a subject of type \"woman\" and an object of type \"cat\". The query is split on spaces and must consist of exactly three words, in subject, predicate, object order. Results are ranked by the fraction of these three query slots they match (words must match exactly to count). Because the matching is done at the level of relationships, a single scene may appear in the results multiple times when it contains multiple relevant relationships."
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "commercial-bangkok",
	"metadata": {},
	"outputs": [],
	"source": [
	"import json\n",
	"import zipfile\n",
	"import ijson\n",
	"import numpy as np\n",
	"import ipywidgets.widgets as ipw"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "urban-group",
	"metadata": {},
	"outputs": [],
	"source": [
	"# Load the per-image metadata; the search cell later reads each entry's 'url' from it.\n",
	"with zipfile.ZipFile(\"image_data.json.zip\") as archive, archive.open(\"image_data.json\") as member:\n",
	"    image_data_json = json.load(member)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "prime-flexibility",
	"metadata": {},
	"outputs": [],
	"source": [
	"def _entity_name(entity):\n",
	"    \"\"\"Return the entity's 'name', or the first entry of 'names' when 'name' is missing or empty.\"\"\"\n",
	"    return entity.get('name') or entity['names'][0]\n",
	"\n",
	"\n",
	"# One (scene_index, subject, predicate, object) tuple per relationship.\n",
	"# scene_index is the scene's position in relationships.json; the search cell uses it to index\n",
	"# image_data_json, which presumably lists scenes in the same order -- TODO confirm.\n",
	"triples = []\n",
	"\n",
	"with zipfile.ZipFile(\"relationships.json.zip\") as archive:\n",
	"    with archive.open(\"relationships.json\") as stream:\n",
	"        # ijson yields the elements of the top-level array one scene at a time.\n",
	"        for scene_index, scene in enumerate(ijson.items(stream, 'item')):\n",
	"            triples.extend(\n",
	"                (scene_index, _entity_name(rel['subject']), rel['predicate'], _entity_name(rel['object']))\n",
	"                for rel in scene['relationships']\n",
	"            )\n",
	"\n",
	"#f'Extracted {len(triples)} triples from {len(image_data_json)} scenes.'"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "voluntary-equality",
	"metadata": {},
	"outputs": [],
	"source": [
	"# Widgets: the query text box, the search button, and the container that receives result thumbnails.\n",
	"query_widget = ipw.Text(value=\"woman petting cat\")\n",
	"search_widget = ipw.Button(description='Search')\n",
	"result_widget = ipw.VBox()\n",
	"\n",
	"\n",
	"def score(q, d):\n",
	"    \"\"\"Return the fraction of the three slots (indexes 1-3) in which triples `q` and `d` are equal.\n",
	"\n",
	"    Index 0 (the scene index) is ignored, so the result is 0, 1/3, 2/3 or 1.\n",
	"    \"\"\"\n",
	"    return ((q[1]==d[1]) + (q[2]==d[2]) + (q[3]==d[3]))/3\n",
	"\n",
	"\n",
	"def clicked_search(_, max_results=10):\n",
	"    \"\"\"Button callback: score every triple against the query and show the best matches.\n",
	"\n",
	"    Args:\n",
	"        _: The widget that fired the click (unused).\n",
	"        max_results: Maximum number of thumbnails to display.\n",
	"    \"\"\"\n",
	"    search_widget.disabled = True\n",
	"    try:\n",
	"        result_widget.children = ()\n",
	"\n",
	"        # split() with no argument ignores leading, trailing and repeated whitespace.\n",
	"        words = query_widget.value.split()\n",
	"        if len(words) != 3:\n",
	"            # Report the problem instead of raising inside the callback.\n",
	"            result_widget.children = (\n",
	"                ipw.HTML(value='Enter exactly three words: subject predicate object.'),\n",
	"            )\n",
	"            return\n",
	"        query = (-1, *words)\n",
	"\n",
	"        triple_scores = np.array([score(query, triple) for triple in triples])\n",
	"\n",
	"        # Negate so that argsort's ascending order puts the highest scores first.\n",
	"        top_indexes = np.argsort(-triple_scores)[:max_results]\n",
	"        result_chunks = []\n",
	"        for top_index in top_indexes:\n",
	"            item_scene_index = triples[top_index][0]\n",
	"            item_url = image_data_json[item_scene_index]['url']\n",
	"            result_chunks.append(\n",
	"                ipw.HTML(value=f'<a href=\"{item_url}\"><img src=\"{item_url}\" width=\"100\"></a>')\n",
	"            )\n",
	"        result_widget.children = tuple(result_chunks)\n",
	"    finally:\n",
	"        # Re-enable the button even when the search fails, so the UI stays usable.\n",
	"        search_widget.disabled = False\n",
	"\n",
	"\n",
	"search_widget.on_click(clicked_search)\n",
	"\n",
	"# Last expression of the cell: the assembled UI, rendered as the cell output.\n",
	"ipw.VBox([\n",
	"    ipw.HBox([query_widget,search_widget]),\n",
	"    result_widget])"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.7.9"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 5
	}