Skip to content

Instantly share code, notes, and snippets.

@joshcarter
Created July 23, 2019 16:32
Show Gist options
  • Save joshcarter/b19f99b4783d6ccccbf6021975b39394 to your computer and use it in GitHub Desktop.
Save joshcarter/b19f99b4783d6ccccbf6021975b39394 to your computer and use it in GitHub Desktop.
ElasticSearch Test Code
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# ElasticSearch Tests\n",
"\n",
"- Assumes Elasticsearch (ES) is running locally\n",
"\n",
"- Uses MediaInfo JSON test data (see below)\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Imports\n",
"from datetime import datetime\n",
"from elasticsearch import Elasticsearch\n",
"import pandas as pd\n",
"from pathlib import Path\n",
"import json, random, string, uuid, datetime\n",
"\n",
"def letters(length=10):\n",
"    \"\"\"Return a string of `length` randomly chosen lowercase ASCII letters.\"\"\"\n",
"    alphabet = string.ascii_lowercase\n",
"    picked = [random.choice(alphabet) for _ in range(length)]\n",
"    return ''.join(picked)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Load MediaInfo Test Data"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"loading info from /Users/josh/Projects/jupyter_test/MediaInfoExamples/MediaInfo1.json\n",
"- title: Back to the Boat\n",
"loading info from /Users/josh/Projects/jupyter_test/MediaInfoExamples/MediaInfo2.json\n",
"- title: Behind the Scenes\n",
"loading info from /Users/josh/Projects/jupyter_test/MediaInfoExamples/MediaInfo3.json\n",
"- title: Captain's Tour: Inside the Boat\n",
"loading info from /Users/josh/Projects/jupyter_test/MediaInfoExamples/MediaInfo4.json\n",
"- title: Maria's Take\n",
"loading info from /Users/josh/Projects/jupyter_test/MediaInfoExamples/MediaInfo5.json\n",
"- title: The Battle of the Atlantic\n",
"loading info from /Users/josh/Projects/jupyter_test/MediaInfoExamples/MediaInfo6.json\n",
"- title: The Perfect Boat: The Director's Cut\n",
"loading info from /Users/josh/Projects/jupyter_test/MediaInfoExamples/MediaInfo7.json\n",
"- title: Das Boot (1981) Director's Cut\n"
]
}
],
"source": [
"# Connect to Elasticsearch and read the seven MediaInfo example files\n",
"es = Elasticsearch()\n",
"idx = 'test'\n",
"base_dir = Path('/Users/josh/Projects/jupyter_test/MediaInfoExamples')\n",
"media_info, general_info = [], []\n",
"\n",
"for file_no in range(1, 8):\n",
"    filename = f\"{base_dir}/MediaInfo{file_no}.json\"\n",
"    print(f\"loading info from {filename}\")\n",
"    with open(filename) as fh:\n",
"        mf = json.load(fh)\n",
"\n",
"    # Keep the whole document, plus its first track entry on its own\n",
"    gf = mf['media']['track'][0]\n",
"    media_info.append(mf)\n",
"    general_info.append(gf)\n",
"\n",
"    # Only some records carry a Title\n",
"    if 'Title' in gf:\n",
"        print(f\"- title: {gf['Title']}\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"inserting Back to the Boat\n",
"inserting Behind the Scenes\n",
"inserting Captain's Tour: Inside the Boat\n",
"inserting Maria's Take\n",
"inserting The Battle of the Atlantic\n",
"inserting The Perfect Boat: The Director's Cut\n",
"inserting Das Boot (1981) Director's Cut\n"
]
},
{
"data": {
"text/plain": [
"{'_shards': {'total': 2, 'successful': 1, 'failed': 0}}"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Insert test data into ES, using each record's list position as its document ID\n",
"for doc_id, gf in enumerate(general_info):\n",
"    # A record is not guaranteed to have a Title (the loading cell checks for it\n",
"    # too), so fall back to a placeholder instead of raising KeyError here.\n",
"    print(f\"inserting {gf.get('Title', '(untitled)')}\")\n",
"    es.index(index=idx, id=doc_id, body=gf)\n",
"\n",
"# Refresh the index so the documents just written are visible to searches\n",
"es.indices.refresh(index=idx)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Create Simple Test Data"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'_shards': {'total': 2, 'successful': 1, 'failed': 0}}"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def insert_test_data(index, n):\n",
"    \"\"\"Index `n` synthetic documents, with IDs 0..n-1, into `index`.\n",
"\n",
"    Every document gets a Title of the form object-<i>, a Type picked from\n",
"    four file extensions and a FileSize between 1000 and 1000000. The random\n",
"    module is re-seeded with 0 first so each run generates the same values.\n",
"    Documents are written through the notebook-level `es` client.\n",
"    \"\"\"\n",
"    # make test data consistent from run to run\n",
"    random.seed(0)\n",
"\n",
"    for i in range(n):\n",
"        meta = {\n",
"            \"Title\": f\"object-{i}\",\n",
"            \"Type\": random.choice([\"mp4\", \"aac\", \"mkv\", \"txt\"]),\n",
"            \"FileSize\": random.randint(1000, 1000000),\n",
"        }\n",
"        es.index(index=index, id=i, body=meta)\n",
"\n",
"# The synthetic documents use IDs starting at 0, exactly like the MediaInfo\n",
"# documents in the 'test' index, so they need an index of their own or they\n",
"# would overwrite those documents.\n",
"idx2 = 'media_info2'\n",
"insert_test_data(idx2, 1000)\n",
"es.indices.refresh(index=idx2)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# MediaInfo Query and Search"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Back to the Boat'"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fetch document 0 straight from the index by ID and show its title\n",
"doc = es.get(index=idx, id=0)\n",
"doc['_source']['Title']"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>score</th>\n",
" <th>title</th>\n",
" <th>size</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.849856</td>\n",
" <td>Back to the Boat</td>\n",
" <td>1138580029</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.773912</td>\n",
" <td>Captain's Tour: Inside the Boat</td>\n",
" <td>249778146</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.710427</td>\n",
" <td>The Perfect Boat: The Director's Cut</td>\n",
" <td>369443217</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" score title size\n",
"0 0.849856 Back to the Boat 1138580029\n",
"1 0.773912 Captain's Tour: Inside the Boat 249778146\n",
"2 0.710427 The Perfect Boat: The Director's Cut 369443217"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Full-text match on the Title field, tabulated as score / title / size\n",
"res = es.search(index=idx, body={\"query\": {\"match\": {\"Title\": \"Boat\"}}})\n",
"dat = [\n",
"    {\n",
"        'score': hit['_score'],\n",
"        'title': hit['_source']['Title'],\n",
"        'size': hit['_source']['FileSize'],\n",
"    }\n",
"    for hit in res['hits']['hits']\n",
"]\n",
"\n",
"pd.DataFrame(dat, columns=['score', 'title', 'size'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Simple Test Data Search"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>score</th>\n",
" <th>title</th>\n",
" <th>type</th>\n",
" <th>size</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1.464175</td>\n",
" <td>object-2</td>\n",
" <td>mkv</td>\n",
" <td>537110</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1.464175</td>\n",
" <td>object-4</td>\n",
" <td>mkv</td>\n",
" <td>500748</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1.464175</td>\n",
" <td>object-5</td>\n",
" <td>mkv</td>\n",
" <td>612720</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1.464175</td>\n",
" <td>object-10</td>\n",
" <td>mkv</td>\n",
" <td>954938</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1.464175</td>\n",
" <td>object-14</td>\n",
" <td>mkv</td>\n",
" <td>496077</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>1.464175</td>\n",
" <td>object-19</td>\n",
" <td>mkv</td>\n",
" <td>66304</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>1.464175</td>\n",
" <td>object-24</td>\n",
" <td>mkv</td>\n",
" <td>256759</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>1.464175</td>\n",
" <td>object-25</td>\n",
" <td>mkv</td>\n",
" <td>738822</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>1.464175</td>\n",
" <td>object-32</td>\n",
" <td>mkv</td>\n",
" <td>579045</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>1.464175</td>\n",
" <td>object-33</td>\n",
" <td>mkv</td>\n",
" <td>741883</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" score title type size\n",
"0 1.464175 object-2 mkv 537110\n",
"1 1.464175 object-4 mkv 500748\n",
"2 1.464175 object-5 mkv 612720\n",
"3 1.464175 object-10 mkv 954938\n",
"4 1.464175 object-14 mkv 496077\n",
"5 1.464175 object-19 mkv 66304\n",
"6 1.464175 object-24 mkv 256759\n",
"7 1.464175 object-25 mkv 738822\n",
"8 1.464175 object-32 mkv 579045\n",
"9 1.464175 object-33 mkv 741883"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Match on the Type field of the synthetic documents and tabulate the hits\n",
"res = es.search(index=idx2, body={\"query\": {\"match\": {\"Type\": \"mkv\"}}})\n",
"dat = [\n",
"    {\n",
"        'score': hit['_score'],\n",
"        'title': hit['_source']['Title'],\n",
"        'type': hit['_source']['Type'],\n",
"        'size': hit['_source']['FileSize'],\n",
"    }\n",
"    for hit in res['hits']['hits']\n",
"]\n",
"\n",
"pd.DataFrame(dat, columns=['score', 'title', 'type', 'size'])"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"7 entries\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>score</th>\n",
" <th>title</th>\n",
" <th>type</th>\n",
" <th>size</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1.0</td>\n",
" <td>object-66</td>\n",
" <td>mp4</td>\n",
" <td>106494</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1.0</td>\n",
" <td>object-146</td>\n",
" <td>aac</td>\n",
" <td>106047</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1.0</td>\n",
" <td>object-315</td>\n",
" <td>mp4</td>\n",
" <td>105030</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1.0</td>\n",
" <td>object-347</td>\n",
" <td>mp4</td>\n",
" <td>108563</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1.0</td>\n",
" <td>object-801</td>\n",
" <td>mkv</td>\n",
" <td>101983</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>1.0</td>\n",
" <td>object-814</td>\n",
" <td>mkv</td>\n",
" <td>104269</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>1.0</td>\n",
" <td>object-855</td>\n",
" <td>mkv</td>\n",
" <td>102247</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" score title type size\n",
"0 1.0 object-66 mp4 106494\n",
"1 1.0 object-146 aac 106047\n",
"2 1.0 object-315 mp4 105030\n",
"3 1.0 object-347 mp4 108563\n",
"4 1.0 object-801 mkv 101983\n",
"5 1.0 object-814 mkv 104269\n",
"6 1.0 object-855 mkv 102247"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Search by file size range\n",
"# Ask for up to 25 hits explicitly (the same page size the explain query uses):\n",
"# a search returns only 10 hits by default, which would silently cap the\n",
"# entry count printed below.\n",
"res = es.search(index=idx2, body={\"size\": 25, \"query\": {\"range\": {\"FileSize\": {\"gte\": 100000, \"lte\": 110000}}}})\n",
"dat = [\n",
"    {\n",
"        'score': hit['_score'],\n",
"        'title': hit['_source']['Title'],\n",
"        'type': hit['_source']['Type'],\n",
"        'size': hit['_source']['FileSize'],\n",
"    }\n",
"    for hit in res['hits']['hits']\n",
"]\n",
"\n",
"print(f'{len(dat)} entries')\n",
"pd.DataFrame(dat, columns=['score', 'title', 'type', 'size'])"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'took': 3,\n",
" 'timed_out': False,\n",
" '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},\n",
" 'hits': {'total': {'value': 3, 'relation': 'eq'},\n",
" 'max_score': 1.0,\n",
" 'hits': [{'_shard': '[media_info2][0]',\n",
" '_node': 'BHkqVJoZTWCqJgx3hwbBUg',\n",
" '_index': 'media_info2',\n",
" '_type': '_doc',\n",
" '_id': '801',\n",
" '_score': 1.0,\n",
" '_source': {'Title': 'object-801', 'Type': 'mkv', 'FileSize': 101983},\n",
" '_explanation': {'value': 1.0,\n",
" 'description': 'FileSize:[100000 TO 105000]',\n",
" 'details': []}},\n",
" {'_shard': '[media_info2][0]',\n",
" '_node': 'BHkqVJoZTWCqJgx3hwbBUg',\n",
" '_index': 'media_info2',\n",
" '_type': '_doc',\n",
" '_id': '814',\n",
" '_score': 1.0,\n",
" '_source': {'Title': 'object-814', 'Type': 'mkv', 'FileSize': 104269},\n",
" '_explanation': {'value': 1.0,\n",
" 'description': 'FileSize:[100000 TO 105000]',\n",
" 'details': []}},\n",
" {'_shard': '[media_info2][0]',\n",
" '_node': 'BHkqVJoZTWCqJgx3hwbBUg',\n",
" '_index': 'media_info2',\n",
" '_type': '_doc',\n",
" '_id': '855',\n",
" '_score': 1.0,\n",
" '_source': {'Title': 'object-855', 'Type': 'mkv', 'FileSize': 102247},\n",
" '_explanation': {'value': 1.0,\n",
" 'description': 'FileSize:[100000 TO 105000]',\n",
" 'details': []}}]}}"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Range query on FileSize again, this time asking ES to explain each hit's score\n",
"explain_query = {\n",
"    \"explain\": True,\n",
"    \"size\": 25,\n",
"    \"query\": {\"range\": {\"FileSize\": {\"gte\": 100000, \"lte\": 105000}}},\n",
"}\n",
"es.search(index=idx2, body=explain_query)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Leftovers"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# Alternate display using tabulate package\n",
"# from IPython.display import HTML, display\n",
"# import tabulate\n",
"# \n",
"# res = es.search(index=idx, body={\"query\": {\"match\": {\"Title\": \"Boat\"}}})\n",
"# dat = []\n",
"# for hit in res['hits']['hits']:\n",
"# dat.append({\n",
"# 'score': hit['_score'],\n",
"# 'title': hit['_source']['Title'],\n",
"# 'size': hit['_source']['FileSize'],\n",
"# })\n",
"#\n",
"# display(HTML(tabulate.tabulate(dat, tablefmt='html')))"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'2019-01-25T00:00:00'"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# ISO-8601 timestamp for a randomly chosen day (1-30) of January 2019\n",
"day = random.randint(1, 30)\n",
"datetime.datetime(2019, 1, day).isoformat()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'pvzljmteydmlqphuwlulnilmyywjdpjdoelhxfkphdvmmoqosthvmqjphkqvacpkmhnbsybncigxkfdfzwlahbamraedttgcxoga'"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sample of the helper's output: 100 random lowercase letters\n",
"letters(length=100)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"loading info from /Users/josh/Projects/jupyter_test/MediaInfoTemplates/mkv.json\n"
]
},
{
"data": {
"text/plain": [
"'{\\n \"media\": {\\n \"@ref\": \"/Users/josh/Downloads/2019-05-01/Das.Boot.1981.Directors.Cut.Bluray.1080p.DTS-HD.x264-Grym/Das.Boot.1981.Directors.Cut.Bluray.1080p.DTS-HD.x264-Grym@BTNET/Das.Boot.Extras-Grym@BTNET/Back.to.the.Boat.(Doc)-Grym@BTNET/Back.to.the.Boat.(Doc)[email protected]\",\\n \"track\": [\\n {\\n \"@type\": \"General\",\\n \"UniqueID\": \"{uuid.uuid4()}\",\\n \"VideoCount\": \"1\",\\n \"AudioCount\": \"1\",\\n \"MenuCount\": \"1\",\\n \"FileExtension\": \"mkv\",\\n \"Format\": \"Matroska\",\\n \"Format_Version\": \"2\",\\n \"FileSize\": \"{random.randint(10000,1000000)}\",\\n \"Duration\": \"2688.436\",\\n \"OverallBitRate\": \"3388082\",\\n \"FrameRate\": \"23.976\",\\n \"FrameCount\": \"{random.randint(100,10000)}\",\\n \"StreamSize\": \"22656697\",\\n \"IsStreamable\": \"Yes\",\\n \"Title\": \"{letters(100)}\",\\n \"Movie\": \"{letters(50)}\",\\n \"Encoded_Date\": \"{datetime.datetime(2019,1,random.randint(1,30)).isoformat()}\",\\n \"File_Modified_Date\": \"UTC 2013-12-08 10:09:36\",\\n \"File_Modified_Date_Local\": \"2013-12-08 03:09:36\",\\n \"Encoded_Application\": \"DVDFab\",\\n \"Encoded_Library\": \"libebml v0.7.8 + libmatroska v0.8.1\"\\n },\\n {\\n \"@type\": \"Video\",\\n \"StreamOrder\": \"0\",\\n \"ID\": \"1\",\\n \"UniqueID\": \"329804448\",\\n \"Format\": \"AVC\",\\n \"Format_Profile\": \"High\",\\n \"Format_Level\": \"4.1\",\\n \"Format_Settings_CABAC\": \"Yes\",\\n \"Format_Settings_RefFrames\": \"2\",\\n \"CodecID\": \"V_MPEG4/ISO/AVC\",\\n \"Duration\": \"2688.438\",\\n \"BitRate\": \"2872659\",\\n \"Width\": \"1280\",\\n \"Height\": \"720\",\\n \"Sampled_Width\": \"1280\",\\n \"Sampled_Height\": \"720\",\\n \"PixelAspectRatio\": \"1.000\",\\n \"DisplayAspectRatio\": \"1.778\",\\n \"FrameRate_Mode\": \"CFR\",\\n \"FrameRate\": \"23.976\",\\n \"FrameCount\": \"64458\",\\n \"ColorSpace\": \"YUV\",\\n \"ChromaSubsampling\": \"4:2:0\",\\n \"BitDepth\": \"8\",\\n \"ScanType\": \"Progressive\",\\n \"Delay\": \"0.000\",\\n 
\"StreamSize\": \"965370916\",\\n \"Encoded_Library\": \"x264 - core\",\\n \"Encoded_Library_Name\": \"x264\",\\n \"Encoded_Library_Version\": \"core\",\\n \"Default\": \"Yes\",\\n \"Forced\": \"No\",\\n \"colour_description_present\": \"Yes\",\\n \"colour_description_present_Source\": \"Stream\",\\n \"colour_range\": \"Limited\",\\n \"colour_range_Source\": \"Stream\",\\n \"colour_primaries\": \"BT.709\",\\n \"colour_primaries_Source\": \"Stream\",\\n \"transfer_characteristics\": \"BT.709\",\\n \"transfer_characteristics_Source\": \"Stream\",\\n \"matrix_coefficients\": \"BT.709\",\\n \"matrix_coefficients_Source\": \"Stream\"\\n },\\n {\\n \"@type\": \"Audio\",\\n \"StreamOrder\": \"1\",\\n \"ID\": \"2\",\\n \"UniqueID\": \"857171869\",\\n \"Format\": \"AC-3\",\\n \"Format_Commercial_IfAny\": \"Dolby Digital\",\\n \"Format_Settings_Endianness\": \"Big\",\\n \"CodecID\": \"A_AC3\",\\n \"Duration\": \"2688.436\",\\n \"BitRate_Mode\": \"CBR\",\\n \"BitRate\": \"448000\",\\n \"Channels\": \"6\",\\n \"ChannelPositions\": \"Front: L C R, Side: L R, LFE\",\\n \"ChannelLayout\": \"L R C LFE Ls Rs\",\\n \"SamplesPerFrame\": \"1536\",\\n \"SamplingRate\": \"48000\",\\n \"SamplingCount\": \"129044928\",\\n \"FrameRate\": \"31.250\",\\n \"BitDepth\": \"16\",\\n \"Compression_Mode\": \"Lossy\",\\n \"Delay\": \"0.000\",\\n \"Delay_Source\": \"Container\",\\n \"StreamSize\": \"150552416\",\\n \"StreamSize_Proportion\": \"0.13223\",\\n \"Language\": \"en\",\\n \"ServiceKind\": \"CM\",\\n \"Default\": \"Yes\",\\n \"Forced\": \"No\",\\n \"extra\": {\\n \"bsid\": \"8\",\\n \"dialnorm\": \"-31\",\\n \"compr\": \"-0.28\",\\n \"acmod\": \"7\",\\n \"lfeon\": \"1\",\\n \"dialnorm_Average\": \"-31\",\\n \"dialnorm_Minimum\": \"-31\",\\n \"compr_Average\": \"0.42\",\\n \"compr_Minimum\": \"-2.50\",\\n \"compr_Maximum\": \"1.94\",\\n \"compr_Count\": \"334\",\\n \"dynrng_Average\": \"0.25\",\\n \"dynrng_Minimum\": \"-2.68\",\\n \"dynrng_Maximum\": \"1.94\",\\n \"dynrng_Count\": \"556\"\\n 
}\\n },\\n {\\n \"@type\": \"Menu\",\\n \"extra\": {\\n \"_00_00_00_000\": \"en:(01)00:00:00:000\",\\n \"_00_44_48_769\": \"en:(02)00:44:48:769\"\\n }\\n }\\n ]\\n }\\n}\\n'"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Read the MKV MediaInfo template as plain text\n",
"base_dir = Path('/Users/josh/Projects/jupyter_test/MediaInfoTemplates')\n",
"filename = f\"{base_dir}/mkv.json\"\n",
"print(f\"loading info from {filename}\")\n",
"\n",
"with open(filename) as template_file:\n",
"    raw = template_file.read()\n",
"\n",
"# The {...} placeholders in the template are left unexpanded; show the text as read\n",
"raw"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'key': 'rec-500',\n",
" 'letters': 'dgruaxbaaaaagaikwqabkxcanpilhwupedytitgdrwcrrckrhvocapkewubqaedycdvmdekmrlzxlotfjcfjcaibwbosfjwwlgpoyoljnfbhodarfhzidcyynijczqha',\n",
" 'number': 500}"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Look up one record in the es_test index by document ID and show its stored fields\n",
"record = es.get(index=\"es_test\", id=\"rec-500\")\n",
"record['_source']"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"10 entries\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>score</th>\n",
" <th>key</th>\n",
" <th>number</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1.0</td>\n",
" <td>rec-500</td>\n",
" <td>500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1.0</td>\n",
" <td>rec-501</td>\n",
" <td>501</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1.0</td>\n",
" <td>rec-502</td>\n",
" <td>502</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1.0</td>\n",
" <td>rec-503</td>\n",
" <td>503</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1.0</td>\n",
" <td>rec-504</td>\n",
" <td>504</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>1.0</td>\n",
" <td>rec-505</td>\n",
" <td>505</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>1.0</td>\n",
" <td>rec-506</td>\n",
" <td>506</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>1.0</td>\n",
" <td>rec-507</td>\n",
" <td>507</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>1.0</td>\n",
" <td>rec-508</td>\n",
" <td>508</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>1.0</td>\n",
" <td>rec-509</td>\n",
" <td>509</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" score key number\n",
"0 1.0 rec-500 500\n",
"1 1.0 rec-501 501\n",
"2 1.0 rec-502 502\n",
"3 1.0 rec-503 503\n",
"4 1.0 rec-504 504\n",
"5 1.0 rec-505 505\n",
"6 1.0 rec-506 506\n",
"7 1.0 rec-507 507\n",
"8 1.0 rec-508 508\n",
"9 1.0 rec-509 509"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Search by number range\n",
"# The range 500..510 is inclusive, so it can match 11 records, but a search\n",
"# returns only 10 hits by default. Request a larger page explicitly so the\n",
"# entry count and table below are not silently truncated.\n",
"res = es.search(index=\"es_test\", body={\"size\": 25, \"query\": {\"range\": {\"number\": {\"gte\": 500, \"lte\": 510}}}})\n",
"dat = [\n",
"    {\n",
"        'score': hit['_score'],\n",
"        'key': hit['_source']['key'],\n",
"        'number': hit['_source']['number'],\n",
"    }\n",
"    for hit in res['hits']['hits']\n",
"]\n",
"\n",
"print(f'{len(dat)} entries')\n",
"pd.DataFrame(dat, columns=['score', 'key', 'number'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment