Created
July 23, 2019 16:32
-
-
Save joshcarter/b19f99b4783d6ccccbf6021975b39394 to your computer and use it in GitHub Desktop.
ElasticSearch Test Code
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# ElasticSearch Tests\n", | |
"\n", | |
"- Assumes ES running locally\n", | |
"\n", | |
"- Uses MediaInfo JSON test data (see below)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Imports\n", | |
"from datetime import datetime\n", | |
"from elasticsearch import Elasticsearch\n", | |
"import pandas as pd\n", | |
"from pathlib import Path\n", | |
"import json, random, string, uuid, datetime\n", | |
"\n", | |
"def letters(length=10):\n", | |
" \"\"\"Generate a random string of fixed length \"\"\"\n", | |
" chars = string.ascii_lowercase\n", | |
" return ''.join(random.choice(chars) for i in range(length))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Load MediaInfo Test Data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"loading info from /Users/josh/Projects/jupyter_test/MediaInfoExamples/MediaInfo1.json\n", | |
"- title: Back to the Boat\n", | |
"loading info from /Users/josh/Projects/jupyter_test/MediaInfoExamples/MediaInfo2.json\n", | |
"- title: Behind the Scenes\n", | |
"loading info from /Users/josh/Projects/jupyter_test/MediaInfoExamples/MediaInfo3.json\n", | |
"- title: Captain's Tour: Inside the Boat\n", | |
"loading info from /Users/josh/Projects/jupyter_test/MediaInfoExamples/MediaInfo4.json\n", | |
"- title: Maria's Take\n", | |
"loading info from /Users/josh/Projects/jupyter_test/MediaInfoExamples/MediaInfo5.json\n", | |
"- title: The Battle of the Atlantic\n", | |
"loading info from /Users/josh/Projects/jupyter_test/MediaInfoExamples/MediaInfo6.json\n", | |
"- title: The Perfect Boat: The Director's Cut\n", | |
"loading info from /Users/josh/Projects/jupyter_test/MediaInfoExamples/MediaInfo7.json\n", | |
"- title: Das Boot (1981) Director's Cut\n" | |
] | |
} | |
], | |
"source": [ | |
"# Load test data\n", | |
"es = Elasticsearch()\n", | |
"idx = 'test'\n", | |
"base_dir = Path('/Users/josh/Projects/jupyter_test/MediaInfoExamples')\n", | |
"media_info = []\n", | |
"general_info = []\n", | |
"\n", | |
"for i in range(7):\n", | |
" filename = f\"{base_dir}/MediaInfo{i+1}.json\"\n", | |
" print(f\"loading info from {filename}\")\n", | |
" with open(filename) as f:\n", | |
" mf = json.load(f)\n", | |
" gf = mf['media']['track'][0]\n", | |
" media_info.append(mf)\n", | |
" general_info.append(gf)\n", | |
" if 'Title' in gf.keys():\n", | |
" print(f\"- title: {gf['Title']}\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"inserting Back to the Boat\n", | |
"inserting Behind the Scenes\n", | |
"inserting Captain's Tour: Inside the Boat\n", | |
"inserting Maria's Take\n", | |
"inserting The Battle of the Atlantic\n", | |
"inserting The Perfect Boat: The Director's Cut\n", | |
"inserting Das Boot (1981) Director's Cut\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"{'_shards': {'total': 2, 'successful': 1, 'failed': 0}}" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Insert Test data into ES\n", | |
"for i in range(len(general_info)):\n", | |
" gf = general_info[i]\n", | |
" print(f\"inserting {gf['Title']}\")\n", | |
" es.index(index=idx, id=i, body=gf)\n", | |
"\n", | |
"es.indices.refresh(index=idx)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Create Simple Test Data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'_shards': {'total': 2, 'successful': 1, 'failed': 0}}" | |
] | |
}, | |
"execution_count": 16, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"def insert_test_data(index, n):\n", | |
" # make test data consistent from run to run\n", | |
" random.seed(0)\n", | |
"\n", | |
" # insert new data\n", | |
" for i in range(n):\n", | |
" meta = {\n", | |
" \"Title\": f\"object-{i}\",\n", | |
" \"Type\": random.choice([\"mp4\", \"aac\", \"mkv\", \"txt\"]),\n", | |
" \"FileSize\": random.randint(1000, 1000000),\n", | |
" }\n", | |
"\n", | |
" es.index(index=index, id=i, body=meta)\n", | |
"\n", | |
"idx2 = 'test'\n", | |
"insert_test_data(idx2, 1000)\n", | |
"es.indices.refresh(index=idx2)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# MediaInfo Query and Search" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'Back to the Boat'" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Query by ID\n", | |
"es.get(index=idx, id=0)['_source']['Title']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>score</th>\n", | |
" <th>title</th>\n", | |
" <th>size</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0.849856</td>\n", | |
" <td>Back to the Boat</td>\n", | |
" <td>1138580029</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0.773912</td>\n", | |
" <td>Captain's Tour: Inside the Boat</td>\n", | |
" <td>249778146</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0.710427</td>\n", | |
" <td>The Perfect Boat: The Director's Cut</td>\n", | |
" <td>369443217</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" score title size\n", | |
"0 0.849856 Back to the Boat 1138580029\n", | |
"1 0.773912 Captain's Tour: Inside the Boat 249778146\n", | |
"2 0.710427 The Perfect Boat: The Director's Cut 369443217" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Search by title\n", | |
"res = es.search(index=idx, body={\"query\": {\"match\": {\"Title\": \"Boat\"}}})\n", | |
"dat = []\n", | |
"for hit in res['hits']['hits']:\n", | |
" dat.append({\n", | |
" 'score': hit['_score'],\n", | |
" 'title': hit['_source']['Title'],\n", | |
" 'size': hit['_source']['FileSize'],\n", | |
" })\n", | |
"\n", | |
"pd.DataFrame(dat, columns=['score', 'title', 'size'])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Simple Test Data Search" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>score</th>\n", | |
" <th>title</th>\n", | |
" <th>type</th>\n", | |
" <th>size</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1.464175</td>\n", | |
" <td>object-2</td>\n", | |
" <td>mkv</td>\n", | |
" <td>537110</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1.464175</td>\n", | |
" <td>object-4</td>\n", | |
" <td>mkv</td>\n", | |
" <td>500748</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1.464175</td>\n", | |
" <td>object-5</td>\n", | |
" <td>mkv</td>\n", | |
" <td>612720</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1.464175</td>\n", | |
" <td>object-10</td>\n", | |
" <td>mkv</td>\n", | |
" <td>954938</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1.464175</td>\n", | |
" <td>object-14</td>\n", | |
" <td>mkv</td>\n", | |
" <td>496077</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>1.464175</td>\n", | |
" <td>object-19</td>\n", | |
" <td>mkv</td>\n", | |
" <td>66304</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>1.464175</td>\n", | |
" <td>object-24</td>\n", | |
" <td>mkv</td>\n", | |
" <td>256759</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>1.464175</td>\n", | |
" <td>object-25</td>\n", | |
" <td>mkv</td>\n", | |
" <td>738822</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>1.464175</td>\n", | |
" <td>object-32</td>\n", | |
" <td>mkv</td>\n", | |
" <td>579045</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>1.464175</td>\n", | |
" <td>object-33</td>\n", | |
" <td>mkv</td>\n", | |
" <td>741883</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" score title type size\n", | |
"0 1.464175 object-2 mkv 537110\n", | |
"1 1.464175 object-4 mkv 500748\n", | |
"2 1.464175 object-5 mkv 612720\n", | |
"3 1.464175 object-10 mkv 954938\n", | |
"4 1.464175 object-14 mkv 496077\n", | |
"5 1.464175 object-19 mkv 66304\n", | |
"6 1.464175 object-24 mkv 256759\n", | |
"7 1.464175 object-25 mkv 738822\n", | |
"8 1.464175 object-32 mkv 579045\n", | |
"9 1.464175 object-33 mkv 741883" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Search by type\n", | |
"res = es.search(index=idx2, body={\"query\": {\"match\": {\"Type\": \"mkv\"}}})\n", | |
"dat = []\n", | |
"for hit in res['hits']['hits']:\n", | |
" dat.append({\n", | |
" 'score': hit['_score'],\n", | |
" 'title': hit['_source']['Title'],\n", | |
" 'type': hit['_source']['Type'],\n", | |
" 'size': hit['_source']['FileSize'],\n", | |
" })\n", | |
"\n", | |
"pd.DataFrame(dat, columns=['score', 'title', 'type', 'size'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"7 entries\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>score</th>\n", | |
" <th>title</th>\n", | |
" <th>type</th>\n", | |
" <th>size</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1.0</td>\n", | |
" <td>object-66</td>\n", | |
" <td>mp4</td>\n", | |
" <td>106494</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1.0</td>\n", | |
" <td>object-146</td>\n", | |
" <td>aac</td>\n", | |
" <td>106047</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1.0</td>\n", | |
" <td>object-315</td>\n", | |
" <td>mp4</td>\n", | |
" <td>105030</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1.0</td>\n", | |
" <td>object-347</td>\n", | |
" <td>mp4</td>\n", | |
" <td>108563</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1.0</td>\n", | |
" <td>object-801</td>\n", | |
" <td>mkv</td>\n", | |
" <td>101983</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>1.0</td>\n", | |
" <td>object-814</td>\n", | |
" <td>mkv</td>\n", | |
" <td>104269</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>1.0</td>\n", | |
" <td>object-855</td>\n", | |
" <td>mkv</td>\n", | |
" <td>102247</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" score title type size\n", | |
"0 1.0 object-66 mp4 106494\n", | |
"1 1.0 object-146 aac 106047\n", | |
"2 1.0 object-315 mp4 105030\n", | |
"3 1.0 object-347 mp4 108563\n", | |
"4 1.0 object-801 mkv 101983\n", | |
"5 1.0 object-814 mkv 104269\n", | |
"6 1.0 object-855 mkv 102247" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Search by length\n", | |
"res = es.search(index=idx2, body={\"query\": {\"range\": {\"FileSize\": {\"gte\": 100000, \"lte\": 110000}}}})\n", | |
"dat = []\n", | |
"for hit in res['hits']['hits']:\n", | |
" dat.append({\n", | |
" 'score': hit['_score'],\n", | |
" 'title': hit['_source']['Title'],\n", | |
" 'type': hit['_source']['Type'],\n", | |
" 'size': hit['_source']['FileSize'],\n", | |
" })\n", | |
"\n", | |
"print(f'{len(dat)} entries')\n", | |
"pd.DataFrame(dat, columns=['score', 'title', 'type', 'size'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'took': 3,\n", | |
" 'timed_out': False,\n", | |
" '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},\n", | |
" 'hits': {'total': {'value': 3, 'relation': 'eq'},\n", | |
" 'max_score': 1.0,\n", | |
" 'hits': [{'_shard': '[media_info2][0]',\n", | |
" '_node': 'BHkqVJoZTWCqJgx3hwbBUg',\n", | |
" '_index': 'media_info2',\n", | |
" '_type': '_doc',\n", | |
" '_id': '801',\n", | |
" '_score': 1.0,\n", | |
" '_source': {'Title': 'object-801', 'Type': 'mkv', 'FileSize': 101983},\n", | |
" '_explanation': {'value': 1.0,\n", | |
" 'description': 'FileSize:[100000 TO 105000]',\n", | |
" 'details': []}},\n", | |
" {'_shard': '[media_info2][0]',\n", | |
" '_node': 'BHkqVJoZTWCqJgx3hwbBUg',\n", | |
" '_index': 'media_info2',\n", | |
" '_type': '_doc',\n", | |
" '_id': '814',\n", | |
" '_score': 1.0,\n", | |
" '_source': {'Title': 'object-814', 'Type': 'mkv', 'FileSize': 104269},\n", | |
" '_explanation': {'value': 1.0,\n", | |
" 'description': 'FileSize:[100000 TO 105000]',\n", | |
" 'details': []}},\n", | |
" {'_shard': '[media_info2][0]',\n", | |
" '_node': 'BHkqVJoZTWCqJgx3hwbBUg',\n", | |
" '_index': 'media_info2',\n", | |
" '_type': '_doc',\n", | |
" '_id': '855',\n", | |
" '_score': 1.0,\n", | |
" '_source': {'Title': 'object-855', 'Type': 'mkv', 'FileSize': 102247},\n", | |
" '_explanation': {'value': 1.0,\n", | |
" 'description': 'FileSize:[100000 TO 105000]',\n", | |
" 'details': []}}]}}" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Explain search by length\n", | |
"es.search(index=idx2, body={\"explain\": True, \"size\": 25, \"query\": {\"range\": {\"FileSize\": {\"gte\": 100000, \"lte\": 105000}}}})" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Leftovers" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Alternate display using tabulate package\n", | |
"# from IPython.display import HTML, display\n", | |
"# import tabulate\n", | |
"# \n", | |
"# res = es.search(index=idx, body={\"query\": {\"match\": {\"Title\": \"Boat\"}}})\n", | |
"# dat = []\n", | |
"# for hit in res['hits']['hits']:\n", | |
"# dat.append({\n", | |
"# 'score': hit['_score'],\n", | |
"# 'title': hit['_source']['Title'],\n", | |
"# 'size': hit['_source']['FileSize'],\n", | |
"# })\n", | |
"#\n", | |
"# display(HTML(tabulate.tabulate(dat, tablefmt='html')))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'2019-01-25T00:00:00'" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"datetime.datetime(2019,1,random.randint(1,30)).isoformat()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'pvzljmteydmlqphuwlulnilmyywjdpjdoelhxfkphdvmmoqosthvmqjphkqvacpkmhnbsybncigxkfdfzwlahbamraedttgcxoga'" | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"letters(100)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"loading info from /Users/josh/Projects/jupyter_test/MediaInfoTemplates/mkv.json\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"'{\\n \"media\": {\\n \"@ref\": \"/Users/josh/Downloads/2019-05-01/Das.Boot.1981.Directors.Cut.Bluray.1080p.DTS-HD.x264-Grym/Das.Boot.1981.Directors.Cut.Bluray.1080p.DTS-HD.x264-Grym@BTNET/Das.Boot.Extras-Grym@BTNET/Back.to.the.Boat.(Doc)-Grym@BTNET/Back.to.the.Boat.(Doc)[email protected]\",\\n \"track\": [\\n {\\n \"@type\": \"General\",\\n \"UniqueID\": \"{uuid.uuid4()}\",\\n \"VideoCount\": \"1\",\\n \"AudioCount\": \"1\",\\n \"MenuCount\": \"1\",\\n \"FileExtension\": \"mkv\",\\n \"Format\": \"Matroska\",\\n \"Format_Version\": \"2\",\\n \"FileSize\": \"{random.randint(10000,1000000)}\",\\n \"Duration\": \"2688.436\",\\n \"OverallBitRate\": \"3388082\",\\n \"FrameRate\": \"23.976\",\\n \"FrameCount\": \"{random.randint(100,10000)}\",\\n \"StreamSize\": \"22656697\",\\n \"IsStreamable\": \"Yes\",\\n \"Title\": \"{letters(100)}\",\\n \"Movie\": \"{letters(50)}\",\\n \"Encoded_Date\": \"{datetime.datetime(2019,1,random.randint(1,30)).isoformat()}\",\\n \"File_Modified_Date\": \"UTC 2013-12-08 10:09:36\",\\n \"File_Modified_Date_Local\": \"2013-12-08 03:09:36\",\\n \"Encoded_Application\": \"DVDFab\",\\n \"Encoded_Library\": \"libebml v0.7.8 + libmatroska v0.8.1\"\\n },\\n {\\n \"@type\": \"Video\",\\n \"StreamOrder\": \"0\",\\n \"ID\": \"1\",\\n \"UniqueID\": \"329804448\",\\n \"Format\": \"AVC\",\\n \"Format_Profile\": \"High\",\\n \"Format_Level\": \"4.1\",\\n \"Format_Settings_CABAC\": \"Yes\",\\n \"Format_Settings_RefFrames\": \"2\",\\n \"CodecID\": \"V_MPEG4/ISO/AVC\",\\n \"Duration\": \"2688.438\",\\n \"BitRate\": \"2872659\",\\n \"Width\": \"1280\",\\n \"Height\": \"720\",\\n \"Sampled_Width\": \"1280\",\\n \"Sampled_Height\": \"720\",\\n \"PixelAspectRatio\": \"1.000\",\\n \"DisplayAspectRatio\": \"1.778\",\\n \"FrameRate_Mode\": \"CFR\",\\n \"FrameRate\": \"23.976\",\\n \"FrameCount\": \"64458\",\\n \"ColorSpace\": \"YUV\",\\n \"ChromaSubsampling\": \"4:2:0\",\\n \"BitDepth\": \"8\",\\n \"ScanType\": \"Progressive\",\\n \"Delay\": \"0.000\",\\n \"StreamSize\": \"965370916\",\\n \"Encoded_Library\": \"x264 - core\",\\n \"Encoded_Library_Name\": \"x264\",\\n \"Encoded_Library_Version\": \"core\",\\n \"Default\": \"Yes\",\\n \"Forced\": \"No\",\\n \"colour_description_present\": \"Yes\",\\n \"colour_description_present_Source\": \"Stream\",\\n \"colour_range\": \"Limited\",\\n \"colour_range_Source\": \"Stream\",\\n \"colour_primaries\": \"BT.709\",\\n \"colour_primaries_Source\": \"Stream\",\\n \"transfer_characteristics\": \"BT.709\",\\n \"transfer_characteristics_Source\": \"Stream\",\\n \"matrix_coefficients\": \"BT.709\",\\n \"matrix_coefficients_Source\": \"Stream\"\\n },\\n {\\n \"@type\": \"Audio\",\\n \"StreamOrder\": \"1\",\\n \"ID\": \"2\",\\n \"UniqueID\": \"857171869\",\\n \"Format\": \"AC-3\",\\n \"Format_Commercial_IfAny\": \"Dolby Digital\",\\n \"Format_Settings_Endianness\": \"Big\",\\n \"CodecID\": \"A_AC3\",\\n \"Duration\": \"2688.436\",\\n \"BitRate_Mode\": \"CBR\",\\n \"BitRate\": \"448000\",\\n \"Channels\": \"6\",\\n \"ChannelPositions\": \"Front: L C R, Side: L R, LFE\",\\n \"ChannelLayout\": \"L R C LFE Ls Rs\",\\n \"SamplesPerFrame\": \"1536\",\\n \"SamplingRate\": \"48000\",\\n \"SamplingCount\": \"129044928\",\\n \"FrameRate\": \"31.250\",\\n \"BitDepth\": \"16\",\\n \"Compression_Mode\": \"Lossy\",\\n \"Delay\": \"0.000\",\\n \"Delay_Source\": \"Container\",\\n \"StreamSize\": \"150552416\",\\n \"StreamSize_Proportion\": \"0.13223\",\\n \"Language\": \"en\",\\n \"ServiceKind\": \"CM\",\\n \"Default\": \"Yes\",\\n \"Forced\": \"No\",\\n \"extra\": {\\n \"bsid\": \"8\",\\n \"dialnorm\": \"-31\",\\n \"compr\": \"-0.28\",\\n \"acmod\": \"7\",\\n \"lfeon\": \"1\",\\n \"dialnorm_Average\": \"-31\",\\n \"dialnorm_Minimum\": \"-31\",\\n \"compr_Average\": \"0.42\",\\n \"compr_Minimum\": \"-2.50\",\\n \"compr_Maximum\": \"1.94\",\\n \"compr_Count\": \"334\",\\n \"dynrng_Average\": \"0.25\",\\n \"dynrng_Minimum\": \"-2.68\",\\n \"dynrng_Maximum\": \"1.94\",\\n \"dynrng_Count\": \"556\"\\n }\\n },\\n {\\n \"@type\": \"Menu\",\\n \"extra\": {\\n \"_00_00_00_000\": \"en:(01)00:00:00:000\",\\n \"_00_44_48_769\": \"en:(02)00:44:48:769\"\\n }\\n }\\n ]\\n }\\n}\\n'" | |
] | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"base_dir = Path('/Users/josh/Projects/jupyter_test/MediaInfoTemplates')\n", | |
"raw = ''\n", | |
"filename = f\"{base_dir}/mkv.json\"\n", | |
"print(f\"loading info from {filename}\")\n", | |
"with open(filename) as f:\n", | |
" raw = f.read()\n", | |
"\n", | |
"# string.Formatter.format(mf)\n", | |
"# mf = json.load(f)\n", | |
"raw# .format(**locals())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'key': 'rec-500',\n", | |
" 'letters': 'dgruaxbaaaaagaikwqabkxcanpilhwupedytitgdrwcrrckrhvocapkewubqaedycdvmdekmrlzxlotfjcfjcaibwbosfjwwlgpoyoljnfbhodarfhzidcyynijczqha',\n", | |
" 'number': 500}" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Query by ID\n", | |
"es.get(index=\"es_test\", id=\"rec-500\")['_source']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"10 entries\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>score</th>\n", | |
" <th>key</th>\n", | |
" <th>number</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1.0</td>\n", | |
" <td>rec-500</td>\n", | |
" <td>500</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1.0</td>\n", | |
" <td>rec-501</td>\n", | |
" <td>501</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1.0</td>\n", | |
" <td>rec-502</td>\n", | |
" <td>502</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1.0</td>\n", | |
" <td>rec-503</td>\n", | |
" <td>503</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1.0</td>\n", | |
" <td>rec-504</td>\n", | |
" <td>504</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>1.0</td>\n", | |
" <td>rec-505</td>\n", | |
" <td>505</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>1.0</td>\n", | |
" <td>rec-506</td>\n", | |
" <td>506</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>1.0</td>\n", | |
" <td>rec-507</td>\n", | |
" <td>507</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>1.0</td>\n", | |
" <td>rec-508</td>\n", | |
" <td>508</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>1.0</td>\n", | |
" <td>rec-509</td>\n", | |
" <td>509</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" score key number\n", | |
"0 1.0 rec-500 500\n", | |
"1 1.0 rec-501 501\n", | |
"2 1.0 rec-502 502\n", | |
"3 1.0 rec-503 503\n", | |
"4 1.0 rec-504 504\n", | |
"5 1.0 rec-505 505\n", | |
"6 1.0 rec-506 506\n", | |
"7 1.0 rec-507 507\n", | |
"8 1.0 rec-508 508\n", | |
"9 1.0 rec-509 509" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Search by length\n", | |
"res = es.search(index=\"es_test\", body={\"query\": {\"range\": {\"number\": {\"gte\": 500, \"lte\": 510}}}})\n", | |
"dat = []\n", | |
"for hit in res['hits']['hits']:\n", | |
" dat.append({\n", | |
" 'score': hit['_score'],\n", | |
" 'key': hit['_source']['key'],\n", | |
" 'number': hit['_source']['number'],\n", | |
" })\n", | |
"\n", | |
"print(f'{len(dat)} entries')\n", | |
"pd.DataFrame(dat, columns=['score', 'key', 'number'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment