Skip to content

Instantly share code, notes, and snippets.

@GuyPaddock
Created August 8, 2024 03:31
Show Gist options
  • Save GuyPaddock/3c5bdadfb2ad4d8ba794834806a2308c to your computer and use it in GitHub Desktop.
Save GuyPaddock/3c5bdadfb2ad4d8ba794834806a2308c to your computer and use it in GitHub Desktop.
Loading data into Elasticsearch
import json

from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
# Load JSON data. The encoding is given explicitly because JSON files are
# UTF-8, while open() would otherwise fall back to the platform's locale
# encoding and mis-decode (or reject) non-ASCII text on some systems.
with open("documents.json", "r", encoding="utf-8") as file:
    documents_raw = json.load(file)

# Flatten the documents structure: every top-level entry provides a 'course'
# name and a 'documents' list. Each document is tagged with its course name
# (in place, so the entries inside documents_raw are updated as well) and
# collected into one flat list.
documents = []
for course in documents_raw:
    course_name = course['course']
    for doc in course['documents']:
        doc['course'] = course_name
        documents.append(doc)

# Initialize Elasticsearch client
es_client = Elasticsearch('http://localhost:9200')
# Define index settings and mappings
def create_index(index_name="course-questions"):
    """Create the index *index_name* unless it already exists.

    The index is created with a single shard and no replicas. The ``text``,
    ``section`` and ``question`` fields are mapped as ``text``; the
    ``course`` field is mapped as ``keyword``.
    """
    # Nothing to do when the index is already present.
    if es_client.indices.exists(index=index_name):
        return

    field_types = {
        "text": "text",
        "section": "text",
        "question": "text",
        "course": "keyword",
    }
    shard_settings = {"number_of_shards": 1, "number_of_replicas": 0}
    field_mappings = {
        "properties": {
            field: {"type": kind} for field, kind in field_types.items()
        }
    }

    es_client.indices.create(
        index=index_name,
        body={"settings": shard_settings, "mappings": field_mappings},
    )
# Create the index
create_index()

# Index documents through the bulk helper: the documents are sent in batched
# requests instead of one HTTP round trip per document. Each action names
# the target index and carries the document as its source.
# NOTE(review): with its default settings the helper raises if any document
# is rejected, so the success message below is only reached when the whole
# batch went through - confirm this matches the intended failure handling.
bulk(
    es_client,
    ({"_index": "course-questions", "_source": doc} for doc in documents),
)
print("Data indexed successfully")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment