Skip to content

Instantly share code, notes, and snippets.

@sabatale
Created January 6, 2024 19:31
Show Gist options
  • Save sabatale/754019aa244a5f02ef53b321512c3083 to your computer and use it in GitHub Desktop.
Save sabatale/754019aa244a5f02ef53b321512c3083 to your computer and use it in GitHub Desktop.
Generic JSON Loader for Langchain Python
"""See https://github.com/langchain-ai/langchain/issues/4396 for original author."""
"""Loader that loads data from JSON."""
import json
from pathlib import Path
from typing import Callable, Dict, List, Optional, Union
from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader
class JSONLoader(BaseLoader):
    """Load a JSON file and flatten it into one ``Document`` per leaf value.

    The parsed JSON is walked recursively. Every scalar leaf is rendered as a
    ``"dotted.key.path: value"`` string and wrapped in its own ``Document``
    with empty metadata. Elements of a list share the key path of the list
    that contains them (no index is added to the path).
    """

    def __init__(
        self,
        file_path: Union[str, Path],
        content_key: Optional[str] = None,
    ):
        """Remember which file to read.

        Args:
            file_path: Location of the JSON file; resolved to an absolute
                path immediately.
            content_key: Stored on the instance as ``_content_key``.
                NOTE(review): no method of this class reads it yet, so it
                currently has no effect on the loaded documents.
        """
        self.file_path = Path(file_path).resolve()
        self._content_key = content_key

    def create_documents(self, processed_data) -> List[Document]:
        """Wrap each flattened entry in a ``Document`` with empty metadata.

        Args:
            processed_data: Iterable of entries, normally the strings
                produced by ``process_json``. An entry that is not a ``str``
                is treated as an iterable of string fragments and
                concatenated.

        Returns:
            One ``Document`` per entry, in input order.
        """
        documents = []
        for entry in processed_data:
            # Strings are used as-is; anything else is joined, which keeps
            # callers that pass lists of fragments working.
            text = entry if isinstance(entry, str) else "".join(entry)
            documents.append(Document(page_content=text, metadata={}))
        return documents

    def process_item(self, item, prefix: str = "") -> List[str]:
        """Flatten one JSON value into ``"key.path: value"`` strings.

        Args:
            item: A parsed JSON value (dict, list, or scalar).
            prefix: Dotted key path accumulated so far; empty at the root.

        Returns:
            One string per scalar leaf found under ``item``. A scalar that
            has no key path (for example an element of a top-level list) is
            rendered as just the value, without a leading ``": "``.
        """
        if isinstance(item, dict):
            lines: List[str] = []
            for key, value in item.items():
                child_prefix = f"{prefix}.{key}" if prefix else key
                lines.extend(self.process_item(value, child_prefix))
            return lines

        if isinstance(item, list):
            lines = []
            # List elements inherit the list's own key path unchanged.
            for value in item:
                lines.extend(self.process_item(value, prefix))
            return lines

        # Scalar leaf: only emit the "path: " part when there is a path.
        return [f"{prefix}: {item}" if prefix else str(item)]

    def process_json(self, data) -> List[str]:
        """Flatten a whole parsed JSON payload.

        Args:
            data: The object returned by ``json.load``.

        Returns:
            The flattened strings when ``data`` is a list or dict; an empty
            list for any other top-level value.
        """
        if isinstance(data, (list, dict)):
            # process_item already walks lists element by element, so both
            # container types can share the same entry point.
            return self.process_item(data)
        return []

    def load(self) -> List[Document]:
        """Load and return documents from the JSON file.

        Returns:
            The documents built from the file, or an empty list when the
            file does not contain valid JSON (a message is printed in that
            case).
        """
        with open(self.file_path, mode="r", encoding="utf-8") as json_file:
            try:
                # Only the parse step is guarded, so errors raised while
                # flattening are never mistaken for a bad file.
                data = json.load(json_file)
            except json.JSONDecodeError:
                print("Error: Invalid JSON format in the file.")
                return []
        return self.create_documents(self.process_json(data))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment