Idea for cached inference with RocksDB, Ray, and a high-IOPS NAS
from typing import Dict, Any

import rocksdb
import ray
import ray.data


# Step 1: Initialize the RocksDB instance.
# Note: the directory is mounted in the pod via a high-IOPS ReadWriteMany volume
# backed by GCP Hyperdisk, so all workers see the same cache.
db = rocksdb.DB("rocksdb_dir", rocksdb.Options(create_if_missing=True))


# Step 2: Define a Predictor class for inference.
class HuggingFacePredictor:
    def __init__(self):
        from transformers import pipeline

        # Initialize a pre-trained GPT-2 Hugging Face pipeline.
        self.model = pipeline("text-generation", model="gpt2")

    # Logic for inference on one row of data.
    def __call__(self, row: Dict[str, Any]) -> Dict[str, Any]:
        input_text = row["input"]

        # Check whether the prediction is already in the cache.
        cached = db.get(input_text.encode())
        if cached is not None:
            row["result"] = cached.decode()
            return row

        # Cache miss: get the prediction from the model.
        predictions = self.model(input_text, max_length=20, num_return_sequences=1)
        prediction = predictions[0]["generated_text"]

        # Store the prediction in the cache.
        db.put(input_text.encode(), prediction.encode())
        row["result"] = prediction
        return row


# Step 3: Define the Kedro node function.
def predict_and_cache(input_ds: ray.data.Dataset) -> ray.data.Dataset:
    # Step 4: Map the Predictor over the Dataset to get predictions.
    # Passing the class (rather than an instance) lets Ray Data run it as
    # long-lived actors; recent Ray versions require a concurrency argument
    # for callable classes.
    return input_ds.map(HuggingFacePredictor, concurrency=1)
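
A minimal usage sketch (not part of the original gist): it assumes Ray is already running in the cluster, that input rows carry an "input" column as above, and that the sample texts are placeholders. In practice the dataset would come from the Kedro catalog rather than from in-memory items.

if __name__ == "__main__":
    import ray

    ray.init(ignore_reinit_error=True)

    # Build a tiny in-memory dataset with the "input" column the predictor expects.
    input_ds = ray.data.from_items(
        [{"input": "Hello world"}, {"input": "Cached inference with Ray"}]
    )

    # The first run computes predictions and writes them to RocksDB; a second run
    # with the same inputs should be served from the cache without touching the model.
    result_ds = predict_and_cache(input_ds)
    for row in result_ds.take(2):
        print(row["input"], "->", row["result"])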