Created
December 6, 2024 20:57
-
-
Save aliirz/e1ac15ac0bd8b7472fb60107fd0c57bb to your computer and use it in GitHub Desktop.
k commons
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from fastapi import FastAPI, HTTPException | |
from transformers import GemmaForCausalLM, GemmaTokenizer | |
import torch | |
from typing import Dict, List | |
import sqlite3 | |
import asyncio | |
import json | |
from datetime import datetime | |
class LocalGemmaProcessor:
    """Run academic-text tasks against a locally loaded Gemma model.

    The tokenizer and model are loaded once in ``__init__`` and reused by
    every call to :meth:`process_academic_text`.
    """

    # Prompt template per supported task; ``{text}`` is replaced with the
    # academic content passed by the caller.
    PROMPT_TEMPLATES = {
        "summarize": "Summarize this academic text while preserving key technical details:\n{text}",
        "simplify": "Explain this academic text in simpler terms while keeping accuracy:\n{text}",
        "extract_key_points": "Extract and list the main findings from this academic text:\n{text}",
    }

    def __init__(self, model_size: str = "2b"):
        """Initialize Gemma model for local processing

        Args:
            model_size: '2b' or '7b' for different Gemma model sizes
        """
        self.model_name = f"google/gemma-{model_size}"
        self.tokenizer = GemmaTokenizer.from_pretrained(self.model_name)
        self.model = GemmaForCausalLM.from_pretrained(
            self.model_name,
            device_map="auto",  # Use GPU if available
            torch_dtype=torch.float16,  # Use half precision for memory efficiency
        )

    async def process_academic_text(
        self, content: str, task: str, max_new_tokens: int = 512
    ) -> Dict:
        """Process academic content using Gemma

        Args:
            content: The academic text to process
            task: 'summarize', 'simplify', or 'extract_key_points'
            max_new_tokens: Upper bound on the number of tokens generated
                in addition to the prompt.

        Returns:
            A dict with ``processed_text`` (the generated text only, without
            the prompt) and a ``metadata`` dict describing the run.

        Raises:
            ValueError: If ``task`` is not one of the supported task names.
        """
        template = self.PROMPT_TEMPLATES.get(task)
        if template is None:
            supported = ", ".join(sorted(self.PROMPT_TEMPLATES))
            raise ValueError(
                f"Unsupported task {task!r}; expected one of: {supported}"
            )

        prompt = template.format(text=content)
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        prompt_length = inputs["input_ids"].shape[-1]

        # NOTE(review): generate() is a synchronous call, so the event loop
        # is blocked while the model runs.
        outputs = self.model.generate(
            **inputs,
            # Bound only the generated part; a total-length cap would let a
            # long paper consume the whole budget and leave no room for output.
            max_new_tokens=max_new_tokens,
            temperature=0.3,
            do_sample=True,
        )

        # For a causal LM the returned sequence starts with the prompt ids;
        # drop them so the result holds only the model's answer.
        completion_ids = outputs[0][prompt_length:]
        processed_text = self.tokenizer.decode(
            completion_ids, skip_special_tokens=True
        ).strip()

        return {
            "processed_text": processed_text,
            "metadata": {
                "model": "Gemma",
                "model_size": self.model_name,
                "task": task,
                "timestamp": datetime.now().isoformat(),
                "processing_type": "local_ai",
            },
        }
class KnowledgeNode:
    """A knowledge-commons node: a SQLite store plus a local Gemma processor.

    Constructing a node opens ``<node_id>.db``, builds a
    ``LocalGemmaProcessor`` and makes sure the schema exists.
    """

    def __init__(self, node_id: str, region: str = "PK"):
        """Create the node, its database connection and its AI processor.

        Args:
            node_id: Identifier of the node; also the stem of the database
                file name.
            region: Region code recorded on the node.
        """
        self.node_id = node_id
        self.region = region
        self.db = sqlite3.connect(f"{node_id}.db")
        self.ai_processor = LocalGemmaProcessor()
        self.setup_db()

    def setup_db(self):
        """Create the ``papers`` and ``community_feedback`` tables if absent."""
        # Papers and their processed versions, keyed by content hash.
        papers_ddl = """
            CREATE TABLE IF NOT EXISTS papers (
                hash TEXT PRIMARY KEY,
                title TEXT,
                content TEXT,
                language TEXT,
                processed_versions TEXT,
                metadata TEXT,
                timestamp DATETIME,
                license TEXT
            )
        """
        # Feedback rows that reference a paper by its hash.
        feedback_ddl = """
            CREATE TABLE IF NOT EXISTS community_feedback (
                paper_hash TEXT,
                processor_version TEXT,
                feedback_text TEXT,
                quality_rating INTEGER,
                timestamp DATETIME,
                FOREIGN KEY(paper_hash) REFERENCES papers(hash)
            )
        """
        for statement in (papers_ddl, feedback_ddl):
            self.db.execute(statement)
        self.db.commit()
# FastAPI application object that the route decorators below attach to.
app = FastAPI(title="Knowledge Commons Node (Gemma-powered)")
# Module-level node used by the request handlers. Constructing it runs at
# import time: it opens "karachi_demo_node.db" and builds the Gemma processor.
node = KnowledgeNode("karachi_demo_node")
@app.post("/papers/process")
async def process_paper(paper_hash: str, task: str):
    """Process a stored paper with the local Gemma model and save the result.

    Args:
        paper_hash: Value of the ``hash`` column identifying the paper.
        task: Task name forwarded to the node's AI processor; it also
            becomes the key under which the result is stored in
            ``processed_versions``.

    Returns:
        The dict returned by ``process_academic_text``.

    Raises:
        HTTPException: 404 when no paper has the given hash; 400 when
            processing or storing the result fails for any other reason.
    """
    try:
        row = node.db.execute(
            "SELECT content FROM papers WHERE hash = ?",
            (paper_hash,),
        ).fetchone()
        if row is None:
            raise HTTPException(status_code=404, detail="Paper not found")

        processed = await node.ai_processor.process_academic_text(row[0], task)

        # Store processed version on this paper only. The JSON path is bound
        # as a parameter so request input never becomes part of the SQL text.
        # NOTE(review): json_insert leaves an existing key untouched, so a
        # repeated task will not replace the stored version; confirm intent.
        node.db.execute(
            "UPDATE papers "
            "SET processed_versions = "
            "json_insert(COALESCE(processed_versions, '{}'), ?, ?) "
            "WHERE hash = ?",
            ("$." + task, json.dumps(processed), paper_hash),
        )
        node.db.commit()
        return processed
    except HTTPException:
        # Keep deliberate HTTP errors (such as the 404 above) as they are
        # instead of rewrapping them as a 400.
        raise
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
# Demo usage
if __name__ == "__main__":
    # Sample paper record; it is defined for illustration and is not
    # passed to the node by this demo.
    demo_paper = {
        "title": "Community Networks and Digital Rights in South Asia",
        "content": """
        This research examines the implementation of community-operated networks
        in South Asian regions with limited internet access. Our findings show
        that locally-managed infrastructure can significantly improve digital
        rights and knowledge access in underserved communities.
        Key findings include:
        1. 73% increase in educational resource access
        2. Significant improvement in local content creation
        3. Enhanced digital privacy awareness
        The study suggests that community ownership of digital infrastructure
        leads to more sustainable and equitable access to knowledge.
        """,
        "language": "en",
        "license": "CC-BY-SA-4.0",
    }

    # Demo different processing tasks (listed only; none is executed here).
    tasks = ["summarize", "simplify", "extract_key_points"]

    # The demo itself only prints these status messages, in this order.
    status_messages = (
        "Initializing Knowledge Commons Node with Gemma...",
        "\nRunning demo with Gemma model...",
        "This would typically process the paper in multiple ways...",
        "All processing happens locally on the node...",
    )
    for message in status_messages:
        print(message)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment