Created
March 13, 2025 10:05
-
-
Save entrepeneur4lyf/4f819357889b3490e67a036a3bbcec26 to your computer and use it in GitHub Desktop.
Grok repo to kg and semantic search
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::collections::BTreeMap;
use std::collections::HashMap;
use std::env;
use std::fs::File;
use std::io::Read;
use std::path::Path;
use std::process;

use gitoxide::Repository;
use oxigraph::{
    model::{
        GraphName, NamedNode, Term, Triple, Vocabulary,
    },
    store::Store,
};
use serde_json;
use syn::parse_file;
use syn::Item;
/// JSON-LD context document mapping the short terms used in the generated
/// graph (`File`, `Function`, `Struct`, `Enum`, `name`, `signature`,
/// `documentation`, `contains`) to IRIs under `http://example.org/vocab#`.
///
/// The text is parsed as JSON when the graph document is assembled, so it
/// must remain a valid JSON object.
const CONTEXT: &str = r#"{
    "@context": {
        "File": "http://example.org/vocab#File",
        "Function": "http://example.org/vocab#Function",
        "Struct": "http://example.org/vocab#Struct",
        "Enum": "http://example.org/vocab#Enum",
        "name": "http://example.org/vocab#name",
        "signature": "http://example.org/vocab#signature",
        "documentation": "http://example.org/vocab#documentation",
        "contains": "http://example.org/vocab#contains"
    }
}"#;
fn list_files<'a>(repo: &'a Repository, tree: &'a gitoxide::tree::Tree, current_path: &Path) -> Result<Vec<String>, Box<dyn std::error::Error>> { | |
let mut files = Vec::new(); | |
for entry in tree.entries() { | |
let entry_name = entry.filename().to_string_lossy().into_owned(); | |
let entry_path = current_path.join(&entry_name); | |
match entry.object_type() { | |
gitoxide::object::Kind::Tree => { | |
let sub_tree = repo.find_tree(entry.oid())?; | |
let sub_files = list_files(&repo, &sub_tree, &entry_path)?; | |
files.extend(sub_files); | |
} | |
gitoxide::object::Kind::Blob => { | |
if entry_path.to_str().unwrap().ends_with(".rs") { | |
files.push(entry_path.to_str().unwrap().to_string()); | |
} | |
} | |
_ => continue, | |
} | |
} | |
Ok(files) | |
} | |
fn extract_information(file_path: &str) -> Result<Vec<Item>, syn::Error> { | |
let mut file = File::open(file_path).expect("Unable to open file"); | |
let mut content = String::new(); | |
file.read_to_string(&mut content).expect("Unable to read file"); | |
parse_file(&content).map(|file| file.items) | |
} | |
fn build_knowledge_graph(files: &[String]) -> String { | |
let mut graph = HashMap::new(); | |
for file_path in files { | |
let items = extract_information(file_path).unwrap(); | |
let file_id = format!("<{}>", file_path); | |
graph.insert(file_id.clone(), serde_json::json!({ | |
"@id": file_path, | |
"@type": "File", | |
"contains": Vec::new() | |
})); | |
for item in items { | |
match item { | |
Item::Fn(function) => { | |
let func_name = function.sig.ident.to_string(); | |
let func_id = format!("<{}#{}>", file_path, func_name); | |
let signature = format!("{}", function.sig); | |
let documentation = function | |
.attrs | |
.iter() | |
.filter(|attr| attr.path.is_ident("doc")) | |
.map(|attr| attr.parse_meta().unwrap().to_string()) | |
.collect::<Vec<String>>() | |
.join("\n"); | |
graph.insert(func_id.clone(), serde_json::json!({ | |
"@id": func_id, | |
"@type": "Function", | |
"name": func_name, | |
"signature": signature, | |
"documentation": documentation | |
})); | |
graph.get_mut(&file_id).unwrap()["contains"].as_array_mut().unwrap().push(func_id); | |
} | |
Item::Struct(structure) => { | |
let struct_name = structure.ident.to_string(); | |
let struct_id = format!("<{}#{}>", file_path, struct_name); | |
graph.insert(struct_id.clone(), serde_json::json!({ | |
"@id": struct_id, | |
"@type": "Struct", | |
"name": struct_name | |
})); | |
graph.get_mut(&file_id).unwrap()["contains"].as_array_mut().unwrap().push(struct_id); | |
} | |
Item::Enum(enumeration) => { | |
let enum_name = enumeration.ident.to_string(); | |
let enum_id = format!("<{}#{}>", file_path, enum_name); | |
graph.insert(enum_id.clone(), serde_json::json!({ | |
"@id": enum_id, | |
"@type": "Enum", | |
"name": enum_name | |
})); | |
graph.get_mut(&file_id).unwrap()["contains"].as_array_mut().unwrap().push(enum_id); | |
} | |
_ => continue, | |
} | |
} | |
} | |
let mut json_ld = serde_json::json!({ | |
"@context": serde_json::from_str(CONTEXT).unwrap(), | |
"@graph": Vec::new() | |
}); | |
for (_, value) in graph { | |
json_ld["@graph"].as_array_mut().unwrap().push(value); | |
} | |
serde_json::to_string_pretty(&json_ld).unwrap() | |
} | |
fn main() -> Result<(), Box<dyn std::error::Error>> { | |
let args: Vec<String> = env::args().collect(); | |
if args.len() != 2 { | |
println!("Usage: {} <local_path>", args[0]); | |
return Ok(()); | |
} | |
let path = &args[1]; | |
let repo = Repository::open(path)?; | |
let head = repo.head()?; | |
let commit = repo.find_commit(head.target().unwrap())?; | |
let tree = repo.find_tree(commit.tree_id())?; | |
let files = list_files(&repo, &tree, Path::new(""))?; | |
let rust_files: Vec<String> = files | |
.iter() | |
.filter(|file| file.ends_with(".rs")) | |
.map(|file| file.to_string()) | |
.collect(); | |
let json_ld = build_knowledge_graph(&rust_files); | |
// Load JSON-LD into Oxigraph | |
let store = Store::new()?; | |
let graph_name = GraphName::default_graph(); | |
store.load_jsonld(&json_ld, graph_name).await?; | |
// Example SPARQL query for semantic search | |
let query = r#"SELECT ?func ?name ?doc WHERE { | |
?func a <http://example.org/vocab#Function> ; | |
<http://example.org/vocab#name> ?name ; | |
<http://example.org/vocab#documentation> ?doc . | |
FILTER(CONTAINS(?name, "hello") || CONTAINS(?doc, "simple")) | |
}"#; | |
let results = store.query(query).await?; | |
for result in results { | |
println!("\nFunction ID: {}", result["func"].to_string()); | |
println!("\tName: {}", result["name"].to_string()); | |
println!("\tDocumentation: {}", result["doc"].to_string()); | |
} | |
Ok(()) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment