Skip to content

Instantly share code, notes, and snippets.

@entrepeneur4lyf
Created March 13, 2025 10:05
Show Gist options
  • Save entrepeneur4lyf/4f819357889b3490e67a036a3bbcec26 to your computer and use it in GitHub Desktop.
Save entrepeneur4lyf/4f819357889b3490e67a036a3bbcec26 to your computer and use it in GitHub Desktop.
Grok a repo into a knowledge graph (KG) with semantic search
use gitoxide::Repository;
use std::env;
use std::path::Path;
use std::fs::File;
use std::io::Read;
use syn::parse_file;
use syn::Item;
use oxigraph::{
model::{
GraphName, NamedNode, Term, Triple, Vocabulary,
},
store::Store,
};
use std::collections::HashMap;
use serde_json;
/// JSON-LD context document used by the generated knowledge graph.
///
/// The literal is a complete JSON object whose single top-level key is
/// `"@context"`; the nested object maps the short terms (`File`, `Function`,
/// `Struct`, `Enum`, `name`, `signature`, `documentation`, `contains`) to
/// IRIs under `http://example.org/vocab#`.
const CONTEXT: &str = r#"{
"@context": {
"File": "http://example.org/vocab#File",
"Function": "http://example.org/vocab#Function",
"Struct": "http://example.org/vocab#Struct",
"Enum": "http://example.org/vocab#Enum",
"name": "http://example.org/vocab#name",
"signature": "http://example.org/vocab#signature",
"documentation": "http://example.org/vocab#documentation",
"contains": "http://example.org/vocab#contains"
}
}"#;
/// Recursively collects the paths of every `.rs` blob reachable from `tree`.
///
/// `current_path` is the path prefix under which `tree` lives; each returned
/// string is that prefix joined with the entry names leading to the blob.
/// Entries that are neither trees nor blobs are ignored.
///
/// Paths are converted to text lossily, so a non-UTF-8 `current_path` yields
/// replacement characters instead of a panic.
///
/// # Errors
///
/// Propagates any error returned by `repo.find_tree` while descending into a
/// sub-tree.
fn list_files<'a>(
    repo: &'a Repository,
    tree: &'a gitoxide::tree::Tree,
    current_path: &Path,
) -> Result<Vec<String>, Box<dyn std::error::Error>> {
    let mut files = Vec::new();
    for entry in tree.entries() {
        let entry_name = entry.filename().to_string_lossy().into_owned();
        let entry_path = current_path.join(&entry_name);
        match entry.object_type() {
            gitoxide::object::Kind::Tree => {
                let sub_tree = repo.find_tree(entry.oid())?;
                files.extend(list_files(repo, &sub_tree, &entry_path)?);
            }
            gitoxide::object::Kind::Blob => {
                // Convert once and never unwrap: the path text is only used
                // for the suffix check and as the returned identifier.
                let path_text = entry_path.to_string_lossy();
                if path_text.ends_with(".rs") {
                    files.push(path_text.into_owned());
                }
            }
            _ => {}
        }
    }
    Ok(files)
}
/// Loads the Rust source file at `file_path` and returns its top-level items.
///
/// # Panics
///
/// Panics with "Unable to open file" when the file cannot be opened and with
/// "Unable to read file" when its contents cannot be read into a `String`.
///
/// # Errors
///
/// Returns the `syn::Error` reported by `parse_file` when the contents do not
/// parse.
fn extract_information(file_path: &str) -> Result<Vec<Item>, syn::Error> {
    let mut source = String::new();
    File::open(file_path)
        .expect("Unable to open file")
        .read_to_string(&mut source)
        .expect("Unable to read file");
    let parsed = parse_file(&source)?;
    Ok(parsed.items)
}
fn build_knowledge_graph(files: &[String]) -> String {
let mut graph = HashMap::new();
for file_path in files {
let items = extract_information(file_path).unwrap();
let file_id = format!("<{}>", file_path);
graph.insert(file_id.clone(), serde_json::json!({
"@id": file_path,
"@type": "File",
"contains": Vec::new()
}));
for item in items {
match item {
Item::Fn(function) => {
let func_name = function.sig.ident.to_string();
let func_id = format!("<{}#{}>", file_path, func_name);
let signature = format!("{}", function.sig);
let documentation = function
.attrs
.iter()
.filter(|attr| attr.path.is_ident("doc"))
.map(|attr| attr.parse_meta().unwrap().to_string())
.collect::<Vec<String>>()
.join("\n");
graph.insert(func_id.clone(), serde_json::json!({
"@id": func_id,
"@type": "Function",
"name": func_name,
"signature": signature,
"documentation": documentation
}));
graph.get_mut(&file_id).unwrap()["contains"].as_array_mut().unwrap().push(func_id);
}
Item::Struct(structure) => {
let struct_name = structure.ident.to_string();
let struct_id = format!("<{}#{}>", file_path, struct_name);
graph.insert(struct_id.clone(), serde_json::json!({
"@id": struct_id,
"@type": "Struct",
"name": struct_name
}));
graph.get_mut(&file_id).unwrap()["contains"].as_array_mut().unwrap().push(struct_id);
}
Item::Enum(enumeration) => {
let enum_name = enumeration.ident.to_string();
let enum_id = format!("<{}#{}>", file_path, enum_name);
graph.insert(enum_id.clone(), serde_json::json!({
"@id": enum_id,
"@type": "Enum",
"name": enum_name
}));
graph.get_mut(&file_id).unwrap()["contains"].as_array_mut().unwrap().push(enum_id);
}
_ => continue,
}
}
}
let mut json_ld = serde_json::json!({
"@context": serde_json::from_str(CONTEXT).unwrap(),
"@graph": Vec::new()
});
for (_, value) in graph {
json_ld["@graph"].as_array_mut().unwrap().push(value);
}
serde_json::to_string_pretty(&json_ld).unwrap()
}
/// Command-line entry point.
///
/// Expects exactly one argument, the path of a local repository. It lists the
/// `.rs` files in the tree of the commit `HEAD` points at, builds the JSON-LD
/// knowledge graph for them, loads it into an Oxigraph store and prints the
/// matches of an example SPARQL query.
///
/// With the wrong number of arguments the usage line is written to stderr and
/// the process exits with status 2.
///
/// NOTE(review): `list_files` yields paths relative to the repository root,
/// while `extract_information` opens them relative to the current working
/// directory, so this presumably has to be run from inside `<local_path>` —
/// confirm.
///
/// # Errors
///
/// Propagates errors from opening the repository, resolving the commit and
/// tree, creating the store, loading the graph and running the query.
///
/// # Panics
///
/// Panics if `HEAD` has no direct target.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let args: Vec<String> = env::args().collect();
    if args.len() != 2 {
        // argv may be empty on some platforms, so do not index args[0].
        let program = args.first().map(String::as_str).unwrap_or("program");
        eprintln!("Usage: {} <local_path>", program);
        std::process::exit(2);
    }
    let path = &args[1];
    let repo = Repository::open(path)?;
    let head = repo.head()?;
    let commit = repo.find_commit(
        head.target()
            .expect("HEAD does not resolve directly to a commit id"),
    )?;
    let tree = repo.find_tree(commit.tree_id())?;
    let files = list_files(&repo, &tree, Path::new(""))?;
    // Consume `files` so the surviving paths are moved, not cloned.
    let rust_files: Vec<String> = files
        .into_iter()
        .filter(|file| file.ends_with(".rs"))
        .collect();
    let json_ld = build_knowledge_graph(&rust_files);
    // Load JSON-LD into Oxigraph
    // NOTE(review): `main` is not async, so the original `.await` calls could
    // not compile and are dropped. `load_jsonld` and iterating the query
    // result directly are kept as written — confirm them against the oxigraph
    // version in use.
    let store = Store::new()?;
    let graph_name = GraphName::default_graph();
    store.load_jsonld(&json_ld, graph_name)?;
    // Example SPARQL query for semantic search
    let query = r#"SELECT ?func ?name ?doc WHERE {
?func a <http://example.org/vocab#Function> ;
<http://example.org/vocab#name> ?name ;
<http://example.org/vocab#documentation> ?doc .
FILTER(CONTAINS(?name, "hello") || CONTAINS(?doc, "simple"))
}"#;
    let results = store.query(query)?;
    for result in results {
        println!("\nFunction ID: {}", result["func"]);
        println!("\tName: {}", result["name"]);
        println!("\tDocumentation: {}", result["doc"]);
    }
    Ok(())
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment