Skip to content

Instantly share code, notes, and snippets.

@davidlopezre
Created November 24, 2021 11:48
Show Gist options
  • Save davidlopezre/57ffcef25a6684b7f68e5ed16d61e489 to your computer and use it in GitHub Desktop.
Save davidlopezre/57ffcef25a6684b7f68e5ed16d61e489 to your computer and use it in GitHub Desktop.
Attempt to get git unreachable objects
use std::collections::HashSet;
use git2::{Commit, Repository, Tree, TreeWalkMode, TreeWalkResult};
use thiserror::Error;
fn main() -> Result<(), Error> {
let repository = Repository::open_from_env()?;
let odb = repository.odb()?;
let mut objects: HashSet<String> = HashSet::new();
// get all objects in repository
odb.foreach(|oid| {
objects.insert(oid.to_string());
true
})?;
println!("object count: {}", objects.len());
let mut revwalk = repository.revwalk()?;
let references = repository.references_glob("refs/*")?;
for r in references {
let head_oid = r?.resolve()?.target().unwrap();
println!("ref: {}", head_oid);
revwalk.push(head_oid)?;
}
repository.tag_foreach(|oid, tag_name| {
println!("tag: {}", String::from_utf8_lossy(tag_name));
// remove tag object from unreachable objects Hashset
objects.remove(&oid.to_string());
if let Ok(()) = revwalk.push(oid) {
return true;
}
return false;
})?;
let mut objects_seen: HashSet<String> = HashSet::new();
for commit_oid in revwalk {
let commit_oid = commit_oid?;
objects_seen.insert(commit_oid.to_string());
let commit = repository.find_commit(commit_oid)?;
let mut reachable_objects = objects_in_commit(commit, &mut objects_seen)?;
reachable_objects.insert(commit_oid.to_string());
reachable_objects.iter().for_each(|ro| {
objects.remove(ro);
});
if objects.len() == 0 {
break;
}
}
println!("unreachable objects: {:?}", objects);
println!("unreachable objects count: {:?}", objects.len());
Ok(())
}
/// Collects the ids of the objects a commit points at: its root tree plus whatever
/// `objects_in_tree` reports for that tree.
///
/// The commit's own id is recorded in `objects_seen` but is not part of the returned
/// set. `objects_seen` is passed on to `objects_in_tree`, which extends it further.
///
/// # Errors
///
/// Propagates the error if the commit's tree cannot be loaded or walked.
fn objects_in_commit(
    commit: Commit,
    objects_seen: &mut HashSet<String>,
) -> Result<HashSet<String>, Error> {
    objects_seen.insert(commit.id().to_string());

    let root = commit.tree()?;
    // Capture the id first: `root` is moved into the tree walk below.
    let root_id = root.id().to_string();

    let mut reachable = objects_in_tree(root, objects_seen)?;
    // The root tree id is always reported, even if the walk itself was skipped.
    reachable.insert(root_id);
    Ok(reachable)
}
/// Walks `tree` in pre-order and returns the ids of the entries that were not already
/// in `objects_seen`.
///
/// The tree's own id and every newly visited entry id are added to `objects_seen`.
/// If the tree itself was already seen, nothing is walked and an empty set is returned.
/// The tree's own id is never part of the returned set.
///
/// # Errors
///
/// Propagates the error reported by the tree walk.
fn objects_in_tree(
    tree: Tree,
    objects_seen: &mut HashSet<String>,
) -> Result<HashSet<String>, Error> {
    let mut collected = HashSet::new();

    // `insert` returns false when the id was already present, i.e. this tree has
    // been handled before.
    if !objects_seen.insert(tree.id().to_string()) {
        return Ok(collected);
    }

    tree.walk(TreeWalkMode::PreOrder, |_, entry| {
        let entry_id = entry.id().to_string();
        if objects_seen.insert(entry_id.clone()) {
            collected.insert(entry_id);
            TreeWalkResult::Ok
        } else {
            // Already visited earlier: skip this entry.
            TreeWalkResult::Skip
        }
    })?;

    Ok(collected)
}
/// Error type returned by the functions in this file.
#[derive(Error, Debug)]
pub enum Error {
    /// A failure reported by `git2`.
    ///
    /// `#[from]` lets `?` convert a `git2::Error` into this variant, and
    /// `#[error(transparent)]` forwards its display output to the wrapped error.
    #[error(transparent)]
    Git2Error(#[from] git2::Error),
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment