Created
November 24, 2021 11:48
-
-
Save davidlopezre/57ffcef25a6684b7f68e5ed16d61e489 to your computer and use it in GitHub Desktop.
Attempt to get git unreachable objects
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::collections::HashSet; | |
use git2::{Commit, Repository, Tree, TreeWalkMode, TreeWalkResult}; | |
use thiserror::Error; | |
fn main() -> Result<(), Error> { | |
let repository = Repository::open_from_env()?; | |
let odb = repository.odb()?; | |
let mut objects: HashSet<String> = HashSet::new(); | |
// get all objects in repository | |
odb.foreach(|oid| { | |
objects.insert(oid.to_string()); | |
true | |
})?; | |
println!("object count: {}", objects.len()); | |
let mut revwalk = repository.revwalk()?; | |
let references = repository.references_glob("refs/*")?; | |
for r in references { | |
let head_oid = r?.resolve()?.target().unwrap(); | |
println!("ref: {}", head_oid); | |
revwalk.push(head_oid)?; | |
} | |
repository.tag_foreach(|oid, tag_name| { | |
println!("tag: {}", String::from_utf8_lossy(tag_name)); | |
// remove tag object from unreachable objects Hashset | |
objects.remove(&oid.to_string()); | |
if let Ok(()) = revwalk.push(oid) { | |
return true; | |
} | |
return false; | |
})?; | |
let mut objects_seen: HashSet<String> = HashSet::new(); | |
for commit_oid in revwalk { | |
let commit_oid = commit_oid?; | |
objects_seen.insert(commit_oid.to_string()); | |
let commit = repository.find_commit(commit_oid)?; | |
let mut reachable_objects = objects_in_commit(commit, &mut objects_seen)?; | |
reachable_objects.insert(commit_oid.to_string()); | |
reachable_objects.iter().for_each(|ro| { | |
objects.remove(ro); | |
}); | |
if objects.len() == 0 { | |
break; | |
} | |
} | |
println!("unreachable objects: {:?}", objects); | |
println!("unreachable objects count: {:?}", objects.len()); | |
Ok(()) | |
} | |
fn objects_in_commit( | |
commit: Commit, | |
objects_seen: &mut HashSet<String>, | |
) -> Result<HashSet<String>, Error> { | |
objects_seen.insert(commit.id().to_string()); | |
let tree = commit.tree()?; | |
let mut objects = HashSet::new(); | |
objects.insert(tree.id().to_string()); | |
let tree_objects = objects_in_tree(tree, objects_seen)?; | |
objects.extend(tree_objects); | |
Ok(objects) | |
} | |
fn objects_in_tree( | |
tree: Tree, | |
objects_seen: &mut HashSet<String>, | |
) -> Result<HashSet<String>, Error> { | |
if objects_seen.contains(&tree.id().to_string()) { | |
return Ok(HashSet::new()); | |
} | |
objects_seen.insert(tree.id().to_string()); | |
let mut objects = HashSet::new(); | |
tree.walk(TreeWalkMode::PreOrder, |_, entry| { | |
if objects_seen.contains(&entry.id().to_string()) { | |
return TreeWalkResult::Skip; | |
} else { | |
objects_seen.insert(entry.id().to_string()); | |
objects.insert(entry.id().to_string()); | |
TreeWalkResult::Ok | |
} | |
})?; | |
Ok(objects) | |
} | |
#[derive(Error, Debug)] | |
pub enum Error { | |
#[error(transparent)] | |
Git2Error(#[from] git2::Error), | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment