Created
January 22, 2020 02:20
-
-
Save nviennot/396fce1ceaa4b00fe838cab7527072a7 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#![cfg_attr(debug_assertions, allow(dead_code, unused_imports, unused_variables))] | |
use packed_simd; | |
use std::env; | |
use std::error; | |
use std::fs::File; | |
use std::io::Seek; | |
use std::io::SeekFrom; | |
use std::io; | |
use std::io::Write; | |
use std::cmp::min; | |
use memmap::MmapOptions; | |
use pbr; | |
use std::time; | |
/// Number of bytes in one kibibyte (2^10).
const ONE_KB: usize = 1 << 10;
/// Number of bytes in one mebibyte (2^20).
const ONE_MB: usize = ONE_KB * ONE_KB;
type ScanT = packed_simd::u8x32; | |
fn scan_jpg_header(data: &[u8]) -> Option<usize> { | |
const VLEN: usize = ScanT::lanes(); | |
const PATTERN_LEN: usize = 3; | |
let p1 = ScanT::splat(0xff); | |
let p2 = ScanT::splat(0xd8); | |
let p3 = ScanT::splat(0xff); | |
// TODO: Finish the search on the part of the array that doesn't | |
// fit in a vector. | |
let num_chunks = (data.len()-PATTERN_LEN+1)/VLEN; | |
for chunk_index in 0..num_chunks { | |
let c1 = ScanT::from_slice_unaligned(&data[chunk_index*VLEN..]); | |
let c2 = ScanT::from_slice_unaligned(&data[chunk_index*VLEN+1..]); | |
let c3 = ScanT::from_slice_unaligned(&data[chunk_index*VLEN+2..]); | |
let matches = c1.eq(p1) & c2.eq(p2) & c3.eq(p3); | |
if matches.any() { | |
match (0..VLEN).filter(|i| matches.extract(*i)).next() { | |
Some(i) => return Some(chunk_index*VLEN + i), | |
_ => () | |
} | |
} | |
} | |
return None; | |
} | |
/// Kinds of JPEG markers recognized by [`Marker::scan`].
#[derive(Debug, PartialEq)]
enum MarkerKind {
    /// Start of image (`FFD8`).
    SOI,
    /// Application segment (`FFE0`..=`FFEF`).
    APPn,
    /// Quantization table definition (`FFDB`).
    DQT,
    /// Start of frame, baseline (`FFC0`).
    SOF0,
    /// Huffman table definition (`FFC4`).
    DHT,
    /// Start of frame, progressive (`FFC2`).
    SOF2,
    /// Restart interval definition (`FFDD`).
    DRI,
    /// Restart marker (`FFD0`..=`FFD7`).
    RSTn,
    /// Comment (`FFFE`).
    COM,
    /// Start of scan (`FFDA`).
    SOS,
    /// End of image (`FFD9`).
    EOI,
}

/// A marker recognized at the very beginning of a byte buffer.
#[derive(Debug)]
struct Marker {
    /// Which marker was found.
    kind: MarkerKind,
    /// Number of bytes to skip to get past this marker: the 2-byte marker
    /// code plus the payload length passed to `build`.
    len: usize,
}

impl Marker {
    /// Builds a marker of `kind` whose payload is `payload_len` bytes long.
    ///
    /// Returns `None` when the marker (2-byte code plus payload) would not
    /// fit inside a buffer of `buffer_len` bytes.
    fn build(kind: MarkerKind, payload_len: usize,
             buffer_len: usize) -> Option<Self> {
        let marker_len = payload_len + 2;
        if marker_len > buffer_len {
            None
        } else {
            Some(Self { kind, len: marker_len })
        }
    }

    /// Decodes the marker that starts at `buf[0]`.
    ///
    /// Returns `None` when `buf` is too short, when the first two bytes are
    /// not a recognized marker code, or when the marker's declared size
    /// exceeds `buf`.
    fn scan(buf: &[u8]) -> Option<Self> {
        let size = buf.len();
        if size < 2 {
            return None;
        }
        let header = u16::from_be_bytes([buf[0], buf[1]]);

        // Markers reported with no payload. SOI, EOI and the restart markers
        // are stand-alone codes that carry no length field. SOS is also
        // reported as the bare 2-byte code so that the caller walks whatever
        // follows it byte by byte.
        let bare_kind = match header {
            0xffd8 => Some(MarkerKind::SOI),
            0xffda => Some(MarkerKind::SOS),
            0xffd9 => Some(MarkerKind::EOI),
            0xffd0..=0xffd7 => Some(MarkerKind::RSTn),
            _ => None,
        };
        if let Some(kind) = bare_kind {
            return Self::build(kind, 0, size);
        }

        // Every remaining marker is followed by a big-endian 16-bit length.
        if size < 4 {
            return None;
        }
        let var_len = usize::from(u16::from_be_bytes([buf[2], buf[3]]));
        let (kind, payload_len) = match header {
            0xfffe => (MarkerKind::COM, var_len),
            0xffc0 => (MarkerKind::SOF0, var_len),
            0xffc4 => (MarkerKind::DHT, var_len),
            0xffc2 => (MarkerKind::SOF2, var_len),
            0xffdb => (MarkerKind::DQT, var_len),
            // DRI is always treated as a fixed 4-byte payload, whatever the
            // length field says.
            0xffdd => (MarkerKind::DRI, 4),
            0xffe0..=0xffef => (MarkerKind::APPn, var_len),
            _ => return None,
        };
        Self::build(kind, payload_len, size)
    }
}
fn get_end_of_jpg(buf: &[u8]) -> Option<usize> { | |
let size = buf.len(); | |
let mut offset = 0; | |
// Step 1: Make sure the SOI marker is present | |
let marker: Marker = Marker::scan(&buf[offset..])?; | |
if marker.kind != MarkerKind::SOI { | |
return None; | |
} | |
offset += marker.len; | |
// Step 2: Go through all markers until MARKER_SOS | |
while offset < size { | |
let marker = Marker::scan(&buf[offset..])?; | |
offset += marker.len; | |
if marker.kind == MarkerKind::SOS { | |
break; | |
} | |
} | |
// After start of scan, the raw data of the image | |
// is present. We don't know where it ends. There's | |
// no length indicator. | |
// Step 3: Search for MARKER_EOI | |
// if we looked through 50MB of data, give up | |
while offset < size && offset < 50*ONE_MB { | |
match Marker::scan(&buf[offset..]) { | |
Some(Marker{kind: MarkerKind::EOI, len}) => | |
return Some(offset + len), | |
_ => () | |
} | |
// Note: We do byte by byte because the EOI marker | |
// can be anywhere | |
offset += 1; | |
} | |
return None; | |
} | |
/// Creates the next sequentially numbered output file in the current
/// directory: `001.jpg`, `002.jpg`, and so on.
///
/// The counter advances on every call, including calls where file creation
/// fails.
///
/// # Errors
///
/// Returns the error reported by [`File::create`].
fn create_empty_jpg() -> io::Result<File> {
    use std::sync::atomic::{AtomicU32, Ordering};

    // An atomic counter replaces the former `static mut`, so no `unsafe` is
    // needed to update or read it.
    static IMG_NUM: AtomicU32 = AtomicU32::new(0);

    // `fetch_add` returns the previous value; numbering starts at 1.
    let img_num = IMG_NUM.fetch_add(1, Ordering::Relaxed) + 1;
    File::create(format!("{:03}.jpg", img_num))
}
fn recover_jpg(buf: &[u8]) -> io::Result<()> { | |
let mut file = create_empty_jpg()?; | |
file.write_all(&buf)?; | |
Ok(()) | |
} | |
fn maybe_recover_jpg(buf: &[u8]) -> io::Result<()> { | |
match get_end_of_jpg(buf) { | |
Some(end) => recover_jpg(&buf[..end]), | |
None => Ok(()) | |
} | |
} | |
fn undelete_jpg(buf: &[u8]) -> io::Result<usize> { | |
let mut img_count = 0; | |
let size = buf.len(); | |
let mut offset = 0; | |
let mut pbar = pbr::ProgressBar::new(size as u64); | |
pbar.set_units(pbr::Units::Bytes); | |
pbar.set_max_refresh_rate(Some(time::Duration::from_millis(500))); | |
while offset < size { | |
let scan_for = min(10*ONE_KB, size-offset); | |
let inc_by = match scan_jpg_header(&buf[offset..offset+scan_for]) { | |
Some(offset_header) => { | |
maybe_recover_jpg(&buf[offset+offset_header..])?; | |
img_count += 1; | |
offset_header+1 | |
}, | |
None => scan_for, | |
}; | |
pbar.add(inc_by as u64); | |
offset += inc_by; | |
} | |
pbar.finish(); | |
return Ok(img_count); | |
} | |
fn main() -> Result<(), Box<dyn error::Error>> { | |
let argv: Vec<String> = env::args().collect(); | |
if argv.len() != 2 { | |
panic!("usage: undelete input_file"); | |
} | |
let path = &argv[1]; | |
let mut file = File::open(path) | |
.expect(&format!("Failed to open {}", path)); | |
// block devices need special care for getting their sizes | |
let fsize = file.seek(SeekFrom::End(0))?; | |
let mfile = unsafe { MmapOptions::new().len(fsize as usize).map(&file)? }; | |
let img_count = undelete_jpg(&mfile)?; | |
println!("Recovered {} images", img_count); | |
Ok(()) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment