Created
August 22, 2016 16:03
-
-
Save cholcombe973/000d6e059b75b4a05349a1b691169c98 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//! This program works around an incompatibility between how mysqldump escapes
//! some characters and how Hive interprets those escaped characters. It does the following:
//! When the byte sequence 0x5c30 appears in the input:
//! a. if it is preceded by zero or an even number of 0x5c bytes, translate that 0x5c30 to 0x00;
//! b. if it is preceded by an odd number of 0x5c bytes, leave it unchanged.
//! Some sample transforms: | |
//! 0x5c30 => 0x00 | |
//! 0x5c5c30 => 0x5c5c30 | |
//! 0x5c5c5c30 => 0x5c5c00 | |
//! 0x5c5c5c5c30 => 0x5c5c5c5c30 | |
//! 0x5c5c5c3030 => 0x5c5c0030 | |
//! 0x5c5c5c5c3030 => 0x5c5c5c5c3030 | |
//! 0x5c5c5c40 => 0x5c5c5c40 | |
//! 0x5c5c5c5c40 => 0x5c5c5c5c40 | |
//! Here is another way to test: | |
//! - Create table with blob content: create table MyTest (id integer, value1 varchar(20), | |
//! content blob, value2 double, primary key(id)); | |
//! - Insert into blob content: insert into MyTest (id, value1, content, value2) | |
//! values (1, "data1", 0x3020090d0a2227005c30, 2.2); | |
//! - checking content: select hex(content) from MyTest; | |
//! - chmod a+rw /tmp/dump | |
//! - mysqldump -u root --tab=/tmp/dump --single-transaction --create-options test
//! - see content: hexdump /tmp/dump/MyTest.txt | |
//! hexdump of original dump file: | |
//! 0000000 31 09 64 61 74 61 31 09 30 20 5c 09 0d 5c 0a 22 | |
//! 0000010 27 5c 30 5c 5c 30 09 32 2e 32 0a | |
//! 000001b | |
//! hexdump after passing through this program: | |
//! 0000000 31 09 64 61 74 61 31 09 30 20 5c 09 0d 5c 0a 22 | |
//! 0000010 27 00 5c 5c 30 09 32 2e 32 0a | |
//! 000001a | |
//! Author : vamsi Nov 2015 | |
#[macro_use] | |
extern crate log; | |
extern crate simplelog; | |
use simplelog::SimpleLogger; | |
use simplelog::LogLevelFilter; | |
use std::io::{BufReader, BufWriter}; | |
use std::io; | |
use std::io::ErrorKind; | |
use std::io::prelude::*; | |
/// Checks `process_input` against every sample transform listed in the
/// module documentation.
#[test]
fn test_transform() {
    // Runs one in-memory conversion. The `Result` is checked so that an I/O
    // failure fails the test instead of being silently discarded.
    fn transform(input: &[u8]) -> Vec<u8> {
        let mut output: Vec<u8> = Vec::new();
        process_input(std::io::Cursor::new(input), &mut output)
            .expect("in-memory conversion should not fail");
        output
    }

    // 0x5c30 => 0x00
    assert_eq!(transform(&[0x5c, 0x30]), vec![0x00]);
    // 0x5c5c30 => 0x5c5c30
    assert_eq!(transform(&[0x5c, 0x5c, 0x30]), vec![0x5c, 0x5c, 0x30]);
    // 0x5c5c5c30 => 0x5c5c00
    assert_eq!(transform(&[0x5c, 0x5c, 0x5c, 0x30]), vec![0x5c, 0x5c, 0x00]);
    // 0x5c5c5c5c30 => 0x5c5c5c5c30
    assert_eq!(transform(&[0x5c, 0x5c, 0x5c, 0x5c, 0x30]),
               vec![0x5c, 0x5c, 0x5c, 0x5c, 0x30]);
    // 0x5c5c5c3030 => 0x5c5c0030
    assert_eq!(transform(&[0x5c, 0x5c, 0x5c, 0x30, 0x30]),
               vec![0x5c, 0x5c, 0x00, 0x30]);
    // 0x5c5c5c5c3030 => 0x5c5c5c5c3030
    assert_eq!(transform(&[0x5c, 0x5c, 0x5c, 0x5c, 0x30, 0x30]),
               vec![0x5c, 0x5c, 0x5c, 0x5c, 0x30, 0x30]);
    // 0x5c5c5c40 => 0x5c5c5c40
    assert_eq!(transform(&[0x5c, 0x5c, 0x5c, 0x40]),
               vec![0x5c, 0x5c, 0x5c, 0x40]);
    // 0x5c5c5c5c40 => 0x5c5c5c5c40
    assert_eq!(transform(&[0x5c, 0x5c, 0x5c, 0x5c, 0x40]),
               vec![0x5c, 0x5c, 0x5c, 0x5c, 0x40]);
}
/// Writes a single `byte` to `writer`.
///
/// Returns `Ok(1)` once the byte has been accepted by the writer.
///
/// # Errors
///
/// Propagates any I/O error from the writer. `write_all` is used instead of
/// a bare `write` so that a writer which accepts zero bytes yields a
/// `WriteZero` error rather than silently dropping the byte, and so that
/// interrupted writes are retried.
fn write_byte<W>(byte: u8, writer: &mut W) -> Result<usize, std::io::Error>
    where W: Write
{
    writer.write_all(&[byte])?;
    Ok(1)
}
/// Copies `reader` to `writer`, turning mysqldump's escaped zero (`0x5c 0x30`)
/// into a raw `0x00` byte.
///
/// For a run of `0x5c` bytes that is followed by `0x30`:
///
/// * if zero or an even number of `0x5c` bytes precede the final `0x5c 0x30`
///   pair, that pair is replaced by a single `0x00`;
/// * if an odd number precede it, the bytes are copied unchanged.
///
/// Every other byte is copied verbatim. The escape state is carried across
/// buffer refills, so a sequence split over two reads is handled the same as
/// one that arrives in a single read, and a backslash left outstanding at end
/// of input is still written out.
///
/// # Errors
///
/// Returns the first I/O error reported by `reader` or `writer` (interrupted
/// reads are retried). The writer is flushed before returning so that flush
/// failures are reported here rather than being lost when it is dropped.
fn process_input<R, W>(mut reader: R, mut writer: W) -> Result<(), std::io::Error>
    where R: BufRead,
          W: Write
{
    const BACKSLASH: u8 = 0x5c;
    const ASCII_ZERO: u8 = 0x30;
    const NUL: u8 = 0x00;

    // True while one 0x5c has been read but not yet written: its fate depends
    // on the byte that follows it.
    let mut pending = false;
    // True when zero or an even number of 0x5c bytes of the current run
    // precede the pending one. Reset at the start of every run.
    let mut even_before = true;

    loop {
        let consumed = {
            let chunk = match reader.fill_buf() {
                Ok(chunk) => chunk,
                Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            };
            if chunk.is_empty() {
                // End of input.
                break;
            }

            // Start of the stretch of `chunk` that is copied through as-is
            // but has not been written yet; written in one call per stretch.
            let mut literal_start = 0;
            for (i, &byte) in chunk.iter().enumerate() {
                if !pending {
                    if byte == BACKSLASH {
                        // Emit the plain bytes before this backslash and hold
                        // the backslash back until we see what follows.
                        writer.write_all(&chunk[literal_start..i])?;
                        pending = true;
                        even_before = true;
                        literal_start = i + 1;
                    }
                    continue;
                }

                match byte {
                    BACKSLASH => {
                        // Release the held backslash; this one is now pending.
                        writer.write_all(&[BACKSLASH])?;
                        even_before = !even_before;
                        literal_start = i + 1;
                    }
                    ASCII_ZERO if even_before => {
                        // The pending backslash and this '0' collapse to NUL.
                        writer.write_all(&[NUL])?;
                        pending = false;
                        literal_start = i + 1;
                    }
                    _ => {
                        // Not a convertible escape: release the backslash and
                        // let the current byte join the next literal stretch.
                        writer.write_all(&[BACKSLASH])?;
                        pending = false;
                        literal_start = i;
                    }
                }
            }
            writer.write_all(&chunk[literal_start..])?;
            chunk.len()
        };
        reader.consume(consumed);
    }

    if pending {
        // Input ended right after a backslash; do not drop it.
        writer.write_all(&[BACKSLASH])?;
    }
    writer.flush()
}
fn main() { | |
let _ = SimpleLogger::init(LogLevelFilter::Trace); | |
// let stdin = io::stdin(); | |
// let mut stdin = stdin.lock(); | |
let mut stdin = BufReader::with_capacity(256 * 1024, io::stdin()); | |
// BufWriter with 128K capacity. Try to make our writes large for efficient | |
// downstream consumption | |
let mut writer = BufWriter::with_capacity(256 * 1024, io::stdout()); | |
match process_input(&mut stdin, &mut writer) { | |
Ok(_) => {} | |
Err(e) => { | |
error!("Failed with error: {}", e); | |
} | |
}; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment