Last active
July 19, 2023 19:23
-
-
Save ksurent/d68a14c223c2cee9caac16ce0e0ebc46 to your computer and use it in GitHub Desktop.
A silly program that finds CPAN modules that could be deliberate typos. See https://threatpost.com/attackers-use-typo-squatting-to-steal-npm-credentials/127235/.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Download index file: wget https://www.cpan.org/modules/02packages.details.txt. | |
use std::{ | |
collections::HashSet, | |
fmt::Display, | |
fs::File, | |
io::{self, BufReader}, | |
}; | |
fn main() -> io::Result<()> { | |
let fh = File::open("02packages.details.txt")?; | |
let rd = BufReader::new(fh); | |
let modules = read_index(rd)?; | |
let candidates = find_squatters(modules.as_slice()); | |
for c in candidates { | |
println!("{}", c); | |
} | |
Ok(()) | |
} | |
#[derive(PartialEq, Debug)] | |
struct Module { | |
name: String, | |
author: String, | |
} | |
#[derive(PartialEq, Debug)] | |
struct Candidate<'a> { | |
left: &'a Module, | |
right: &'a Module, | |
distance: f32, | |
} | |
impl<'a> Display for Candidate<'a> { | |
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | |
write!( | |
f, | |
"{} {} {} {} {}", | |
self.left.name, self.left.author, self.right.name, self.right.author, self.distance | |
) | |
} | |
} | |
fn read_index<'a, R: io::BufRead>(rd: R) -> io::Result<Vec<Module>> { | |
const HEADER_AND_EMPTY_LINE: usize = 9; | |
const MOD_NAME: usize = 0; | |
const MOD_PATH: usize = 2; | |
let skip_prefixes = [ | |
"Google::Ads::AdWords", | |
"Acme::CPANModulesBundle::Import::PerlAdvent", | |
"Acme::CPANModules::Import::PerlDancerAdvent", | |
"Acme::CPANModules::Import::PerlAdvent", | |
"Acme::CPANModules::Import::MojoliciousAdvent", | |
]; | |
let mut modules = Vec::new(); | |
let mut lines = rd.lines().skip(HEADER_AND_EMPTY_LINE); | |
loop { | |
match lines.next() { | |
Some(Ok(l)) => { | |
let pieces = l.split_ascii_whitespace().collect::<Vec<_>>(); | |
assert_eq!(pieces.len(), 3); | |
let name = pieces[MOD_NAME]; | |
if skip_prefixes.iter().any(|&x| name.starts_with(x)) { | |
continue; | |
} | |
let path = pieces[MOD_PATH]; | |
let cutoff = path.rfind("/").unwrap_or(path.len()); | |
modules.push(Module { | |
name: name.to_string(), | |
author: path[..cutoff].to_string(), | |
}); | |
} | |
Some(Err(e)) => return Err(e), | |
None => break, | |
} | |
} | |
Ok(modules) | |
} | |
fn find_squatters<'a>(modules: &'a [Module]) -> Vec<Candidate<'a>> { | |
let mut seen = HashSet::with_capacity(modules.len()); | |
let mut candidates = Vec::with_capacity(modules.len()); | |
let kbd = Keyboard::new(); | |
for m1 in modules.iter() { | |
let b1 = m1.name.as_bytes(); | |
for m2 in modules.iter() { | |
if m1.name == m2.name || m1.author == m2.author { | |
continue; | |
} | |
let b2 = m2.name.as_bytes(); | |
let distance = kbd.distance(b1, b2); | |
if distance < 2f32 && !seen.contains(b2) { | |
candidates.push(Candidate { | |
left: m1, | |
right: m2, | |
distance, | |
}); | |
seen.insert(b1); | |
seen.insert(b2); | |
} | |
} | |
} | |
candidates | |
} | |
#[derive(Copy, Clone)] | |
struct Key { | |
x: u8, | |
y: u8, | |
} | |
impl Key { | |
fn distance(&self, other: Key) -> f32 { | |
let dx = other.x as f32 - self.x as f32; | |
let dy = other.y as f32 - self.y as f32; | |
(dx * dx + dy * dy).sqrt() | |
} | |
} | |
impl From<usize> for Key { | |
fn from(pos: usize) -> Self { | |
let x = (pos % 10) as u8; | |
let y = (pos / 10) as u8; | |
Self { x, y } | |
} | |
} | |
#[rustfmt::skip] | |
static LAYOUT: [u8; 40] = [ | |
b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b'0', | |
b'q', b'w', b'e', b'r', b't', b'y', b'u', b'i', b'o', b'p', | |
b'a', b's', b'd', b'f', b'g', b'h', b'j', b'k', b'l', b';', | |
b'z', b'x', b'c', b'v', b'b', b'n', b'm', b',', b'.', b'/', | |
]; | |
#[rustfmt::skip] | |
static LAYOUT_SHIFTED: [u8; 40] = [ | |
b'!', b'@', b'#', b'$', b'%', b'^', b'&', b'*', b'(', b')', | |
b'Q', b'W', b'E', b'R', b'T', b'Y', b'U', b'I', b'O', b'P', | |
b'A', b'S', b'D', b'F', b'G', b'H', b'J', b'K', b'L', b':', | |
b'Z', b'X', b'C', b'V', b'B', b'N', b'M', b'<', b'>', b'?', | |
]; | |
struct Keyboard { | |
keys: [Key; 255], | |
} | |
impl Keyboard { | |
fn new() -> Keyboard { | |
let mut keys = [Key { x: 0, y: 0 }; 255]; | |
for (i, ch) in LAYOUT.iter().copied().enumerate() { | |
keys[ch as usize] = Key::from(i); | |
} | |
for (i, ch) in LAYOUT_SHIFTED.iter().copied().enumerate() { | |
keys[ch as usize] = Key::from(i); | |
} | |
Keyboard { keys } | |
} | |
#[inline] | |
fn as_key(&self, c: u8) -> Key { | |
unsafe { *self.keys.get_unchecked(c as usize) } | |
} | |
#[inline] | |
fn key_distance(&self, c1: u8, c2: u8) -> f32 { | |
if c1 == c2 { | |
return 0f32; | |
} | |
self.as_key(c1).distance(self.as_key(c2)) | |
} | |
fn distance(&self, s1: &[u8], s2: &[u8]) -> f32 { | |
if s1.len() != s2.len() { | |
return std::f32::MAX; | |
} | |
if s1 == s2 { | |
return 0f32; | |
} | |
let mut dist = 0f32; | |
for (i, _) in s1.iter().enumerate() { | |
dist += self.key_distance(s1[i], s2[i]); | |
} | |
dist | |
// s1.iter() | |
// .copied() | |
// .enumerate() | |
// .fold(0f32, |acc, (i, c1)| acc + self.char_distance(c1, s2[i])) | |
} | |
} | |
#[cfg(test)] | |
mod tests { | |
use super::*; | |
#[test] | |
fn test_keyboard_distance() { | |
let kbd = Keyboard::new(); | |
assert_eq!( | |
kbd.distance("fooo".as_bytes(), "foo".as_bytes()), | |
std::f32::MAX, | |
"extra letter left" | |
); | |
assert_eq!( | |
kbd.distance("foo".as_bytes(), "fooo".as_bytes()), | |
std::f32::MAX, | |
"extra letter right" | |
); | |
assert_eq!( | |
kbd.distance("foo".as_bytes(), "foo".as_bytes()), | |
0f32, | |
"same" | |
); | |
assert_eq!( | |
kbd.distance("foo".as_bytes(), "FOO".as_bytes()), | |
0f32, | |
"same but shifted" | |
); | |
assert_eq!( | |
kbd.distance("foo".as_bytes(), "fop".as_bytes()), | |
1f32, | |
"typo squat" | |
); | |
assert_eq!( | |
kbd.distance("foo".as_bytes(), "foP".as_bytes()), | |
1f32, | |
"typo squat shifted" | |
); | |
} | |
#[test] | |
fn test_key_distance() { | |
assert_eq!( | |
(Key { x: 0, y: 0 }).distance(Key { x: 0, y: 0 }), | |
0f32, | |
"same" | |
); | |
assert_eq!( | |
(Key { x: 0, y: 0 }).distance(Key { x: 1, y: 0 }), | |
1f32, | |
"one to the right" | |
); | |
let distance = (Key { x: 0, y: 0 }).distance(Key { x: 1, y: 1 }); | |
let expected = 1.414214f32; | |
let epsilon = 0.0001f32; | |
assert!( | |
(distance - expected).abs() < epsilon, | |
"one to the right diagonally" | |
); | |
} | |
#[test] | |
fn test_read_from_file() { | |
let lines = r#" | |
File: 02packages.details.txt | |
URL: http://www.perl.com/CPAN/modules/02packages.details.txt | |
Description: Package names found in directory $CPAN/authors/id/ | |
Columns: package name, version, path | |
Intended-For: Automated fetch routines, namespace documentation. | |
Written-By: PAUSE version 1.005 | |
Line-Count: 42 | |
Last-Updated: Tue, 07 Jul 2020 12:56:01 GMT | |
A 0.01 A/AA/AAA/A-0.01.tar.gz | |
B 0.02 B/BB/BBB/B-0.02.tar.gz | |
C undef C/CC/CCC/C-0.03.tar.gz | |
"#; | |
let modules = read_index(lines.trim().as_bytes()).expect("parse"); | |
assert_eq!( | |
modules, | |
Vec::from([ | |
Module { | |
name: "A".to_string(), | |
author: "A/AA/AAA".to_string(), | |
}, | |
Module { | |
name: "B".to_string(), | |
author: "B/BB/BBB".to_string(), | |
}, | |
Module { | |
name: "C".to_string(), | |
author: "C/CC/CCC".to_string(), | |
}, | |
]), | |
"correct modules", | |
) | |
} | |
#[test] | |
fn test_find_squatters() { | |
let modules = [ | |
Module { | |
name: "Important::Stuff".to_string(), | |
author: "A/AA/AAA".to_string(), | |
}, | |
Module { | |
name: "Imp0rtant::Stuff".to_string(), | |
author: "B/BB/BBB".to_string(), | |
}, | |
Module { | |
name: "Acme::Whatever".to_string(), | |
author: "C/CC/CCC".to_string(), | |
}, | |
]; | |
let candidates = find_squatters(&modules); | |
assert_eq!( | |
candidates, | |
Vec::from([Candidate { | |
left: &modules[0], | |
right: &modules[1], | |
distance: 1.4142135, | |
}]) | |
); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment