Last active
January 9, 2024 08:34
-
-
Save Gordin508/fba1aa154f92b8bfd136c742c695a683 to your computer and use it in GitHub Desktop.
Advent of Code 2017 Day04 solution
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#![allow(unused)] | |
#![allow(dead_code)] | |
/* | |
* An unoptimized implementation could simply use HashSets to solve the task, | |
* and the runtime of that is actually fine. However, HashSets are somewhat overkill | |
* for these inputs (only a handful of words per line). | |
* Therefore my idea is to implement alternative data structures which behave | |
* identically, but perform better for small input sets. | |
* | |
* My original idea was using probabilistic data structures with mock hash functions, | |
* which should be highly efficient for our small input size. | |
* However, I ended up scrapping a lot of implementation details, as the speedup only became noticable | |
* for significantly longer input lines. | |
*/ | |
// drop-in replacement for HashSets | |
trait BloomFilter<'a> { | |
fn new() -> Self; | |
fn insert(&mut self, word: &'a str) -> bool; | |
fn clear(&mut self); | |
} | |
// DS to store words as references to strings | |
struct WordBloomFilter<'a> { | |
bitmap: u32, | |
words: Vec<&'a str> | |
} | |
// DS to store words as unordered vec of chars | |
// (uses u8 internally as all inputs are 'a'..='z') | |
struct SimpleUnorderedBloomFilter { | |
bitmap: u64, | |
words: Vec<[u8;26]> | |
} | |
impl<'a> BloomFilter<'a> for WordBloomFilter<'a> { | |
fn new() -> WordBloomFilter<'a> { | |
WordBloomFilter{bitmap: 0, words: Vec::new()} | |
} | |
fn insert(&mut self, word: &'a str) -> bool { | |
// RESOLUTION is how many characters we consider for our filter | |
// instead of using RESOLUTION hash functions, we simply consider the first RESOLUTION chars | |
// so hash_1(word) == word[0], hash_2(word) == word[1] ... | |
// The value 2 is experimentally chosen | |
const RESOLUTION: usize = 2; | |
let mut nums = [0u32; RESOLUTION]; | |
for (i, n) in word.bytes().take(RESOLUTION).enumerate() { | |
nums[i] = 1 << (n - 0x61); // 0x61 == 'a' | |
} | |
if nums.iter().all(|n| self.bitmap & *n != 0 || *n == 0) { | |
if self.words.iter().any(|stored| *stored == word) { | |
return false | |
} | |
} | |
for n in nums { | |
self.bitmap |= n; | |
} | |
self.words.push(word); | |
true | |
} | |
fn clear(&mut self) { | |
self.bitmap = 0; | |
self.words.clear(); | |
} | |
} | |
impl<'a> BloomFilter<'a> for SimpleUnorderedBloomFilter { | |
fn new() -> SimpleUnorderedBloomFilter { | |
SimpleUnorderedBloomFilter{bitmap: 0, words: Vec::new()} | |
} | |
fn insert(&mut self, word: &str) -> bool { | |
// well, this once was like a bloom filter, but all my 'optimizations' | |
// made the code slower | |
// My guess is that my original ideas would have been | |
// faster if the passphrases were much longer | |
// convert word into an array of character counts | |
let mut charcounts = [0u8; 26]; | |
for b in word.bytes().map(|c| c - 0x61) { | |
charcounts[b as usize] += 1; | |
} | |
// we trust that no word is longer than 63 characters | |
// if we had such long words, we could simply omit this check | |
// as it saves almost no time anyway | |
let wordlen_bit = 1 << (word.len() - 1); | |
if wordlen_bit & self.bitmap > 0 && self.words.iter().any(|stored| *stored == charcounts) { | |
return false | |
} | |
self.bitmap |= wordlen_bit; | |
self.words.push(charcounts); | |
true | |
} | |
fn clear(&mut self) { | |
self.bitmap = 0; | |
self.words.clear(); | |
} | |
} | |
// passphrase policy of part 1 | |
fn password_valid_no_repeats(passphrase: &str) -> bool { | |
let mut seen = WordBloomFilter::new(); | |
password_valid(&mut seen, passphrase) | |
} | |
// passphrase policy of part 2 | |
fn password_valid_no_anagrams(passphrase: &str) -> bool { | |
let mut seen = SimpleUnorderedBloomFilter::new(); | |
password_valid(&mut seen, passphrase) | |
} | |
// passphrase policy is based on first argument | |
// This function allows to reuse the 'seen' argument across invocations. | |
// This increases speed, as the filter is internally backed by a vector, | |
// which is costly if rapidly reinitialized. | |
fn password_valid<'a, T: BloomFilter<'a>>(seen: &mut T, passphrase: &'a str) -> bool { | |
let result = passphrase.split(' ').all(|word| seen.insert(word)); | |
seen.clear(); | |
result | |
} | |
fn part1(lines: &Vec<&str>) -> Option<i64> { | |
let mut seen = WordBloomFilter::new(); | |
Some(lines | |
.iter() | |
.map(|line| password_valid(&mut seen, line)) | |
.filter(|b| *b) | |
.count() | |
as i64) | |
} | |
fn part2(lines: &Vec<&str>) -> Option<i64> { | |
let mut seen = SimpleUnorderedBloomFilter::new(); | |
Some(lines | |
.iter() | |
.map(|line| password_valid(&mut seen, line)) | |
.filter(|b| *b) | |
.count() | |
as i64) | |
} | |
fn main() { | |
use std::fs; | |
use std::env; | |
use std::time::Instant; | |
let args: Vec<String> = env::args().collect(); | |
let infile = args.get(1).unwrap_or_else(|| { | |
println!("Usage: {} <puzzle input>", args[0]); | |
std::process::exit(1); | |
}); | |
let contents = fs::read_to_string(infile) | |
.expect("Could not read in file"); | |
let lines: Vec<&str> = contents.lines().collect(); | |
// execute part 1 and part 2, print their results if they exist | |
// later parts may follow, so we loop over the part functions | |
let parts = [part1, part2]; | |
for (index, part) in parts.iter().enumerate() { | |
let partstart = Instant::now(); | |
let result = part(&lines); | |
match result { | |
Some(result) => println!("Part {}: {}\t({:?})", index+1, result, partstart.elapsed()), | |
None => println!("Part {}: No result", index+1), | |
} | |
} | |
} | |
#[cfg(test)] | |
mod tests { | |
use super::*; | |
#[test] | |
fn valid_valid_simple() { | |
assert!(password_valid_no_repeats("aa bb cc dd ee")); | |
} | |
#[test] | |
fn valid_invalid_simple() { | |
assert!(!password_valid_no_repeats("aa bb cc dd aa")); | |
} | |
#[test] | |
fn valid_sharedprefix() { | |
assert!(password_valid_no_repeats("aa bb cc dd aaa")); | |
} | |
#[test] | |
fn valid_no_anagrams() { | |
assert!(password_valid_no_anagrams("abcde fghij")); | |
assert!(password_valid_no_anagrams("iiii oiii ooii oooi oooo")); | |
} | |
#[test] | |
fn invalid_anagrams() { | |
assert!(!password_valid_no_anagrams("abcde xyz ecdab")); | |
assert!(!password_valid_no_anagrams("oiii ioii iioi iiio")); | |
} | |
#[test] | |
fn valid_partial_anagrams() { | |
assert!(password_valid_no_anagrams("a ab abc abd abf abj")); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment