Skip to content

Instantly share code, notes, and snippets.

@sandersaares
Last active October 19, 2025 13:33
Show Gist options
  • Select an option

  • Save sandersaares/4c74ab951f02af9ad079d98e46711287 to your computer and use it in GitHub Desktop.

Select an option

Save sandersaares/4c74ab951f02af9ad079d98e46711287 to your computer and use it in GitHub Desktop.
poetry contest
/// Contest manifest, deserialized with serde.
#[derive(Deserialize)]
struct Manifest {
/// Categories declared by the manifest.
categories: Vec<Category>,
/// Paths of the round files (owned `PathBuf`s in this version).
rounds: Vec<PathBuf>,
}
/// One category from the manifest, described by a list of keywords.
#[derive(Deserialize)]
struct Category {
/// Keywords belonging to this category, as owned strings.
keywords: Vec<String>,
}
/// One round of the contest: the list of entries deserialized from a round's JSON.
#[derive(Deserialize)]
struct Round {
/// Entries submitted in this round.
entries: Vec<Entry>,
}
/// A single contest entry; every field is an owned `String` in this version.
#[derive(Deserialize)]
struct Entry {
/// Author of the entry.
author: String,
/// Title of the entry.
title: String,
/// Text of the entry.
contents: String,
}
// Function gained lifetime parameters.
// NOTE(review): `'manifest` is declared but not used by any parameter visible here; the body
// (and possibly part of the signature) is elided in this excerpt.
fn solve_round<'manifest, 'round>(
// An owned String became a borrowed &'round str.
round_json: &'round str,
// Lookup from keyword to a list of `usize` values (presumably category indices — confirm).
keyword_to_categories: &HashMap<&str, Vec<usize>>,
// Per-author `u64` counters, mutated in place.
points_by_author: &mut HashMap<String, u64>,
) { ... }
// The deserialized object gets the lifetime of the input string.
// `round_json` is already a `&'round str`, so it is passed as-is instead of being re-borrowed.
let round: Round<'round> = serde_json::from_str(round_json).unwrap();
// Vec<&str> became Vec<Cow<'round, str>>
let mut best_by_category: HashMap<usize, (f64, Vec<Cow<'round, str>>)> = HashMap::new();
// More lifetime parameters!
// Consumes a `Round<'round>` and returns `Entry<'round>` values carrying the same lifetime.
// The body is elided in this excerpt.
fn parse_entries<'round>(round: Round<'round>) -> Vec<Entry<'round>> { ... }
// Takes no arguments and returns a `u64`; the body is elided in this excerpt.
fn solve() -> u64 { ... }
// Takes ownership of a data directory path and the manifest JSON text and returns a `u64`.
// The body is elided in this excerpt.
fn solve_inner(data_dir: PathBuf, manifest_json: String) -> u64 { ... }
// Original signature: takes ownership of one round's JSON text, reads the keyword lookup, and
// mutates the per-author counters in place. The body is elided in this excerpt.
fn solve_round(
round_json: String,
keyword_to_categories: &HashMap<&str, Vec<usize>>,
points_by_author: &mut HashMap<String, u64>,
) { ... }
// Consumes a `Round` and returns a `Vec<Entry>`; the body is elided in this excerpt.
fn parse_entries(round: Round) -> Vec<Entry> { ... }
// Maps a piece of text to an `f64` weight; the body is elided in this excerpt.
fn calculate_weight(content: &str) -> f64 { ... }
use criterion::{Criterion, criterion_group, criterion_main};
use poetry_contest::v01_simple;
/// Registers the `v01_simple` benchmark with Criterion.
fn entrypoint(c: &mut Criterion) {
    c.bench_function("v01_simple", |b| {
        // Hand the result back to Criterion instead of discarding it: `Bencher::iter`
        // passes the routine's return value through `black_box`, so the optimizer
        // cannot treat the computed value as unused.
        b.iter(v01_simple::solve);
    });
}
criterion_group!(benches, entrypoint);
criterion_main!(benches);
use alloc_tracker::{Allocator, Session};
use criterion::{Criterion, criterion_group, criterion_main};
use poetry_contest::v01_simple;
#[global_allocator]
static ALLOCATOR: Allocator<std::alloc::System> = Allocator::system();
/// Registers the `v01_simple` benchmark with Criterion while recording allocations
/// made by each iteration, then prints the allocation report to stdout.
fn entrypoint(c: &mut Criterion) {
    let allocs = Session::new();
    let allocs_op = allocs.operation("v01_simple");

    c.bench_function("v01_simple", |b| {
        b.iter(|| {
            // The span stays alive until the closure returns, so it covers the whole call.
            let _span = allocs_op.measure_thread();
            // Return the result rather than discarding it: `Bencher::iter` passes the
            // returned value through `black_box`, so it cannot be optimized away as unused.
            v01_simple::solve()
        });
    });

    allocs.print_to_stdout();
}
criterion_group!(benches, entrypoint);
criterion_main!(benches);
[profile.release]
# Ensure high quality data with proper stack traces when profiling.
debug = "line-tables-only"
/// When profiling, it is important to always use a fixed number of
/// iterations so that the results are comparable between runs.
const ITERATIONS: usize = 25;

/// Profiling harness: runs the solver `ITERATIONS` times back to back.
fn main() {
    for _ in 0..ITERATIONS {
        // Mark the result as used so the optimizer has no licence to drop work whose
        // only purpose is producing it; the profile should reflect the full computation.
        std::hint::black_box(poetry_contest::v01_simple::solve());
    }
}
// Excerpt: runs once per matched category.
for cat_idx in matched_categories {
// `clone()` produces a separate copy of the author on every iteration of this loop.
let entry_author = entry.author.clone();
// ...
}
// Excerpt of `solve_round` in which the per-category author lists hold `&str` borrows
// instead of owned `String`s. Elided parts are marked with `// ...`.
fn solve_round(
round_json: String,
keyword_to_categories: &HashMap<&str, Vec<usize>>,
points_by_author: &mut HashMap<String, u64>,
) {
// Vec<String> became Vec<&str>
let mut best_by_category: HashMap<usize, (f64, Vec<&str>)> = HashMap::new();
// ...
// Iterating by reference keeps `entries` alive, which the `&str` borrows below rely on.
for entry in &entries {
// ...
for cat_idx in matched_categories {
// entry.author.clone() became a &str reference
let entry_author: &str = &entry.author;
}
}
// ...
}
/// Excerpt of `solve_round` showing how points are credited once the per-category
/// author lists are known. Elided parts are marked with `// ...`.
fn solve_round(
    round_json: String,
    keyword_to_categories: &HashMap<&str, Vec<usize>>,
    points_by_author: &mut HashMap<String, u64>,
) {
    // ...
    // Flatten every category's author list into a single stream of names.
    for author in best_by_category
        .into_iter()
        .flat_map(|(_, (_, authors))| authors)
    {
        match points_by_author.get_mut(author) {
            // The lookup is done with the borrowed name, so a known author costs no allocation.
            Some(points) => *points += 1,
            // An owned String is built only when the author is seen for the first time.
            None => {
                points_by_author.insert(author.to_owned(), 1);
            }
        }
    }
}
/// A single contest entry with owned `String` fields.
#[derive(Deserialize)]
struct Entry {
    /// Author of the entry.
    author: String,
    /// Title of the entry.
    title: String,
    /// Text of the entry.
    contents: String,
}
/// Manifest variant whose contents are `Cow`s tied to the lifetime `'json` of the input text.
#[derive(Deserialize)]
struct Manifest<'json> {
/// Categories declared by the manifest.
#[serde(borrow)]
categories: Vec<Category<'json>>,
/// Paths of the round files.
#[serde(borrow)]
rounds: Vec<Cow<'json, Path>>,
}
/// Category variant whose keywords are `Cow`s tied to the lifetime `'json` of the input text.
#[derive(Deserialize)]
struct Category<'json> {
/// Keywords belonging to this category.
#[serde(borrow)]
keywords: Vec<Cow<'json, str>>,
}
/// Round variant whose entries may borrow from the input text for the lifetime `'json`.
#[derive(Deserialize)]
struct Round<'json> {
/// Entries submitted in this round.
#[serde(borrow)]
entries: Vec<Entry<'json>>,
}
/// Entry variant whose fields are `Cow`s that may borrow from the input text for `'json`.
#[derive(Deserialize)]
struct Entry<'json> {
/// Author of the entry.
#[serde(borrow)]
author: Cow<'json, str>,
/// Title of the entry.
#[serde(borrow)]
title: Cow<'json, str>,
/// Text of the entry.
#[serde(borrow)]
contents: Cow<'json, str>,
}
/// Checks which `Cow` variant the first keyword and the first round path of a
/// deserialized `Manifest` end up as.
///
/// # Panics
///
/// Panics if `manifest_json` does not deserialize into a `Manifest`, if it has no
/// category with a keyword or no round path, or if either value is not `Cow::Owned`.
fn validate_cow_borrowing_manifest(manifest_json: &str) {
    let manifest: Manifest =
        serde_json::from_str(manifest_json).expect("manifest JSON should deserialize");

    let keyword = manifest
        .categories
        .first()
        .expect("manifest should contain at least one category")
        .keywords
        .first()
        .expect("first category should contain at least one keyword");
    let round_path = manifest
        .rounds
        .first()
        .expect("manifest should contain at least one round path");

    // serde_json is not capable of deserializing into a Vec of borrowed Cow,
    // so these will always be Cow::Owned, even when logically borrowable.
    assert!(matches!(keyword, Cow::Owned(_)));
    assert!(matches!(round_path, Cow::Owned(_)));
}
/// Checks which `Cow` variant each field of the first entry of a deserialized
/// `Round` ends up as.
///
/// # Panics
///
/// Panics if `round_json` does not deserialize into a `Round`, if the round has no
/// entries, or if a field is not the expected `Cow` variant.
fn validate_cow_borrowing_round(round_json: &str) {
    let round: Round = serde_json::from_str(round_json).expect("round JSON should deserialize");
    let entry = round
        .entries
        .first()
        .expect("round should contain at least one entry");

    assert!(matches!(entry.author, Cow::Borrowed(_)));
    assert!(matches!(entry.title, Cow::Borrowed(_)));
    // Contents must be transformed first (newlines unescaped), so cannot be borrowed.
    assert!(matches!(entry.contents, Cow::Owned(_)));
}
/// Category variant that routes each keyword through the `BorrowCow` adapter via `serde_as`.
// NOTE(review): presumably this lets the keywords borrow from the input text instead of
// always being `Cow::Owned` — confirm against the adapter's documentation.
#[serde_as]
#[derive(Deserialize)]
struct Category<'json> {
    /// Keywords belonging to this category.
    #[serde_as(as = "Vec<BorrowCow>")]
    keywords: Vec<Cow<'json, str>>,
}
/// Entry variant that keeps `contents` as an undecoded `RawValue` borrowed from the input,
/// while `author` and `title` remain `Cow`s.
#[derive(Deserialize)]
struct Entry<'json> {
/// Author of the entry.
#[serde(borrow)]
author: Cow<'json, str>,
/// Title of the entry.
#[serde(borrow)]
title: Cow<'json, str>,
// This will be a raw JSON string, quotes included,
// like: "Some content\nsecond line"
contents: &'json RawValue,
}
// Per-thread scratch space: a fixed 1000-byte array behind a `RefCell`, zero-initialized.
thread_local! {
// We reuse this buffer for decoding RawValue contents to avoid repeated allocations.
// Entries greater than 1000 bytes long (decoded) are disqualified, so we only need 1000 bytes.
static DECODE_BUFFER: RefCell<[u8; 1000]> = RefCell::new([0; 1000]);
}
/// Computes an entry's weight: the decoded content's length in bytes divided by its
/// number of whitespace-separated words.
///
/// Returns `None` when the entry is disqualified, which happens if the raw value is not a
/// double-quoted JSON string, contains an invalid escape sequence, does not fit into the
/// decode buffer once unescaped, is not valid UTF-8, or consists only of whitespace
/// (including being empty).
fn calculate_weight(raw_content: &RawValue) -> Option<f64> {
    // Peel off the surrounding quotes; anything that is not a quoted JSON string is rejected.
    let inner = raw_content
        .get()
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))?;

    DECODE_BUFFER.with_borrow_mut(|scratch| {
        // Number of bytes of the scratch buffer filled so far.
        let mut used = 0usize;

        // Copy the unescaped chunks back to back into the scratch buffer.
        for piece in json_escape::unescape(inner) {
            // An invalid escape sequence disqualifies the entry.
            let piece = piece.ok()?;

            let end = used + piece.len();
            if end > scratch.len() {
                // The decoded content is too long for the buffer: disqualified.
                return None;
            }

            scratch[used..end].copy_from_slice(piece);
            used = end;
        }

        // Decoded bytes that are not valid UTF-8 disqualify the entry.
        let text = str::from_utf8(&scratch[..used]).ok()?;

        // No words means the content is empty or whitespace-only: disqualified.
        let words = text.split_whitespace().count();
        if words == 0 {
            return None;
        }

        Some(text.len() as f64 / words as f64)
    })
}
// We reuse the same String for reading round files to avoid repeated allocations.
let mut round_json = String::new();
for round_path in &manifest.rounds {
    // `as_path` is already a `&Path`, so it goes to `join` directly without another borrow.
    let as_path = Path::new(&**round_path);
    let round_file_path = data_dir.join(as_path);

    // Drop the previous round's text but keep the allocation for this round's contents.
    round_json.clear();
    File::open(round_file_path)
        .expect("Failed to open round file")
        .read_to_string(&mut round_json)
        .expect("Failed to read round file");

    solve_round(
        &manifest,
        &round_json,
        &keyword_to_categories,
        &mut points_by_author,
    );
}
use foldhash::{HashMap, HashMapExt};
// NOTE(review): with the `foldhash` import in place, `HashMap` in the declarations below is
// foldhash's type rather than the standard library's, and `HashMapExt` is presumably what
// provides `new` / `with_capacity` for it — confirm against the foldhash documentation.
// Code unchanged - just for reference.
let mut keyword_to_categories: HashMap<&str, Vec<usize>>
= HashMap::with_capacity(keyword_count);
// Code unchanged - just for reference.
let mut points_by_author: HashMap<String, u64>
= HashMap::new();
// Code unchanged - just for reference.
let mut best_by_category: HashMap<usize, (f64, Vec<Cow<'round, str>>)>
= HashMap::with_capacity(manifest.categories.len());
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment