Last active
April 14, 2023 01:13
-
-
Save shepmaster/fb7f4c9519a074ea7186ca7b75afb9dd to your computer and use it in GitHub Desktop.
SNAFU-modified version of https://sabrinajewson.org/blog/errors
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//! This crate provides types for UCD’s `Blocks.txt`. | |
/// An in-memory table of Unicode blocks, as listed in UCD's `Blocks.txt`.
///
/// Each entry pairs an inclusive range of code points (stored as `u32`) with
/// the name of the block covering that range.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Blocks {
    /// `(code point range, block name)` pairs.
    ranges: Vec<(RangeInclusive<u32>, String)>,
}
impl Blocks { | |
pub fn block_of(&self, c: char) -> &str { | |
self.ranges | |
.binary_search_by(|(range, _)| { | |
if *range.end() < u32::from(c) { | |
cmp::Ordering::Less | |
} else if u32::from(c) < *range.start() { | |
cmp::Ordering::Greater | |
} else { | |
cmp::Ordering::Equal | |
} | |
}) | |
.map(|i| &*self.ranges[i].1) | |
.unwrap_or("No_Block") | |
} | |
pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self, FromFileError> { | |
let path = path.as_ref(); | |
let tmp0 = fs::read_to_string(path).context(from_file_error::ReadFileSnafu { path })?; | |
Self::from_str(&tmp0).context(from_file_error::ParseSnafu { path }) | |
} | |
pub fn download(agent: &ureq::Agent) -> Result<Self, DownloadError> { | |
let response = agent | |
.get(LATEST_URL) | |
.call() | |
.context(download_error::RequestSnafu)?; | |
Self::from_str( | |
&response | |
.into_string() | |
.context(download_error::ReadBodySnafu)?, | |
) | |
.context(download_error::ParseSnafu) | |
} | |
} | |
impl FromStr for Blocks { | |
type Err = ParseError; | |
fn from_str(s: &str) -> Result<Self, Self::Err> { | |
let ranges = s | |
.lines() | |
.enumerate() | |
.map(|(i, line)| { | |
( | |
i, | |
line.split_once('#').map(|(line, _)| line).unwrap_or(line), | |
) | |
}) | |
.filter(|(_, line)| !line.is_empty()) | |
.map(|(i, line)| { | |
(|| { | |
let (range, name) = line.split_once(';').context(NoSemicolonSnafu)?; | |
let (range, name) = (range.trim(), name.trim()); | |
let (start, end) = range.split_once("..").context(NoDotDotSnafu)?; | |
let start = u32::from_str_radix(start, 16).context(ParseIntSnafu)?; | |
let end = u32::from_str_radix(end, 16).context(ParseIntSnafu)?; | |
Ok((start..=end, name.to_owned())) | |
})() | |
.context(ParseSnafu { line: i }) | |
}) | |
.collect::<Result<Vec<_>, ParseError>>()?; | |
Ok(Self { ranges }) | |
} | |
} | |
#[derive(Debug, Snafu)] | |
#[non_exhaustive] | |
#[snafu(module)] | |
pub enum DownloadError { | |
#[snafu(display("failed to download Blocks.txt from the Unicode website"))] | |
Request { | |
#[snafu(source(from(ureq::Error, Box::new)))] | |
source: Box<ureq::Error>, | |
}, | |
#[snafu(display("failed to download Blocks.txt from the Unicode website"))] | |
ReadBody { source: io::Error }, | |
#[snafu(display("failed to download Blocks.txt from the Unicode website"))] | |
Parse { source: ParseError }, | |
} | |
#[derive(Debug, Snafu)] | |
#[non_exhaustive] | |
#[snafu(module)] | |
pub enum FromFileError { | |
#[snafu(display("error reading `{}`", path.display()))] | |
ReadFile { source: io::Error, path: Box<Path> }, | |
#[snafu(display("error reading `{}`", path.display()))] | |
Parse { source: ParseError, path: Box<Path> }, | |
} | |
#[derive(Debug, Snafu)] | |
#[non_exhaustive] | |
#[snafu(display("invalid Blocks.txt data on line {}", self.line + 1))] | |
pub struct ParseError { | |
pub line: usize, | |
pub source: ParseErrorKind, | |
} | |
#[derive(Debug, Snafu)] | |
pub enum ParseErrorKind { | |
#[non_exhaustive] | |
#[snafu(display("no semicolon"))] | |
NoSemicolon, | |
#[non_exhaustive] | |
#[snafu(display("no `..` in range"))] | |
NoDotDot, | |
#[non_exhaustive] | |
#[snafu(display("one end of range is not a valid hexadecimal integer"))] | |
ParseInt { source: ParseIntError }, | |
} | |
#[cfg(test)]
mod tests {
    use crate::Blocks;

    /// Parses the `Blocks.txt` file included at compile time and checks
    /// lookups at both ends of a block, inside it, and outside any block.
    #[test]
    fn real_unicode() {
        let blocks: Blocks = include_str!("../Blocks.txt").parse().unwrap();

        assert_eq!(blocks.block_of('\u{0080}'), "Latin-1 Supplement");
        assert_eq!(blocks.block_of('½'), "Latin-1 Supplement");
        assert_eq!(blocks.block_of('\u{00FF}'), "Latin-1 Supplement");
        assert_eq!(blocks.block_of('\u{EFFFF}'), "No_Block");
    }
}
/// URL of the latest published `Blocks.txt` on the Unicode website.
pub const LATEST_URL: &str = "https://www.unicode.org/Public/UCD/latest/ucd/Blocks.txt";
use std::cmp; | |
use std::fs; | |
use std::io; | |
use std::num::ParseIntError; | |
use std::ops::RangeInclusive; | |
use std::path::Path; | |
use std::str::FromStr; | |
use snafu::prelude::*; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment