quad · October 31, 2025 19:58
diff --git a/0-modular-errors-with-rusts-thiserror.md b/0-modular-errors-with-rusts-thiserror.md
diff --git a/BadBlocks.txt b/BadBlocks.txt
 # test comment
 0080..00FF; Latin-1 Supplement
 00j0..007F; Basic Latin
diff --git a/Blocks.txt b/Blocks.txt
 0000..007F; Basic Latin
 0080..00FF; Latin-1 Supplement
 0100..017F; Latin Extended-A
 0180..024F; Latin Extended-B
 0250..02AF; IPA Extensions
diff --git a/Cargo.toml b/Cargo.toml
 [package]
 name = "rust-error-styles"
 version = "0.1.0"
 edition = "2021"

 [dependencies]
 anyhow = "1.0.70"
 testresult = "0.3.0"
 thiserror = "1.0.40"
 ureq = "2.6.2"
diff --git a/lib.rs b/lib.rs
 //! This crate provides types for UCD’s `Blocks.txt`.

 /// Lookup table built from UCD `Blocks.txt` data.
 #[derive(Debug)]
 pub struct Blocks {
    // Inclusive code point range paired with its block name. `block_of` runs a
    // binary search over this vector, so it relies on the entries being sorted
    // by range; `from_str` keeps them in the order they appear in the input.
    ranges: Vec<(RangeInclusive<u32>, String)>,
 }

 impl Blocks {
    pub fn block_of(&self, c: char) -> &str {
        self.ranges
            .binary_search_by(|(range, _)| {
                if *range.end() < u32::from(c) {
                    cmp::Ordering::Less
                } else if u32::from(c) < *range.start() {
                    cmp::Ordering::Greater
                } else {
                    cmp::Ordering::Equal
                }
            })
            .map(|i| &*self.ranges[i].1)
            .unwrap_or("No_Block")
    }
    pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self, FromFileError> {
        let path = path.as_ref();
        let from_file = || Ok(Self::from_str(&fs::read_to_string(path)?)?);
        from_file().map_err(|source| FromFileError {
            path: path.into(),
            source,
        })
    }
    pub fn download(agent: &ureq::Agent) -> Result<Self, DownloadError> {
        let download = || -> Result<_, DownloadErrorKind> {
            let response = agent.get(LATEST_URL).call().map_err(Box::new)?;
            Ok(Self::from_str(&response.into_string()?)?)
        };
        Ok(download()?)
    }
 }

 impl FromStr for Blocks {
    type Err = ParseError;
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let ranges = s
            .lines()
            // I couldn't help myself
            .map(|line| line.split_once('#').map(|(line, _)| line).unwrap_or(line))
            .enumerate()
            .filter(|(_, line)| !line.is_empty())
            .map(|(i, line)| {
                (|| {
                    let (range, name) = line.split_once(';').ok_or(ParseErrorKind::NoSemicolon)?;
                    let (range, name) = (range.trim(), name.trim());
                    let (start, end) = range.split_once("..").ok_or(ParseErrorKind::NoDotDot)?;
                    let start = u32::from_str_radix(start, 16)?;
                    let end = u32::from_str_radix(end, 16)?;
                    Ok((start..=end, name.to_owned()))
                })()
                .map_err(|source| ParseError { line: i, source })
            })
            .collect::<Result<Vec<_>, ParseError>>()?;
        Ok(Self { ranges })
    }
 }

 /// Error returned by `Blocks::download`.
 ///
 /// The display message is fixed; the wrapped [`DownloadErrorKind`] says which
 /// step failed. `#[from]` lets `?` convert a `DownloadErrorKind` into this type.
 #[derive(Debug, thiserror::Error)]
 #[error("failed to download Blocks.txt from the Unicode website")]
 #[non_exhaustive]
 pub struct DownloadError(#[from] pub DownloadErrorKind);

 /// The step of `Blocks::download` that failed.
 ///
 /// `#[error(transparent)]` forwards the display text and source of each
 /// variant to the error it wraps.
 #[derive(Debug, thiserror::Error)]
 #[error(transparent)]
 pub enum DownloadErrorKind {
    /// The HTTP request failed. NOTE(review): boxed presumably to keep this
    /// enum small — confirm.
    Request(#[from] Box<ureq::Error>),
    /// Reading the response body into a string failed.
    ReadBody(#[from] io::Error),
    /// The downloaded text could not be parsed.
    Parse(#[from] ParseError),
 }

 /// Error returned by `Blocks::from_file`.
 #[derive(Debug, thiserror::Error)]
 #[error("error reading `{path}`")]
 #[non_exhaustive]
 pub struct FromFileError {
    /// Path that was passed to `Blocks::from_file`.
    pub path: Box<Path>,
    /// What went wrong while reading or parsing that file.
    pub source: FromFileErrorKind,
 }

 /// The step of `Blocks::from_file` that failed.
 ///
 /// `#[error(transparent)]` forwards the display text and source of each
 /// variant to the error it wraps.
 #[derive(Debug, thiserror::Error)]
 #[error(transparent)]
 pub enum FromFileErrorKind {
    /// The file could not be read into a string.
    ReadFile(#[from] io::Error),
    /// The file contents could not be parsed.
    Parse(#[from] ParseError),
 }

 /// Error returned when `Blocks.txt` text cannot be parsed.
 ///
 /// The display message reports the line one-based (`self.line + 1`).
 #[derive(Debug, thiserror::Error)]
 #[error("invalid Blocks.txt data on line {}", self.line + 1)]
 #[non_exhaustive]
 pub struct ParseError {
    /// Zero-based index of the offending line in the parsed text.
    pub line: usize,
    /// What was wrong with that line.
    pub source: ParseErrorKind,
 }

 /// The reason a single line of `Blocks.txt` data was rejected.
 ///
 /// Note that `#[non_exhaustive]` is applied to each variant, not to the enum.
 #[derive(Debug, thiserror::Error)]
 pub enum ParseErrorKind {
    /// The line has no `;` separating the range from the block name.
    #[error("no semicolon")]
    #[non_exhaustive]
    NoSemicolon,
    /// The range field has no `..` separating its start from its end.
    #[error("no `..` in range")]
    #[non_exhaustive]
    NoDotDot,
    /// The start or end of the range failed to parse as a base-16 `u32`.
    #[error("one end of range is not a valid hexadecimal integer")]
    #[non_exhaustive]
    ParseInt(#[from] ParseIntError),
 }

 #[cfg(test)]
 mod tests {
    use crate::*;
    use testresult::TestResult;

    /// Code points probed by the lookup tests, with the block name each must map to.
    const EXPECTED_BLOCKS: [(char, &str); 4] = [
        ('\u{0080}', "Latin-1 Supplement"),
        ('½', "Latin-1 Supplement"),
        ('\u{00FF}', "Latin-1 Supplement"),
        ('\u{EFFFF}', "No_Block"),
    ];

    /// Looks up the probe code points in data downloaded with `Blocks::download`.
    #[test]
    fn real_unicode() -> TestResult {
        let data = Blocks::download(&ureq::agent())?;
        for &(ch, block) in &EXPECTED_BLOCKS {
            assert_eq!(data.block_of(ch), block);
        }
        Ok(())
    }

    /// Looks up the probe code points in the `Blocks.txt` embedded at compile time.
    #[test]
    fn test_unicode() -> TestResult {
        let data = include_str!("../Blocks.txt").parse::<Blocks>()?;
        for &(ch, block) in &EXPECTED_BLOCKS {
            assert_eq!(data.block_of(ch), block);
        }
        Ok(())
    }

    // NOTE(review): the four `fail_*` tests below load `BadBlocks.txt` and treat
    // success as unreachable, so they appear to be deliberate failures that show
    // how the same error is reported through each return style — confirm.

    /// Reports the error through a panic from `unwrap`.
    #[test]
    fn fail_panic() {
        Blocks::from_file("BadBlocks.txt").unwrap();
    }

    /// Reports the error by returning the concrete `FromFileError`.
    #[test]
    fn fail_result() -> Result<(), FromFileError> {
        Blocks::from_file("BadBlocks.txt")?;
        unreachable!()
    }

    /// Reports the error through `testresult::TestResult`.
    #[test]
    fn fail_test_result() -> TestResult {
        Blocks::from_file("BadBlocks.txt")?;
        unreachable!()
    }

    /// Reports the error through `anyhow::Result`.
    #[test]
    fn fail_anyhow() -> anyhow::Result<()> {
        Blocks::from_file("BadBlocks.txt")?;
        unreachable!()
    }
 }

 /// URL that `Blocks::download` fetches `Blocks.txt` from.
 pub const LATEST_URL: &str = "https://www.unicode.org/Public/UCD/latest/ucd/Blocks.txt";

 use std::cmp;
 use std::fs;
 use std::io;
 use std::num::ParseIntError;
 use std::ops::RangeInclusive;
 use std::path::Path;
 use std::str::FromStr;
	# test comment
	0080..00FF; Latin-1 Supplement
	00j0..007F; Basic Latin
	0000..007F; Basic Latin
	0080..00FF; Latin-1 Supplement
	0100..017F; Latin Extended-A
	0180..024F; Latin Extended-B
	0250..02AF; IPA Extensions
	[package]
	name = "rust-error-styles"
	version = "0.1.0"
	edition = "2021"

	[dependencies]
	anyhow = "1.0.70"
	testresult = "0.3.0"
	thiserror = "1.0.40"
	ureq = "2.6.2"
	//! This crate provides types for UCD’s `Blocks.txt`.

	/// Lookup table built from UCD `Blocks.txt` data.
	#[derive(Debug)]
	pub struct Blocks {
	// Inclusive code point range paired with its block name. `block_of` runs a
	// binary search over this vector, so it relies on the entries being sorted
	// by range; `from_str` keeps them in the order they appear in the input.
	ranges: Vec<(RangeInclusive<u32>, String)>,
	}

	impl Blocks {
	    /// Returns the name of the block whose range contains `c`.
	    ///
	    /// Yields `"No_Block"` when no stored range contains `c`. The lookup is a
	    /// binary search, so it relies on the stored ranges being in ascending order.
	    pub fn block_of(&self, c: char) -> &str {
	        self.ranges
	            .binary_search_by(|(range, _)| {
	                if *range.end() < u32::from(c) {
	                    cmp::Ordering::Less
	                } else if u32::from(c) < *range.start() {
	                    cmp::Ordering::Greater
	                } else {
	                    cmp::Ordering::Equal
	                }
	            })
	            .map(|i| &*self.ranges[i].1)
	            .unwrap_or("No_Block")
	    }

	    /// Reads the file at `path` and parses its contents as `Blocks.txt` data.
	    ///
	    /// # Errors
	    ///
	    /// Returns a [`FromFileError`] holding `path` and the cause when the file
	    /// cannot be read or its contents cannot be parsed.
	    pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self, FromFileError> {
	        let path = path.as_ref();
	        // The closure lets `?` convert both failure sources into `FromFileErrorKind`.
	        let from_file = || Ok(Self::from_str(&fs::read_to_string(path)?)?);
	        from_file().map_err(|source| FromFileError {
	            path: path.into(),
	            source,
	        })
	    }

	    /// Fetches [`LATEST_URL`] through `agent` and parses the response body.
	    ///
	    /// # Errors
	    ///
	    /// Returns a [`DownloadError`] wrapping the request, body-read, or parse
	    /// failure.
	    pub fn download(agent: &ureq::Agent) -> Result<Self, DownloadError> {
	        let download = || -> Result<_, DownloadErrorKind> {
	            let response = agent.get(LATEST_URL).call().map_err(Box::new)?;
	            Ok(Self::from_str(&response.into_string()?)?)
	        };
	        Ok(download()?)
	    }
	}

	impl FromStr for Blocks {
	type Err = ParseError;
	fn from_str(s: &str) -> Result<Self, Self::Err> {
	let ranges = s
	.lines()
	// I couldn't help myself
	.map(\|line\| line.split_once('#').map(\|(line, _)\| line).unwrap_or(line))
	.enumerate()
	.filter(\|(_, line)\| !line.is_empty())
	.map(\|(i, line)\| {
	(\|\| {
	let (range, name) = line.split_once(';').ok_or(ParseErrorKind::NoSemicolon)?;
	let (range, name) = (range.trim(), name.trim());
	let (start, end) = range.split_once("..").ok_or(ParseErrorKind::NoDotDot)?;
	let start = u32::from_str_radix(start, 16)?;
	let end = u32::from_str_radix(end, 16)?;
	Ok((start..=end, name.to_owned()))
	})()
	.map_err(\|source\| ParseError { line: i, source })
	})
	.collect::<Result<Vec<_>, ParseError>>()?;
	Ok(Self { ranges })
	}
	}

	/// Error returned by `Blocks::download`.
	///
	/// The display message is fixed; the wrapped [`DownloadErrorKind`] says which
	/// step failed. `#[from]` lets `?` convert a `DownloadErrorKind` into this type.
	#[derive(Debug, thiserror::Error)]
	#[error("failed to download Blocks.txt from the Unicode website")]
	#[non_exhaustive]
	pub struct DownloadError(#[from] pub DownloadErrorKind);

	/// The step of `Blocks::download` that failed.
	///
	/// `#[error(transparent)]` forwards the display text and source of each
	/// variant to the error it wraps.
	#[derive(Debug, thiserror::Error)]
	#[error(transparent)]
	pub enum DownloadErrorKind {
	/// The HTTP request failed. NOTE(review): boxed presumably to keep this
	/// enum small — confirm.
	Request(#[from] Box<ureq::Error>),
	/// Reading the response body into a string failed.
	ReadBody(#[from] io::Error),
	/// The downloaded text could not be parsed.
	Parse(#[from] ParseError),
	}

	/// Error returned by `Blocks::from_file`.
	#[derive(Debug, thiserror::Error)]
	#[error("error reading `{path}`")]
	#[non_exhaustive]
	pub struct FromFileError {
	/// Path that was passed to `Blocks::from_file`.
	pub path: Box<Path>,
	/// What went wrong while reading or parsing that file.
	pub source: FromFileErrorKind,
	}

	/// The step of `Blocks::from_file` that failed.
	///
	/// `#[error(transparent)]` forwards the display text and source of each
	/// variant to the error it wraps.
	#[derive(Debug, thiserror::Error)]
	#[error(transparent)]
	pub enum FromFileErrorKind {
	/// The file could not be read into a string.
	ReadFile(#[from] io::Error),
	/// The file contents could not be parsed.
	Parse(#[from] ParseError),
	}

	/// Error returned when `Blocks.txt` text cannot be parsed.
	///
	/// The display message reports the line one-based (`self.line + 1`).
	#[derive(Debug, thiserror::Error)]
	#[error("invalid Blocks.txt data on line {}", self.line + 1)]
	#[non_exhaustive]
	pub struct ParseError {
	/// Zero-based index of the offending line in the parsed text.
	pub line: usize,
	/// What was wrong with that line.
	pub source: ParseErrorKind,
	}

	/// The reason a single line of `Blocks.txt` data was rejected.
	///
	/// Note that `#[non_exhaustive]` is applied to each variant, not to the enum.
	#[derive(Debug, thiserror::Error)]
	pub enum ParseErrorKind {
	/// The line has no `;` separating the range from the block name.
	#[error("no semicolon")]
	#[non_exhaustive]
	NoSemicolon,
	/// The range field has no `..` separating its start from its end.
	#[error("no `..` in range")]
	#[non_exhaustive]
	NoDotDot,
	/// The start or end of the range failed to parse as a base-16 `u32`.
	#[error("one end of range is not a valid hexadecimal integer")]
	#[non_exhaustive]
	ParseInt(#[from] ParseIntError),
	}

	#[cfg(test)]
	mod tests {
	    use crate::*;
	    use testresult::TestResult;

	    /// Code points probed by the lookup tests, with the block name each must map to.
	    const EXPECTED_BLOCKS: [(char, &str); 4] = [
	        ('\u{0080}', "Latin-1 Supplement"),
	        ('½', "Latin-1 Supplement"),
	        ('\u{00FF}', "Latin-1 Supplement"),
	        ('\u{EFFFF}', "No_Block"),
	    ];

	    /// Looks up the probe code points in data downloaded with `Blocks::download`.
	    #[test]
	    fn real_unicode() -> TestResult {
	        let data = Blocks::download(&ureq::agent())?;
	        for &(ch, block) in &EXPECTED_BLOCKS {
	            assert_eq!(data.block_of(ch), block);
	        }
	        Ok(())
	    }

	    /// Looks up the probe code points in the `Blocks.txt` embedded at compile time.
	    #[test]
	    fn test_unicode() -> TestResult {
	        let data = include_str!("../Blocks.txt").parse::<Blocks>()?;
	        for &(ch, block) in &EXPECTED_BLOCKS {
	            assert_eq!(data.block_of(ch), block);
	        }
	        Ok(())
	    }

	    // NOTE(review): the four `fail_*` tests below load `BadBlocks.txt` and treat
	    // success as unreachable, so they appear to be deliberate failures that show
	    // how the same error is reported through each return style — confirm.

	    /// Reports the error through a panic from `unwrap`.
	    #[test]
	    fn fail_panic() {
	        Blocks::from_file("BadBlocks.txt").unwrap();
	    }

	    /// Reports the error by returning the concrete `FromFileError`.
	    #[test]
	    fn fail_result() -> Result<(), FromFileError> {
	        Blocks::from_file("BadBlocks.txt")?;
	        unreachable!()
	    }

	    /// Reports the error through `testresult::TestResult`.
	    #[test]
	    fn fail_test_result() -> TestResult {
	        Blocks::from_file("BadBlocks.txt")?;
	        unreachable!()
	    }

	    /// Reports the error through `anyhow::Result`.
	    #[test]
	    fn fail_anyhow() -> anyhow::Result<()> {
	        Blocks::from_file("BadBlocks.txt")?;
	        unreachable!()
	    }
	}

	/// URL that `Blocks::download` fetches `Blocks.txt` from.
	pub const LATEST_URL: &str = "https://www.unicode.org/Public/UCD/latest/ucd/Blocks.txt";

	use std::cmp;
	use std::fs;
	use std::io;
	use std::num::ParseIntError;
	use std::ops::RangeInclusive;
	use std::path::Path;
	use std::str::FromStr;