Last active
August 26, 2018 17:30
-
-
Save brunoczim/d598606ba107e188a0b2c8b6be459c25 to your computer and use it in GitHub Desktop.
Source file mapping for parsers, interpreters, or compilers.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::{ | |
cmp::Ordering, | |
fmt, | |
hash::{Hash, Hasher}, | |
ops::{ | |
Index, | |
Range, | |
RangeFrom, | |
RangeFull, | |
RangeInclusive, | |
RangeTo, | |
RangeToInclusive, | |
}, | |
sync::Arc, | |
}; | |
use unicode_segmentation::UnicodeSegmentation; | |
/// Immutable payload of a source, shared between every `Src` handle through an `Arc`.
#[derive(Debug)]
pub struct SrcInner {
    /// Name given to the source when it was created.
    name: Box<str>,
    /// Full text of the source.
    string: Box<str>,
    /// Byte offset at which each grapheme cluster of `string` starts, followed
    /// by one final entry equal to the byte length of `string`.
    pos: Box<[usize]>,
    /// Grapheme indices (indices into `pos`) of the line-break graphemes.
    lines: Box<[usize]>,
}
#[derive(Debug, Clone)] | |
pub struct Src { | |
inner: Arc<SrcInner>, | |
} | |
#[derive(Debug, Clone)] | |
pub struct Stream { | |
src: Src, | |
pos: usize, | |
mark: usize, | |
} | |
#[derive(Debug, Clone)] | |
pub struct Loc { | |
src: Src, | |
pos: usize, | |
} | |
#[derive(Debug, Clone)] | |
pub struct Span { | |
loc: Loc, | |
len: usize, | |
} | |
pub trait SrcIndex { | |
type Out: ?Sized; | |
fn get(self, src: &Src) -> Option<&Self::Out>; | |
} | |
impl Src { | |
pub fn new<S, T>(name: S, string: T) -> Self | |
where | |
S: Into<Box<str>>, | |
T: Into<Box<str>>, | |
{ | |
let boxed = string.into(); | |
let mut pos = Vec::new(); | |
let mut lines = Vec::new(); | |
for (i, graph) in boxed.grapheme_indices(true) { | |
if graph == "\n" { | |
lines.push(pos.len()); | |
} | |
pos.push(i); | |
} | |
pos.push(boxed.len()); | |
Self { | |
inner: Arc::new(SrcInner { | |
name: name.into(), | |
string: boxed, | |
pos: pos.into(), | |
lines: lines.into(), | |
}), | |
} | |
} | |
pub fn name(&self) -> &str { | |
&self.inner.name | |
} | |
pub fn contents(&self) -> &str { | |
&self.inner.string | |
} | |
pub fn segments(&self) -> &[usize] { | |
&self.inner.pos | |
} | |
pub fn len(&self) -> usize { | |
self.inner.pos.len() - 1 | |
} | |
pub fn stream(&self) -> Stream { | |
Stream { src: self.clone(), pos: 0, mark: 0 } | |
} | |
pub fn get<I>(&self, idx: I) -> Option<&I::Out> | |
where | |
I: SrcIndex, | |
{ | |
idx.get(self) | |
} | |
} | |
impl<I> Index<I> for Src | |
where | |
I: SrcIndex, | |
{ | |
type Output = I::Out; | |
fn index(&self, idx: I) -> &I::Out { | |
self.get(idx).expect("Bad source indexing") | |
} | |
} | |
impl Stream { | |
pub fn src(&self) -> &Src { | |
&self.src | |
} | |
pub fn curr(&self) -> Option<&str> { | |
self.src.get(self.pos) | |
} | |
pub fn pos(&self) -> usize { | |
self.pos | |
} | |
pub fn marked(&self) -> usize { | |
self.mark | |
} | |
pub fn loc(&self) -> Loc { | |
Loc { src: self.src.clone(), pos: self.pos } | |
} | |
pub fn span(&self) -> Span { | |
let (pos, len) = if self.pos > self.mark { | |
(self.mark, self.pos - self.mark) | |
} else { | |
(self.pos, self.mark - self.pos) | |
}; | |
Span { loc: Loc { src: self.src.clone(), pos }, len } | |
} | |
pub fn mark(&mut self) { | |
self.mark = self.pos; | |
} | |
pub fn next(&mut self) { | |
self.advance(1); | |
} | |
pub fn prev(&mut self) { | |
self.rollback(1); | |
} | |
pub fn advance(&mut self, count: usize) { | |
self.pos = self.src.len().min(self.pos.saturating_add(count)); | |
} | |
pub fn rollback(&mut self, count: usize) { | |
self.pos = self.pos.checked_sub(count).unwrap_or(0); | |
} | |
} | |
impl Loc { | |
pub fn pos(&self) -> usize { | |
self.pos | |
} | |
pub fn src(&self) -> &Src { | |
&self.src | |
} | |
pub fn line_column(&self) -> (usize, usize) { | |
let idx = match self.src.inner.lines.binary_search(&self.pos) { | |
Ok(i) => i, | |
Err(i) => i, | |
}; | |
( | |
idx + 1, | |
self.pos + if idx == 0 { 1 } else { self.src.inner.lines[idx - 1] }, | |
) | |
} | |
} | |
impl fmt::Display for Loc { | |
fn fmt(&self, fmtr: &mut fmt::Formatter<'_>) -> fmt::Result { | |
let (line, col) = self.line_column(); | |
write!(fmtr, "in {}, line {}, column {}", self.src.name(), line, col) | |
} | |
} | |
impl Span { | |
pub fn loc(&self) -> &Loc { | |
&self.loc | |
} | |
pub fn len(&self) -> usize { | |
self.len | |
} | |
pub fn inner_str(&self) -> &str { | |
&self.loc.src[self.loc.pos .. self.loc.pos + self.len] | |
} | |
} | |
impl fmt::Display for Span { | |
fn fmt(&self, fmtr: &mut fmt::Formatter<'_>) -> fmt::Result { | |
write!(fmtr, "{}", self.inner_str()) | |
} | |
} | |
impl PartialEq for Span { | |
fn eq(&self, other: &Self) -> bool { | |
self.inner_str() == other.inner_str() | |
} | |
} | |
impl Eq for Span {} | |
impl PartialOrd for Span { | |
fn partial_cmp(&self, other: &Self) -> Option<Ordering> { | |
self.inner_str().partial_cmp(other.inner_str()) | |
} | |
} | |
impl Ord for Span { | |
fn cmp(&self, other: &Self) -> Ordering { | |
self.inner_str().cmp(other.inner_str()) | |
} | |
} | |
impl Hash for Span { | |
fn hash<H>(&self, state: &mut H) | |
where | |
H: Hasher, | |
{ | |
self.inner_str().hash(state) | |
} | |
} | |
impl SrcIndex for usize { | |
type Out = str; | |
fn get(self, src: &Src) -> Option<&Self::Out> { | |
(self .. self + 1).get(src) | |
} | |
} | |
impl SrcIndex for Range<usize> { | |
type Out = str; | |
fn get(self, src: &Src) -> Option<&Self::Out> { | |
let start = src.inner.pos.get(self.start); | |
let end = src.inner.pos.get(self.end); | |
if let (Some(&start), Some(&end)) = (start, end) { | |
Some(unsafe { src.inner.string.get_unchecked(start .. end) }) | |
} else { | |
None | |
} | |
} | |
} | |
impl SrcIndex for RangeTo<usize> { | |
type Out = str; | |
fn get(self, src: &Src) -> Option<&Self::Out> { | |
(0 .. self.end).get(src) | |
} | |
} | |
impl SrcIndex for RangeFrom<usize> { | |
type Out = str; | |
fn get(self, src: &Src) -> Option<&Self::Out> { | |
(self.start .. src.len()).get(src) | |
} | |
} | |
impl SrcIndex for RangeFull { | |
type Out = str; | |
fn get(self, src: &Src) -> Option<&Self::Out> { | |
Some(src.contents()) | |
} | |
} | |
impl SrcIndex for RangeInclusive<usize> { | |
type Out = str; | |
fn get(self, src: &Src) -> Option<&Self::Out> { | |
(*self.start() .. *self.end() + 1).get(src) | |
} | |
} | |
impl SrcIndex for RangeToInclusive<usize> { | |
type Out = str; | |
fn get(self, src: &Src) -> Option<&Self::Out> { | |
(0 .. self.end + 1).get(src) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment