Skip to content

Instantly share code, notes, and snippets.

@brunoczim
Last active August 26, 2018 17:30
Show Gist options
  • Save brunoczim/d598606ba107e188a0b2c8b6be459c25 to your computer and use it in GitHub Desktop.
Save brunoczim/d598606ba107e188a0b2c8b6be459c25 to your computer and use it in GitHub Desktop.
Source file mapping for parsers, interpreters, or compilers.
use std::{
cmp::Ordering,
fmt,
hash::{Hash, Hasher},
ops::{
Index,
Range,
RangeFrom,
RangeFull,
RangeInclusive,
RangeTo,
RangeToInclusive,
},
sync::Arc,
};
use unicode_segmentation::UnicodeSegmentation;
/// Shared data behind a [`Src`] handle: the source name, its text and the
/// lookup tables built by `Src::new`.
#[derive(Debug)]
pub struct SrcInner {
/// Name the source was created with.
name: Box<str>,
/// Full text of the source.
string: Box<str>,
/// Byte offset in `string` at which each grapheme starts, followed by one
/// final entry equal to the byte length of `string`.
pos: Box<[usize]>,
/// Grapheme indices of the line-break graphemes recorded by `Src::new`,
/// in increasing order.
lines: Box<[usize]>,
}
/// Handle to a source text. The data lives in an `Arc<SrcInner>`, so cloning
/// a `Src` clones the `Arc` rather than the text.
#[derive(Debug, Clone)]
pub struct Src {
/// Reference-counted pointer to the shared source data.
inner: Arc<SrcInner>,
}
/// Cursor over the graphemes of a [`Src`], with a remembered "mark" position.
#[derive(Debug, Clone)]
pub struct Stream {
/// Source being read.
src: Src,
/// Current position, as a grapheme index into the source.
pos: usize,
/// Position saved by the last call to `Stream::mark` (starts at 0).
mark: usize,
}
/// A single position inside a [`Src`].
#[derive(Debug, Clone)]
pub struct Loc {
/// Source the position refers to.
src: Src,
/// Grapheme index of the position inside `src`.
pos: usize,
}
/// A contiguous run of graphemes inside a [`Src`].
#[derive(Debug, Clone)]
pub struct Span {
/// Location of the first grapheme of the span.
loc: Loc,
/// Number of graphemes covered by the span.
len: usize,
}
/// A value that can be used to look something up in a [`Src`]; it backs both
/// `Src::get` and the `Index` implementation of `Src`.
pub trait SrcIndex {
/// Type of the data a successful lookup borrows from the source.
type Out: ?Sized;
/// Looks `self` up in `src`. The implementations in this file return `None`
/// when a requested grapheme index is outside the source.
fn get(self, src: &Src) -> Option<&Self::Out>;
}
impl Src {
    /// Creates a source called `name` whose text is `string`.
    ///
    /// The text is split into extended grapheme clusters once, up front. The
    /// byte offset of every grapheme is recorded (plus a final sentinel equal
    /// to the byte length of the text), together with the grapheme index of
    /// every line break.
    pub fn new<S, T>(name: S, string: T) -> Self
    where
        S: Into<Box<str>>,
        T: Into<Box<str>>,
    {
        let boxed = string.into();
        let mut pos = Vec::new();
        let mut lines = Vec::new();
        for (i, graph) in boxed.grapheme_indices(true) {
            // "\r\n" forms a single grapheme cluster, so comparing against
            // "\n" alone would never see Windows line endings.
            if graph == "\n" || graph == "\r\n" {
                lines.push(pos.len());
            }
            pos.push(i);
        }
        // Sentinel: lets a range ending at `len()` map to the end of the text.
        pos.push(boxed.len());
        Self {
            inner: Arc::new(SrcInner {
                name: name.into(),
                string: boxed,
                pos: pos.into(),
                lines: lines.into(),
            }),
        }
    }

    /// Name the source was created with.
    pub fn name(&self) -> &str {
        &self.inner.name
    }

    /// Whole text of the source.
    pub fn contents(&self) -> &str {
        &self.inner.string
    }

    /// Byte offsets at which each grapheme starts, followed by the byte
    /// length of the text.
    pub fn segments(&self) -> &[usize] {
        &self.inner.pos
    }

    /// Number of graphemes in the source.
    pub fn len(&self) -> usize {
        // `pos` always holds the trailing sentinel, so this cannot underflow.
        self.inner.pos.len() - 1
    }

    /// Whether the source contains no graphemes at all.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Creates a stream positioned (and marked) at the start of the source.
    pub fn stream(&self) -> Stream {
        Stream { src: self.clone(), pos: 0, mark: 0 }
    }

    /// Looks `idx` up in this source, returning whatever the index's
    /// [`SrcIndex::get`] implementation returns.
    pub fn get<I>(&self, idx: I) -> Option<&I::Out>
    where
        I: SrcIndex,
    {
        idx.get(self)
    }
}
/// `src[idx]` is the panicking counterpart of looking `idx` up with
/// [`SrcIndex::get`].
impl<I> Index<I> for Src
where
    I: SrcIndex,
{
    type Output = I::Out;

    /// Returns the data selected by `idx`.
    ///
    /// # Panics
    ///
    /// Panics with "Bad source indexing" when the lookup yields `None`.
    fn index(&self, idx: I) -> &I::Out {
        match idx.get(self) {
            Some(found) => found,
            None => panic!("Bad source indexing"),
        }
    }
}
impl Stream {
    /// Source this stream reads from.
    pub fn src(&self) -> &Src {
        &self.src
    }

    /// Grapheme at the current position, or `None` when the lookup in the
    /// source fails (for instance once the stream sits at the end).
    pub fn curr(&self) -> Option<&str> {
        self.src.get(self.pos)
    }

    /// Current position, as a grapheme index.
    pub fn pos(&self) -> usize {
        self.pos
    }

    /// Position saved by the last call to [`Stream::mark`].
    pub fn marked(&self) -> usize {
        self.mark
    }

    /// Location of the current position.
    pub fn loc(&self) -> Loc {
        let src = self.src.clone();
        Loc { src, pos: self.pos }
    }

    /// Span between the mark and the current position, whichever comes first.
    pub fn span(&self) -> Span {
        let start = self.pos.min(self.mark);
        let end = self.pos.max(self.mark);
        let loc = Loc { src: self.src.clone(), pos: start };
        Span { loc, len: end - start }
    }

    /// Saves the current position as the mark.
    pub fn mark(&mut self) {
        self.mark = self.pos;
    }

    /// Moves one grapheme forward.
    pub fn next(&mut self) {
        self.advance(1);
    }

    /// Moves one grapheme backward.
    pub fn prev(&mut self) {
        self.rollback(1);
    }

    /// Moves `count` graphemes forward, stopping at the end of the source.
    pub fn advance(&mut self, count: usize) {
        let target = self.pos.saturating_add(count);
        self.pos = target.min(self.src.len());
    }

    /// Moves `count` graphemes backward, stopping at position 0.
    pub fn rollback(&mut self, count: usize) {
        self.pos = self.pos.saturating_sub(count);
    }
}
impl Loc {
    /// Grapheme index of this location inside its source.
    pub fn pos(&self) -> usize {
        self.pos
    }

    /// Source this location belongs to.
    pub fn src(&self) -> &Src {
        &self.src
    }

    /// One-based `(line, column)` of this location, counted in graphemes.
    ///
    /// A line-break grapheme is reported as the last column of the line it
    /// terminates.
    pub fn line_column(&self) -> (usize, usize) {
        let lines = &self.src.inner.lines;
        // Both outcomes give the number of line breaks strictly before `pos`,
        // which is the zero-based line number.
        let line = match lines.binary_search(&self.pos) {
            Ok(i) | Err(i) => i,
        };
        let column = match line.checked_sub(1) {
            // First line: the column is the position itself, made one-based.
            None => self.pos + 1,
            // Later lines: distance from the previous line break. That break
            // is strictly before `pos`, so the subtraction cannot underflow
            // and the grapheme right after the break gets column 1.
            Some(prev) => self.pos - lines[prev],
        };
        (line + 1, column)
    }
}
/// Formats a location as `in <source name>, line <line>, column <column>`.
impl fmt::Display for Loc {
    fn fmt(&self, fmtr: &mut fmt::Formatter<'_>) -> fmt::Result {
        let (row, column) = self.line_column();
        let name = self.src.name();
        fmtr.write_fmt(format_args!("in {}, line {}, column {}", name, row, column))
    }
}
impl Span {
    /// Location of the first grapheme of the span.
    pub fn loc(&self) -> &Loc {
        &self.loc
    }

    /// Number of graphemes in the span.
    pub fn len(&self) -> usize {
        self.len
    }

    /// Text covered by the span, borrowed from the source.
    ///
    /// # Panics
    ///
    /// Goes through the source's `Index` implementation, which panics when
    /// the range cannot be resolved.
    pub fn inner_str(&self) -> &str {
        let first = self.loc.pos;
        let past_last = first + self.len;
        &self.loc.src[first .. past_last]
    }
}
/// Displays a span as the text it covers.
impl fmt::Display for Span {
    fn fmt(&self, fmtr: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Written straight to the formatter: width and alignment flags are
        // not applied to the text.
        fmtr.write_str(self.inner_str())
    }
}
/// Two spans are equal when they cover equal text; where that text sits in
/// the source is not compared.
impl PartialEq for Span {
    fn eq(&self, other: &Self) -> bool {
        let (lhs, rhs) = (self.inner_str(), other.inner_str());
        lhs == rhs
    }
}
// Span equality compares the covered text as `str`, so `Span` is marked `Eq`.
impl Eq for Span {}
/// Spans are ordered by the text they cover.
impl PartialOrd for Span {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        // `str` is totally ordered, so the comparison always yields `Some`.
        Some(self.inner_str().cmp(other.inner_str()))
    }
}
/// Total order on spans: the order of the text they cover.
impl Ord for Span {
    fn cmp(&self, other: &Self) -> Ordering {
        let (lhs, rhs) = (self.inner_str(), other.inner_str());
        Ord::cmp(lhs, rhs)
    }
}
/// Hashes the text covered by the span, matching the text-based equality.
impl Hash for Span {
    fn hash<H>(&self, state: &mut H)
    where
        H: Hasher,
    {
        let text = self.inner_str();
        Hash::hash(text, state)
    }
}
/// A single grapheme index selects that one grapheme as a `str`.
impl SrcIndex for usize {
    type Out = str;

    /// Returns the grapheme at index `self`, or `None` when there is none.
    fn get(self, src: &Src) -> Option<&Self::Out> {
        // `usize::MAX + 1` would overflow; no source has a grapheme there, so
        // report the index as absent instead of panicking.
        let end = self.checked_add(1)?;
        (self .. end).get(src)
    }
}
/// A half-open range of grapheme indices selects the text of those graphemes.
impl SrcIndex for Range<usize> {
    type Out = str;

    /// Returns the text from grapheme `self.start` up to, but not including,
    /// grapheme `self.end`. Returns `None` when either bound lies outside the
    /// source or when the range is reversed (`start > end`).
    fn get(self, src: &Src) -> Option<&Self::Out> {
        let offsets = &src.inner.pos;
        let start = *offsets.get(self.start)?;
        let end = *offsets.get(self.end)?;
        // Checked slicing: a reversed range would be undefined behavior with
        // `get_unchecked`, whereas `str::get` simply returns `None`.
        src.inner.string.get(start .. end)
    }
}
/// `..end` selects the text from the start of the source up to, but not
/// including, grapheme `end`.
impl SrcIndex for RangeTo<usize> {
    type Out = str;

    /// Delegates to the half-open range `0 .. end`.
    fn get(self, src: &Src) -> Option<&Self::Out> {
        let RangeTo { end } = self;
        (0 .. end).get(src)
    }
}
/// `start..` selects the text from grapheme `start` to the end of the source.
impl SrcIndex for RangeFrom<usize> {
    type Out = str;

    /// Delegates to the half-open range `start .. src.len()`.
    fn get(self, src: &Src) -> Option<&Self::Out> {
        let RangeFrom { start } = self;
        let end = src.len();
        (start .. end).get(src)
    }
}
/// `..` selects the whole text of the source.
impl SrcIndex for RangeFull {
    type Out = str;

    /// Always succeeds, returning the full contents of `src`.
    fn get(self, src: &Src) -> Option<&Self::Out> {
        let whole: &str = src.contents();
        Some(whole)
    }
}
/// `start..=end` selects the text from grapheme `start` through grapheme
/// `end`, both included.
impl SrcIndex for RangeInclusive<usize> {
    type Out = str;

    /// Delegates to the half-open range `start .. end + 1`, returning `None`
    /// when that range cannot be resolved.
    fn get(self, src: &Src) -> Option<&Self::Out> {
        let start = *self.start();
        // An inclusive end of `usize::MAX` has no exclusive counterpart and
        // cannot name a grapheme of any source, so treat it as out of range
        // rather than overflowing.
        let end = self.end().checked_add(1)?;
        (start .. end).get(src)
    }
}
/// `..=end` selects the text from the start of the source through grapheme
/// `end`, included.
impl SrcIndex for RangeToInclusive<usize> {
    type Out = str;

    /// Delegates to the half-open range `0 .. end + 1`, returning `None` when
    /// that range cannot be resolved.
    fn get(self, src: &Src) -> Option<&Self::Out> {
        // `..=usize::MAX` cannot be turned into an exclusive bound; report it
        // as out of range rather than overflowing.
        let end = self.end.checked_add(1)?;
        (0 .. end).get(src)
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment