|
use crate::columns::ColumnMap; |
|
use crate::row::Row; |
|
use serde::de::{self, Deserialize, Deserializer, MapAccess, Unexpected, Visitor}; |
|
use std::borrow::Cow; |
|
use std::fmt; |
|
use std::marker::PhantomData; |
|
use std::str; |
|
|
|
struct ColumnVisitor; |
|
|
|
/// Implements a visitor that converts the 'columns' JSON object into a ColumnMap |
|
impl<'de> Visitor<'de> for ColumnVisitor { |
|
type Value = ColumnMap; |
|
|
|
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { |
|
formatter.write_str("Columns") |
|
} |
|
|
|
fn visit_map<M>(self, mut access: M) -> Result<Self::Value, M::Error> |
|
where |
|
M: MapAccess<'de>, |
|
{ |
|
let mut columns = match access.size_hint() { |
|
Some(capacity) => ColumnMap::with_capacity(capacity), |
|
None => ColumnMap::default(), |
|
}; |
|
|
|
while let Some((key, value)) = access.next_entry::<String, String>()? { |
|
match value.parse::<u8>() { |
|
Ok(index) => columns.insert(key, index), |
|
_ => return Err(de::Error::invalid_type(Unexpected::Str(&value), &self)), |
|
}; |
|
} |
|
|
|
Ok(columns) |
|
} |
|
} |
|
|
|
impl<'de> Deserialize<'de> for ColumnMap { |
|
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> |
|
where |
|
D: Deserializer<'de>, |
|
{ |
|
deserializer.deserialize_map(ColumnVisitor) |
|
} |
|
} |
|
|
|
/// The well-known top-level keys of the JSON document.
///
/// One variant exists for each name listed in `FIELDS`.
enum Field {
    /// The `"id"` key.
    Id,
    /// The `"metadata"` key.
    Metadata,
    /// The `"columns"` key.
    Columns,
    /// The `"rows"` key.
    Rows,
}
|
|
|
/// Names of the recognized top-level keys, used for struct deserialization
/// and for "unknown field" error reporting.
const FIELDS: &[&str] = &["id", "metadata", "columns", "rows"];
|
|
|
struct FieldVisitor; |
|
|
|
/// Implements a visitor to track well-known fields without allocating |
|
impl<'de> Visitor<'de> for FieldVisitor { |
|
type Value = Field; |
|
|
|
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { |
|
formatter.write_fmt(format_args!( |
|
"Expected one of the following: {}", |
|
FIELDS.join(", ") |
|
)) |
|
} |
|
|
|
fn visit_str<E>(self, value: &str) -> Result<Field, E> |
|
where |
|
E: de::Error, |
|
{ |
|
match value { |
|
"id" => Ok(Field::Id), |
|
"metadata" => Ok(Field::Metadata), |
|
"columns" => Ok(Field::Columns), |
|
"rows" => Ok(Field::Rows), |
|
_ => Err(de::Error::unknown_field(value, FIELDS)), |
|
} |
|
} |
|
} |
|
|
|
impl<'de> Deserialize<'de> for Field { |
|
fn deserialize<D>(deserializer: D) -> Result<Field, D::Error> |
|
where |
|
D: Deserializer<'de>, |
|
{ |
|
deserializer.deserialize_identifier(FieldVisitor) |
|
} |
|
} |
|
|
|
pub(crate) struct PartialJson { |
|
pub id: String, |
|
pub metadata: Vec<String>, |
|
pub columns: ColumnMap, |
|
} |
|
|
|
impl Default for PartialJson { |
|
fn default() -> Self { |
|
Self { |
|
id: String::with_capacity(0), |
|
metadata: Vec::with_capacity(0), |
|
columns: ColumnMap::with_capacity(0), |
|
} |
|
} |
|
} |
|
|
|
struct PartialJsonVisitor<'de, 'a>(&'a mut PartialJson, PhantomData<&'de ()>); |
|
|
|
/// Implements a visitor that populates everything to, but not including, rows |
|
impl<'de, 'a> Visitor<'de> for PartialJsonVisitor<'de, 'a> { |
|
type Value = (); |
|
|
|
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { |
|
formatter.write_str("JSON document") |
|
} |
|
|
|
fn visit_map<V>(self, mut map: V) -> Result<Self::Value, V::Error> |
|
where |
|
V: MapAccess<'de>, |
|
{ |
|
let doc = self.0; |
|
|
|
while let Some(key) = map.next_key()? { |
|
match key { |
|
Field::Id => doc.id = map.next_value()?, |
|
Field::Metadata => doc.metadata = map.next_value()?, |
|
Field::Columns => doc.columns = map.next_value()?, |
|
Field::Rows => { |
|
// TODO: before moving on, validation can be done here |
|
// |
|
// if doc.id.is_empty() { |
|
// return Err(de::Error::missing_field("Missing 'id'")); |
|
// } |
|
|
|
// this is where 'magic' happens with the visitor. we perform validation |
|
// on the previously expected data members before we exit deserialization. |
|
// at this point the underlying stream has read up to: |
|
// |
|
// { |
|
// ..., |
|
// "rows": [ |
|
// ↑ cursor is right before the array value |
|
// ] |
|
// } |
|
// |
|
break; |
|
} |
|
} |
|
} |
|
|
|
Ok(()) |
|
} |
|
} |
|
|
|
impl<'de> Deserialize<'de> for PartialJson { |
|
fn deserialize<D>(_deserializer: D) -> Result<Self, D::Error> |
|
where |
|
D: Deserializer<'de>, |
|
{ |
|
Err(de::Error::custom("Use deserialize_in_place instead")) |
|
} |
|
|
|
fn deserialize_in_place<D>(deserializer: D, place: &mut Self) -> Result<(), D::Error> |
|
where |
|
D: Deserializer<'de>, |
|
{ |
|
// normally, 'deserialize' would be used to reify a struct. we can reify a partial |
|
// struct, but the default behavior of the JSON deserialize will read to the end |
|
// to verify format correctness. we side step that process by deserializing in-place |
|
// and they bail out of the deserialization process early. |
|
deserializer.deserialize_struct( |
|
"PartialJson", |
|
FIELDS, |
|
PartialJsonVisitor(place, PhantomData), |
|
) |
|
} |
|
} |
|
|
|
struct ValuesVisitor<'a> { |
|
row: &'a mut Row |
|
} |
|
|
|
impl<'a> ValuesVisitor<'a> { |
|
fn new(row: &'a mut Row) -> Self { |
|
Self { row } |
|
} |
|
} |
|
|
|
/// Implements a visitor that process the values of row as a singleton array of values |
|
impl<'de, 'a> Visitor<'de> for ValuesVisitor<'a> { |
|
type Value = (); |
|
|
|
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { |
|
formatter.write_str("Row") |
|
} |
|
|
|
fn visit_map<V>(self, mut map: V) -> Result<Self::Value, V::Error> |
|
where |
|
V: MapAccess<'de>, |
|
{ |
|
let mut count = 0; |
|
let expected = self.row.values.len(); |
|
|
|
// the JSON is an object, but the key stringified integer, so we |
|
// can map it to a simple array instead |
|
while let Some(key) = map.next_key::<usize>()? { |
|
if key < expected { |
|
let buffer = &mut self.row.values[key]; |
|
|
|
buffer.clear(); |
|
|
|
// all values should be JSON strings; however, we don't know |
|
// if the text contains escape sequences. if the value is just |
|
// normal string, then we'll get a borrowed string slice without |
|
// allocating. if the string has an escape sequence, it has to |
|
// be unescaped, which will allocate a new string to hold the |
|
// unescaped value. |
|
// |
|
// { |
|
// "normal": "normal", |
|
// "escaped": "line 1\\nline 2", |
|
// } |
|
// |
|
// we check the value length as well. a zero-length string |
|
// (e.g. "") will never allocate. |
|
// |
|
// there isn't an immediately obvious way to capture the |
|
// Cow values directly. if that is possible, there is an |
|
// opportunity to improve things further, by skipping |
|
// the copy operation |
|
let value = map.next_value::<Cow<'a, str>>()?; |
|
|
|
if !value.is_empty() { |
|
match value { |
|
Cow::Borrowed(normal_string) => buffer.push_str(normal_string), |
|
Cow::Owned(unescaped_string) => buffer.push_str(unescaped_string.as_str()), |
|
}; |
|
} |
|
} |
|
|
|
count += 1; |
|
} |
|
|
|
if count != expected { |
|
return Err(de::Error::custom(format!( |
|
"Row {} has a column count of {}, but there are {} column defined.", |
|
self.row.index, count, expected, |
|
))); |
|
} |
|
|
|
Ok(()) |
|
} |
|
} |
|
|
|
impl<'de> Deserialize<'de> for Row { |
|
fn deserialize<D>(_deserializer: D) -> Result<Self, D::Error> |
|
where |
|
D: Deserializer<'de>, |
|
{ |
|
// if we really wanted to support it, creating a new Row |
|
// per item would work |
|
Err(de::Error::custom("Use deserialize_in_place instead")) |
|
} |
|
|
|
fn deserialize_in_place<D>(deserializer: D, place: &mut Self) -> Result<(), D::Error> |
|
where |
|
D: Deserializer<'de>, |
|
{ |
|
// big gains come from passing in a single Row struct as a monad and updating just its |
|
// values. the first pass is expected to allocate a String buffer for each value. |
|
// while we can't be sure, most columns use similar values so subsequent rows |
|
// likely just copy bytes from the input stream to the existing string buffers. |
|
// this should be a very small amount of memory (ex: < 1K) |
|
deserializer.deserialize_map(ValuesVisitor::new(place)) |
|
} |
|
} |