Created
November 1, 2021 17:19
-
-
Save tobz/fec84b1ac850aeaef216608a8585c982 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/metrics/src/cow.rs b/metrics/src/cow.rs | |
index 968b5ef..f1627af 100644 | |
--- a/metrics/src/cow.rs | |
+++ b/metrics/src/cow.rs | |
@@ -91,6 +91,10 @@ impl<'a> Cow<'a, str> { | |
marker: PhantomData, | |
} | |
} | |
+ | |
+ pub const fn const_as_ref(&self) -> &str { | |
+ unsafe { const_str_ref_from_parts(self.ptr, self.meta) } | |
+ } | |
} | |
impl<'a> Cow<'a, [Cow<'static, str>]> { | |
@@ -449,12 +453,12 @@ pub struct Metadata(usize, usize); | |
impl Metadata { | |
#[inline] | |
- fn length(&self) -> usize { | |
+ const fn length(&self) -> usize { | |
self.0 | |
} | |
#[inline] | |
- fn capacity(&self) -> usize { | |
+ const fn capacity(&self) -> usize { | |
self.1 | |
} | |
@@ -523,3 +527,58 @@ impl Metadata { | |
Metadata(1 << 32) | |
} | |
}*/ | |
+ | |
+const unsafe fn const_str_ref_from_parts<'a>(ptr: NonNull<u8>, metadata: Metadata) -> &'a str { | |
+ // SAFETY: | |
+ // | |
+ // This function is just about the most cursed code I've ever written, that said... | |
+ // | |
+ // - We only call this method via `Cow<'a, str>`, so we know we're dealing with a string. | |
+ // - If we're in a const context, we can't possibly be dealing with a `String`, and even if | |
+ // support for that gets added to const, our metadata tracks the length of the string anyways. | |
+ // - On top of that, `String`'s own method for getting a pointer looks like this: | |
+ // `self as *const str as *const u8` so we should be safe, layout-wise. | |
+ // - Our string pointer is already aligned, as we didn't manually construct it nor do we modify | |
+ // it after storing it. | |
+ // - We're handing out an immutable reference bound to the lifetime of the CoW structure itself. | |
+ // - The caller ensures the lifetime is tied to the CoW structure itself. | |
+ // | |
+ // We copy the machinery used by `str::ptr` to build our fat pointer to the underlying string, | |
+ // where we have the pointer, and the "metadata", which for a string is simply the pointer to | |
+ // the bytes and then the number of bytes in the string, as a usize. After that, it's just | |
+ // normal casting to build our immutable reference. | |
+ // | |
+ // Final warning, from the stdlib itself, that I decided not to heed (but this code comment | |
+ // should only exists if we made it work): | |
+ // | |
+ // > Accessing the value from the `PtrRepr` union is safe since *const T and PtrComponents<T> | |
+ // > have the same memory layouts. Only std can make this guarantee. | |
+ | |
+ #[repr(C)] | |
+ union StrPtrRepr<'b> { | |
+ shared_ref: &'b str, | |
+ components: StrPtrComponents, | |
+ } | |
+ | |
+ #[repr(C)] | |
+ struct StrPtrComponents { | |
+ data_address: *const (), | |
+ metadata: usize, | |
+ } | |
+ | |
+ impl Copy for StrPtrComponents {} | |
+ impl Clone for StrPtrComponents { | |
+ fn clone(&self) -> Self { | |
+ *self | |
+ } | |
+ } | |
+ | |
+ let repr = StrPtrRepr { | |
+ components: StrPtrComponents { | |
+ data_address: ptr.as_ptr() as *const (), | |
+ metadata: metadata.length(), | |
+ }, | |
+ }; | |
+ | |
+ repr.shared_ref | |
+} | |
diff --git a/metrics/src/handles.rs b/metrics/src/handles.rs | |
index 5d94086..0aa102a 100644 | |
--- a/metrics/src/handles.rs | |
+++ b/metrics/src/handles.rs | |
@@ -158,11 +158,11 @@ impl Histogram { | |
impl CounterFn for AtomicU64 { | |
fn increment(&self, value: u64) { | |
- let _ = self.fetch_add(value, Ordering::Release); | |
+ let _ = self.fetch_add(value, Ordering::Relaxed); | |
} | |
fn absolute(&self, value: u64) { | |
- let _ = self.fetch_max(value, Ordering::AcqRel); | |
+ let _ = self.fetch_max(value, Ordering::Relaxed); | |
} | |
} | |
@@ -196,7 +196,7 @@ impl GaugeFn for AtomicU64 { | |
} | |
fn set(&self, value: f64) { | |
- let _ = self.swap(value.to_bits(), Ordering::AcqRel); | |
+ let _ = self.swap(value.to_bits(), Ordering::Relaxed); | |
} | |
} | |
diff --git a/metrics/src/hashing.rs b/metrics/src/hashing.rs | |
new file mode 100644 | |
index 0000000..1f52c96 | |
--- /dev/null | |
+++ b/metrics/src/hashing.rs | |
@@ -0,0 +1,87 @@ | |
+use std::hash::Hasher; | |
+ | |
+const FNV_OFFSET_BASIS_64: u64 = 0xcbf29ce484222325; | |
+const FNV_PRIME_64: u64 = 0x00000100000001B3; | |
+ | |
+/// Key-specific hashing algorithm. | |
+/// | |
+/// Currently uses FNV1a - <https://datatracker.ietf.org/doc/html/draft-eastlake-fnv-17.html> | |
+/// | |
+/// For any use-case within a `metrics`-owned or adjacent crate, where hashing of a key is required, | |
+/// this is the hasher that will be used. | |
+pub type KeyHasher = Fnv1aConstHasher; | |
+ | |
+pub struct Fnv1aConstHasher(u64); | |
+ | |
+impl Fnv1aConstHasher { | |
+ pub const fn new() -> Self { | |
+ Self(FNV_OFFSET_BASIS_64) | |
+ } | |
+ | |
+ pub const fn const_write(self, bytes: &[u8]) -> Fnv1aConstHasher { | |
+ let mut hash = self.0; | |
+ | |
+ let mut i = 0; | |
+ let len = bytes.len(); | |
+ while i < len { | |
+ hash ^= bytes[i] as u64; | |
+ hash = hash.wrapping_mul(FNV_PRIME_64); | |
+ i += 1; | |
+ } | |
+ | |
+ Fnv1aConstHasher(hash) | |
+ } | |
+ | |
+ pub const fn const_finish(&self) -> u64 { | |
+ self.0 | |
+ } | |
+} | |
+ | |
+impl Default for Fnv1aConstHasher { | |
+ fn default() -> Self { | |
+ Self::new() | |
+ } | |
+} | |
+ | |
+impl Hasher for Fnv1aConstHasher { | |
+ fn finish(&self) -> u64 { | |
+ self.const_finish() | |
+ } | |
+ | |
+ fn write(&mut self, bytes: &[u8]) { | |
+ let mut hash = Fnv1aConstHasher(self.0); | |
+ hash = hash.const_write(bytes); | |
+ self.0 = hash.0; | |
+ } | |
+} | |
+ | |
+#[derive(Copy, Clone)] | |
+pub struct Fnv1aHasher(u64); | |
+ | |
+impl Fnv1aHasher { | |
+ pub const fn new() -> Self { | |
+ Self(FNV_OFFSET_BASIS_64) | |
+ } | |
+} | |
+ | |
+impl Default for Fnv1aHasher { | |
+ fn default() -> Self { | |
+ Self::new() | |
+ } | |
+} | |
+ | |
+impl Hasher for Fnv1aHasher { | |
+ fn finish(&self) -> u64 { | |
+ self.0 | |
+ } | |
+ | |
+ fn write(&mut self, bytes: &[u8]) { | |
+ let mut i = 0; | |
+ let len = bytes.len(); | |
+ while i < len { | |
+ self.0 ^= bytes[i] as u64; | |
+ self.0 = self.0.wrapping_mul(FNV_PRIME_64); | |
+ i += 1; | |
+ } | |
+ } | |
+} | |
diff --git a/metrics/src/key.rs b/metrics/src/key.rs | |
index f040df0..044cb86 100644 | |
--- a/metrics/src/key.rs | |
+++ b/metrics/src/key.rs | |
@@ -1,34 +1,29 @@ | |
-use crate::{cow::Cow, IntoLabels, KeyHasher, Label, SharedString}; | |
+use crate::{cow::Cow, hashing::Fnv1aHasher, IntoLabels, KeyHasher, Label, SharedString}; | |
use alloc::{string::String, vec::Vec}; | |
use core::{fmt, hash::Hash, slice::Iter}; | |
-use std::{ | |
- cmp, | |
- hash::Hasher, | |
- sync::atomic::{AtomicBool, AtomicU64, Ordering}, | |
-}; | |
+use std::{cmp, hash::Hasher}; | |
const NO_LABELS: [Label; 0] = []; | |
/// A metric identifier. | |
/// | |
-/// # Safety | |
-/// Clippy will report any usage of `Key` as the key of a map/set as "mutable key type", meaning | |
-/// that it believes that there is interior mutability present which could lead to a key being | |
-/// hashed different over time. That behavior could lead to unexpected behavior, as standard | |
-/// maps/sets depend on keys having stable hashes over time, related to times when they must be | |
-/// recomputed as the internal storage is resized and items are moved around. | |
+/// # Hashing behavior / `Hash` implementation | |
/// | |
-/// In this case, the `Hash` implementation of `Key` does _not_ depend on the fields which Clippy | |
-/// considers mutable (the atomics) and so it is actually safe against differing hashes being | |
-/// generated. We personally allow this Clippy lint in places where we store the key, such as | |
-/// helper types in the `metrics-util` crate, and you may need to do the same if you're using it in | |
-/// such a way as well. | |
-#[derive(Debug)] | |
+/// As part of the overall performance posture of `metrics`, we generate the hash for a `Key` at the | |
+/// time of construction, whether constructed in a constant fashion or not. Specific helper | |
+/// utilities will access this pre-generated hash via [`Key::get_hash`], but there are still times | |
+/// where the typical [Hash](std::hash::Hash) trait muyst be used, such as when storing a `Key` in a | |
+/// map or set. As keys may need to be rehashed when the underlying storage is resized, this | |
+/// presents an opportunity for drift between the pre-generated hash and the hash returned by `Key` | |
+/// when using the `Hash` trait. | |
+/// | |
+/// Callers _MUST_ use the [`KeyHasher`](crate::KeyHasher) hasher for any maps or sets where `Key` | |
+/// is used as the key. This will ensure that hashes match if they need to be regenerated. | |
+#[derive(Clone, Debug)] | |
pub struct Key { | |
name: SharedString, | |
labels: Cow<'static, [Label]>, | |
- hashed: AtomicBool, | |
- hash: AtomicU64, | |
+ hash: u64, | |
} | |
impl Key { | |
@@ -60,12 +55,7 @@ impl Key { | |
where | |
N: Into<SharedString>, | |
{ | |
- Self { | |
- name: name.into(), | |
- labels: Cow::<[Label]>::const_slice(labels), | |
- hashed: AtomicBool::new(false), | |
- hash: AtomicU64::new(0), | |
- } | |
+ Self::builder(name.into(), Cow::<[Label]>::const_slice(labels)) | |
} | |
/// Creates a [`Key`] from a static name. | |
@@ -82,20 +72,16 @@ impl Key { | |
Self { | |
name: Cow::const_str(name), | |
labels: Cow::<[Label]>::const_slice(labels), | |
- hashed: AtomicBool::new(false), | |
- hash: AtomicU64::new(0), | |
+ hash: const_hasher(name, labels), | |
} | |
} | |
fn builder(name: SharedString, labels: Cow<'static, [Label]>) -> Self { | |
- let hash = generate_key_hash(&name, &labels); | |
+ let mut hasher = Fnv1aHasher::new(); | |
+ non_const_hasher(&mut hasher, &name, &labels); | |
+ let hash = hasher.finish(); | |
- Self { | |
- name, | |
- labels, | |
- hashed: AtomicBool::new(true), | |
- hash: AtomicU64::new(hash), | |
- } | |
+ Self { name, labels, hash } | |
} | |
/// Name of this key. | |
@@ -128,36 +114,35 @@ impl Key { | |
/// Gets the hash value for this key. | |
pub fn get_hash(&self) -> u64 { | |
- if self.hashed.load(Ordering::Acquire) { | |
- self.hash.load(Ordering::Acquire) | |
- } else { | |
- let hash = generate_key_hash(&self.name, &self.labels); | |
- self.hash.store(hash, Ordering::Release); | |
- self.hashed.store(true, Ordering::Release); | |
- hash | |
- } | |
+ self.hash | |
} | |
} | |
-fn generate_key_hash(name: &SharedString, labels: &Cow<'static, [Label]>) -> u64 { | |
- let mut hasher = KeyHasher::default(); | |
- key_hasher_impl(&mut hasher, name, labels); | |
- hasher.finish() | |
+const fn const_hasher(name: &'static str, labels: &'static [Label]) -> u64 { | |
+ let mut hasher = KeyHasher::new(); | |
+ hasher = const_hash_str(hasher, name); | |
+ let mut i = 0; | |
+ let n = labels.len(); | |
+ while i < n { | |
+ hasher = const_hash_str(hasher, labels[i].0.const_as_ref()); | |
+ hasher = const_hash_str(hasher, labels[i].1.const_as_ref()); | |
+ i += 1; | |
+ } | |
+ | |
+ hasher.const_finish() | |
} | |
-fn key_hasher_impl<H: Hasher>(state: &mut H, name: &SharedString, labels: &Cow<'static, [Label]>) { | |
- name.hash(state); | |
- labels.hash(state); | |
+const fn const_hash_str(mut hasher: KeyHasher, s: &str) -> KeyHasher { | |
+ let bytes = s.as_bytes(); | |
+ hasher = hasher.const_write(&bytes.len().to_ne_bytes()); | |
+ hasher.const_write(bytes) | |
} | |
-impl Clone for Key { | |
- fn clone(&self) -> Self { | |
- Self { | |
- name: self.name.clone(), | |
- labels: self.labels.clone(), | |
- hashed: AtomicBool::new(self.hashed.load(Ordering::Acquire)), | |
- hash: AtomicU64::new(self.hash.load(Ordering::Acquire)), | |
- } | |
+fn non_const_hasher<H: Hasher>(state: &mut H, name: &SharedString, labels: &Cow<'static, [Label]>) { | |
+ let name = name.as_ref().as_bytes(); | |
+ name.hash(state); | |
+ for label in labels.as_ref() { | |
+ label.hash(state); | |
} | |
} | |
@@ -183,7 +168,7 @@ impl Ord for Key { | |
impl Hash for Key { | |
fn hash<H: Hasher>(&self, state: &mut H) { | |
- key_hasher_impl(state, &self.name, &self.labels); | |
+ non_const_hasher(state, &self.name, &self.labels); | |
} | |
} | |
@@ -232,8 +217,11 @@ where | |
#[cfg(test)] | |
mod tests { | |
use super::Key; | |
- use crate::Label; | |
- use std::collections::HashMap; | |
+ use crate::{KeyHasher, Label}; | |
+ use std::{ | |
+ collections::HashMap, | |
+ hash::{Hash, Hasher}, | |
+ }; | |
static BORROWED_NAME: &'static str = "name"; | |
static FOOBAR_NAME: &'static str = "foobar"; | |
@@ -329,4 +317,38 @@ mod tests { | |
"Key(foobar, [black = black, lives = lives, matter = matter])" | |
); | |
} | |
+ | |
+ #[test] | |
+ fn test_static_vs_dynamic_hashing() { | |
+ let get_non_const_hash = |key: &Key| { | |
+ let mut hasher = KeyHasher::new(); | |
+ key.hash(&mut hasher); | |
+ hasher.finish() | |
+ }; | |
+ | |
+ let from_name = Key::from_name(FOOBAR_NAME); | |
+ let from_static_name = Key::from_static_name(FOOBAR_NAME); | |
+ assert_eq!(from_name.get_hash(), from_static_name.get_hash()); | |
+ assert_eq!(get_non_const_hash(&from_name), from_name.get_hash()); | |
+ assert_eq!( | |
+ get_non_const_hash(&from_static_name), | |
+ from_static_name.get_hash() | |
+ ); | |
+ assert_eq!(from_name.get_hash(), from_static_name.get_hash()); | |
+ | |
+ let from_parts = Key::from_parts(FOOBAR_NAME, LABELS.iter()); | |
+ let from_static_labels = Key::from_static_labels(FOOBAR_NAME, &LABELS); | |
+ let from_static_parts = Key::from_static_parts(FOOBAR_NAME, &LABELS); | |
+ assert_eq!(get_non_const_hash(&from_parts), from_parts.get_hash()); | |
+ assert_eq!( | |
+ get_non_const_hash(&from_static_labels), | |
+ from_static_labels.get_hash() | |
+ ); | |
+ assert_eq!( | |
+ get_non_const_hash(&from_static_parts), | |
+ from_static_parts.get_hash() | |
+ ); | |
+ assert_eq!(from_parts.get_hash(), from_static_labels.get_hash()); | |
+ assert_eq!(from_static_labels.get_hash(), from_static_parts.get_hash()); | |
+ } | |
} | |
diff --git a/metrics/src/label.rs b/metrics/src/label.rs | |
index bcffd68..444454b 100644 | |
--- a/metrics/src/label.rs | |
+++ b/metrics/src/label.rs | |
@@ -1,4 +1,4 @@ | |
-use std::slice::Iter; | |
+use std::{hash, slice::Iter}; | |
use crate::SharedString; | |
use alloc::vec::Vec; | |
@@ -13,7 +13,7 @@ use alloc::vec::Vec; | |
/// the request currently being processed, or the request path being processed. Another example may | |
/// be that if you were running a piece o code that was turned on or off by a feature toggle, you may | |
/// wish to include a label in metrics to indicate whether or not they were using the feature toggle. | |
-#[derive(PartialEq, Eq, Hash, Clone, Debug, PartialOrd, Ord)] | |
+#[derive(PartialEq, Eq, Clone, Debug, PartialOrd, Ord)] | |
pub struct Label(pub(crate) SharedString, pub(crate) SharedString); | |
impl Label { | |
@@ -47,6 +47,13 @@ impl Label { | |
} | |
} | |
+impl hash::Hash for Label { | |
+ fn hash<H: hash::Hasher>(&self, state: &mut H) { | |
+ self.0.as_ref().as_bytes().hash(state); | |
+ self.1.as_ref().as_bytes().hash(state); | |
+ } | |
+} | |
+ | |
impl<K, V> From<&(K, V)> for Label | |
where | |
K: Into<SharedString> + Clone, | |
diff --git a/metrics/src/lib.rs b/metrics/src/lib.rs | |
index 903b758..1ed3b74 100644 | |
--- a/metrics/src/lib.rs | |
+++ b/metrics/src/lib.rs | |
@@ -262,6 +262,9 @@ mod cow; | |
mod handles; | |
pub use self::handles::*; | |
+mod hashing; | |
+pub use self::hashing::KeyHasher; | |
+ | |
mod key; | |
pub use self::key::*; | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment