Created
September 12, 2023 14:26
-
-
Save anthonyprintup/8aae8d845c9f02233abc4370d88ea14c to your computer and use it in GitHub Desktop.
A compile-time byte pattern matcher designed to match IDA patterns.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Created by Anthony Printup on 4/21/2023. | |
#pragma once | |
#include <algorithm>
#include <array>
#include <bit>
#include <bitset>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <exception>
#include <functional>
#include <ranges>
#include <span>
#include <type_traits>

#include "../strings.hpp"
#include "pattern.hpp"
namespace patterns::ida { | |
// Number of pattern characters that spell one fully specified byte (two hex digits).
inline constexpr std::size_t characters_per_byte {2};
// Character that marks a nibble (or a whole byte) as "match anything".
inline constexpr char wildcard_character {'?'};
// Character that separates consecutive bytes in the pattern text.
inline constexpr char delimiter_character {' '};
template<typename T> | |
concept ContiguousRange = std::ranges::contiguous_range<T> and std::ranges::sized_range<T>; | |
inline constexpr auto is_pure_wildcard = [] [[nodiscard]] ( | |
const ContiguousRange auto characters) constexpr noexcept { | |
using utilities::IsCharacter; | |
return std::ranges::all_of(characters, [] [[nodiscard]] (const IsCharacter auto character) constexpr noexcept { | |
return character == wildcard_character; | |
}); | |
}; | |
// Converts a two-character group into a byte: the first character supplies the high nibble,
// the second the low nibble. Groups of any other length convert to a zero byte.
inline constexpr auto to_byte =
    [] [[nodiscard]] (const ContiguousRange auto characters) constexpr noexcept -> ByteType {
    if (std::ranges::size(characters) == characters_per_byte) {
        const ByteType high_nibble = utilities::from_hex_character(characters[0uz]) << 4uz;
        const ByteType low_nibble = utilities::from_hex_character(characters[1uz]);
        return high_nibble | low_nibble;
    }
    return ByteType {};
};
template<auto... Characters> | |
requires(utilities::IsCharacter<decltype(Characters)> && ...) | |
consteval std::size_t count_bytes() noexcept { | |
using utilities::to_array; | |
auto bytes = to_array<Characters...>() | std::views::split(delimiter_character); | |
return static_cast<std::size_t>(std::ranges::distance(bytes)); | |
} | |
template<auto... Characters> | |
requires(utilities::IsCharacter<decltype(Characters)> && ...) | |
consteval std::size_t count_mask_bits() noexcept { | |
return count_bytes<Characters...>() * patterns::masks_per_byte; | |
} | |
template<auto... Characters> | |
requires(utilities::IsCharacter<decltype(Characters)> && ...) | |
consteval bool is_pattern_valid() noexcept { | |
using utilities::from_hex_character; | |
using utilities::to_array; | |
constexpr auto characters = to_array<Characters...>(); | |
auto byte_characters_range = characters | std::views::split(delimiter_character); | |
if (not byte_characters_range) | |
return false; | |
// Verify that all the characters are valid | |
const auto all_characters_valid = std::ranges::all_of( | |
byte_characters_range, [] [[nodiscard]] (const ContiguousRange auto byte_characters) constexpr noexcept { | |
// Empty sets aren't allowed | |
if (not std::ranges::size(byte_characters)) | |
return false; | |
// Skip pure wildcards (character sets which only contain the wildcard character(s)) | |
if (is_pure_wildcard(byte_characters)) | |
return true; | |
// Verify that there are at least two characters for every byte | |
if (std::ranges::size(byte_characters) != characters_per_byte) | |
return false; | |
// Filter wildcard characters and '0' and check if from_hex_character returns a valid value | |
return std::ranges::all_of( | |
byte_characters | std::views::filter([] [[nodiscard]] (const auto character) constexpr noexcept { | |
return character != wildcard_character and character != '0'; | |
}), | |
[] [[nodiscard]] (const auto character) constexpr noexcept { | |
return from_hex_character(character) != ByteType {}; | |
}); | |
}); | |
// Verify that the edges aren't pure wildcards | |
const auto filter_predicate = | |
[i = 0uz, size = static_cast<std::size_t>(std::ranges::distance(byte_characters_range))] [[nodiscard]] ( | |
const ContiguousRange auto) constexpr mutable noexcept { | |
const auto current_index = i++; | |
return not current_index or current_index == size - 1uz; | |
}; | |
const auto edges_are_not_wildcards = | |
std::ranges::none_of(byte_characters_range | std::views::filter(filter_predicate), is_pure_wildcard); | |
return all_characters_valid and edges_are_not_wildcards; | |
} | |
template<std::size_t DiscardedByteCount, auto... Characters> | |
requires(utilities::IsCharacter<decltype(Characters)> && ...) | |
consteval auto create_byte_array() noexcept { | |
using utilities::to_array; | |
constexpr auto characters = to_array<Characters...>(); | |
std::array<ByteType, count_bytes<Characters...>() - DiscardedByteCount> bytes {}; | |
for (std::size_t discarded_byte_count {}, byte_offset {}; | |
const auto byte_characters : characters | std::views::split(delimiter_character)) { | |
if (discarded_byte_count++ < DiscardedByteCount) | |
continue; | |
bytes[byte_offset++] = is_pure_wildcard(byte_characters) ? ByteType {} : to_byte(byte_characters); | |
} | |
return bytes; | |
} | |
template<std::size_t DiscardedByteCount, auto... Characters> | |
requires(utilities::IsCharacter<decltype(Characters)> && ...) | |
consteval auto create_mask_bitset() noexcept { | |
using utilities::to_array; | |
constexpr auto characters = to_array<Characters...>(); | |
constexpr auto discarded_bit_count = DiscardedByteCount * patterns::masks_per_byte; | |
std::bitset<count_mask_bits<Characters...>() - discarded_bit_count> mask_bits {}; | |
for (std::size_t discarded_byte_count {}, mask_bit_offset {}; | |
const auto byte_characters : characters | std::views::split(delimiter_character)) { | |
if (discarded_byte_count++ < DiscardedByteCount) | |
continue; | |
const auto character_count = std::ranges::size(byte_characters); | |
if (byte_characters[0uz] == wildcard_character) | |
mask_bits.set(mask_bit_offset); | |
if (byte_characters[character_count == 2uz ? 1uz : 0uz] == wildcard_character) | |
mask_bits.set(mask_bit_offset + 1uz); | |
mask_bit_offset += patterns::masks_per_byte; | |
} | |
return mask_bits; | |
} | |
template<auto... Characters> | |
requires(utilities::IsCharacter<decltype(Characters)> && ...) | |
struct Pattern { | |
static_assert(is_pattern_valid<Characters...>(), "Invalid pattern"); | |
using InputDataType = std::span<const ByteType>; | |
private: | |
template<std::size_t DataOffset = 0uz> [[nodiscard]] constexpr bool test(const ByteType *data) const noexcept { | |
constexpr auto pattern_size = count_bytes<Characters...>(); | |
if constexpr (DataOffset < pattern_size) { | |
constexpr auto remaining_bytes = pattern_size - DataOffset; | |
using DataType = decltype([] consteval noexcept { | |
if constexpr (remaining_bytes >= sizeof(std::uint64_t)) | |
return static_cast<std::uint64_t>(0uz); | |
else if constexpr (remaining_bytes >= sizeof(std::uint32_t)) | |
return static_cast<std::uint32_t>(0uz); | |
else if constexpr (remaining_bytes >= sizeof(std::uint16_t)) | |
return static_cast<std::uint16_t>(0uz); | |
else if constexpr (remaining_bytes == sizeof(std::uint8_t)) | |
return static_cast<std::uint8_t>(0uz); | |
else | |
throw std::exception {}; | |
}()); | |
constexpr auto mask_bits = create_mask_bitset<DataOffset, Characters...>(); | |
constexpr auto data_mask = [=] consteval noexcept -> DataType { | |
std::bitset<sizeof(DataType) * CHAR_BIT> bit_mask {}; | |
for (auto byte_index {0uz}; byte_index < sizeof(DataType); ++byte_index) { | |
const auto mask_bit_index = byte_index * patterns::masks_per_byte; | |
if (not mask_bits.test(mask_bit_index)) | |
for (auto bit_index {0uz}; bit_index < 4uz; ++bit_index) | |
bit_mask.set(byte_index * CHAR_BIT + 4uz + bit_index); | |
if (not mask_bits.test(mask_bit_index + 1uz)) | |
for (auto bit_index {0uz}; bit_index < 4uz; ++bit_index) | |
bit_mask.set(byte_index * CHAR_BIT + bit_index); | |
} | |
return static_cast<DataType>(bit_mask.to_ullong()); | |
}(); | |
constexpr auto bytes = create_byte_array<DataOffset, Characters...>(); | |
if ((*reinterpret_cast<const DataType *>(data + DataOffset) & data_mask) != | |
*reinterpret_cast<const DataType *>(bytes.data())) | |
return false; | |
// Skip pure wildcard bytes | |
constexpr auto bytes_to_skip = [=] consteval noexcept -> std::size_t { | |
std::size_t pure_wildcard_bytes {}; | |
for (auto bit_index {sizeof(DataType) * patterns::masks_per_byte}; bit_index < mask_bits.size(); | |
bit_index += patterns::masks_per_byte, ++pure_wildcard_bytes) | |
if (not mask_bits.test(bit_index) or not mask_bits.test(bit_index + 1uz)) | |
break; | |
return pure_wildcard_bytes; | |
}(); | |
return Pattern::test<DataOffset + sizeof(DataType) + bytes_to_skip>(data); | |
} else | |
return true; | |
} | |
public: | |
[[nodiscard]] constexpr InputDataType::iterator find(const InputDataType data) const noexcept { | |
constexpr auto pattern_size = count_bytes<Characters...>(); | |
for (auto offset {0z}; offset < static_cast<decltype(offset)>(data.size_bytes() - pattern_size); ++offset) | |
if (this->test(&data[static_cast<InputDataType::size_type>(offset)])) | |
return data.begin() + offset; | |
return data.end(); | |
} | |
}; | |
// String literal operator template: turns the characters of the literal into the matching Pattern type.
template<utilities::IsCharacter T, T... Characters>
[[nodiscard]] consteval auto operator""_ida() noexcept {
    using PatternType = Pattern<Characters...>;
    return PatternType {};
}
}// namespace patterns::ida |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Created by Anthony Printup on 4/21/2023. | |
#pragma once | |
#include <cstddef> | |
namespace patterns {
    // Element type used for both the pattern bytes and the data being searched.
    using ByteType = std::byte;

    // Number of mask bits kept for every pattern byte.
    // `inline` gives the constant a single definition across all translation units that include this header.
    inline constexpr std::size_t masks_per_byte {2};
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Created by Anthony Printup on 8/18/2022. | |
#pragma once | |
#include <array> | |
#include <type_traits> | |
#include <string_view> | |
namespace utilities { | |
template<class T> | |
concept IsCharacter = | |
std::is_same_v<T, char> || std::is_same_v<T, wchar_t> || | |
std::is_same_v<T, char8_t> || std::is_same_v<T, char16_t> || | |
std::is_same_v<T, char32_t>; | |
template<auto... Characters> | |
requires (IsCharacter<decltype(Characters)> && ...) | |
consteval auto to_array() noexcept { | |
return std::array {Characters...}; | |
} | |
template <class Lambda, std::size_t... Indices> | |
consteval auto create_sequence_helper(Lambda lambda, const std::index_sequence<Indices...>) noexcept { | |
return std::integer_sequence<typename std::invoke_result_t<Lambda>::value_type, lambda()[Indices]...> {}; | |
} | |
// The std::integer_sequence type holding every element of the string returned by calling StringProvider.
template<auto StringProvider>
using StringSequence = decltype(create_sequence_helper(
    StringProvider, std::make_index_sequence<StringProvider().size()> {}));
[[nodiscard]] constexpr std::byte from_hex_character(const IsCharacter auto character) noexcept { | |
if ('0' <= character and character <= '9') | |
return static_cast<std::byte>(character - '0'); | |
if ('a' <= character and character <= 'f') | |
return static_cast<std::byte>(character - 'a' + 10); | |
if ('A' <= character and character <= 'F') | |
return static_cast<std::byte>(character - 'A' + 10); | |
return {}; | |
} | |
} | |
// Expands to the ::utilities::StringSequence type for `literal`: a std::integer_sequence holding
// the characters of the string, with the character type taken from dereferencing `literal`.
#define CREATE_CHARACTER_SEQUENCE(literal)                                                              \
    ::utilities::StringSequence<                                                                        \
        []() consteval -> std::basic_string_view<std::remove_cvref_t<decltype(*(literal))>> {          \
            return literal;                                                                             \
        }>
oopsmishap
commented
Oct 7, 2023
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment