Created
December 24, 2016 00:39
-
-
Save e-wahyutama/5f30e0fd0224701ee7664777fc96ba26 to your computer and use it in GitHub Desktop.
an attempt of a challenge from: https://gist.github.com/braydo25/f7a9ce7ce7ad7c5fb11ec511887789bc
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// erick[dot]wahyutama[at]gmail.com | |
// Ery E. Wahyutama | |
#include <unordered_map> | |
// https://github.com/json-c/json-c | |
#include <json-c/json.h> | |
#ifndef byte | |
typedef unsigned char byte; | |
#endif | |
struct key | |
{ | |
key(byte* _start, size_t _len) | |
: start(_start) | |
, len(_len) | |
{ | |
} | |
byte* start; | |
size_t len; | |
}; | |
// value will always allocated in the heaps | |
struct value | |
{ | |
value(size_t _off_start, size_t _len) | |
: off_start(_off_start) | |
, len(_len) | |
, next(nullptr) | |
{ | |
} | |
~value() | |
{ | |
if (next) | |
{ | |
delete next; | |
next = nullptr; | |
} | |
} | |
bool intersect(value* val) | |
{ | |
return (off_start + len) > val->off_start; | |
} | |
size_t off_start; | |
size_t len; | |
value* next; | |
}; | |
class cvalue | |
{ | |
public: | |
cvalue(value* val) | |
: m_size(1) | |
, m_data(val) | |
{ | |
} | |
~cvalue() | |
{ | |
delete m_data; | |
} | |
cvalue(cvalue&& other) | |
{ | |
m_data = other.m_data; | |
m_size = other.m_size; | |
other.m_data = nullptr; | |
} | |
// push_front only | |
void push(value* val) | |
{ | |
m_size++; | |
val->next = m_data; | |
m_data = val; | |
} | |
size_t size() | |
{ | |
return m_size; | |
} | |
value* top() | |
{ | |
return m_data; | |
} | |
private: | |
size_t m_size; | |
value* m_data; | |
}; | |
namespace { | |
/* | |
************************************************************************** | |
* * | |
* General Purpose Hash Function Algorithms Library * | |
* * | |
* Author: Arash Partow - 2002 * | |
* URL: http://www.partow.net * | |
* URL: http://www.partow.net/programming/hashfunctions/index.html * | |
* * | |
* Copyright notice: * | |
* Free use of the General Purpose Hash Function Algorithms Library is * | |
* permitted under the guidelines and in accordance with the most current * | |
* version of the Common Public License. * | |
* http://www.opensource.org/licenses/cpl1.0.php * | |
* * | |
************************************************************************** | |
*/ | |
unsigned int APHash(char* str, unsigned int len) | |
{ | |
unsigned int hash = 0xAAAAAAAA; | |
unsigned int i = 0; | |
for (i = 0; i < len; str++, i++) | |
{ | |
hash ^= ((i & 1) == 0) ? ((hash << 7) ^ (*str) * (hash >> 3)) : | |
(~((hash << 11) + ((*str) ^ (hash >> 5)))); | |
} | |
return hash; | |
} | |
} | |
struct hash_key | |
{ | |
size_t operator()(const key& _key) const | |
{ | |
return APHash((char*)_key.start, _key.len); | |
} | |
}; | |
struct equal_key | |
{ | |
std::size_t operator()(const key& key1, const key& key2) const | |
{ | |
return ((key1.len == key2.len) | |
&& memcmp(key1.start, key2.start, key1.len) == 0); | |
} | |
}; | |
typedef std::unordered_map<key, cvalue, hash_key, equal_key> key_value_map; | |
class byte_str | |
{ | |
public: | |
byte_str(byte* buffer, size_t len) | |
: m_buffer(buffer) | |
, m_len(len) | |
{ | |
memset(m_vals, 0, sizeof(m_vals)); | |
m_maxSubLen = m_len / 2 + ((m_len & 1) ? 1 : 0); | |
} | |
~byte_str() | |
{ | |
if (m_json) | |
json_object_put(m_json); | |
} | |
json_object* process() | |
{ | |
if (m_maps.empty()) | |
{ | |
m_json = json_object_new_object(); | |
_process_single_byte(); | |
_process_bytes(); | |
} | |
return m_json; | |
} | |
private: | |
void _process_single_byte() | |
{ | |
for (size_t indeks = 0; indeks < m_len; ++indeks) | |
m_vals[m_buffer[indeks]] += 1; | |
for (size_t indeks = 0; indeks < m_len; ++indeks) | |
{ | |
if (m_vals[indeks] != 0) | |
{ | |
if (m_vals[indeks] > 1) | |
{ | |
char buffer[2] = {}; | |
buffer[0] = indeks; | |
json_object_object_add(m_json, buffer, json_object_new_int(m_vals[indeks])); | |
} | |
} | |
} | |
} | |
void _process_bytes() | |
{ | |
// for every possible iteration | |
// it is impossible to have repeatable substring with sub len > max_len/2 | |
// so discard it | |
for (size_t iter = 1; iter <= m_maxSubLen; ++iter) | |
{ | |
auto sub_len = iter + 1; | |
// for every possible substring | |
// stop if start (pos + len) exceeded sub_len | |
// do the first check really necessary? | |
for (size_t indeks = 0; | |
(indeks < m_len) && (m_len - indeks >= sub_len); | |
++indeks) | |
{ | |
auto end = indeks + sub_len; | |
// start from 2 chars as 1 char already processed; | |
// check for every indeks_start -> end inside buffer | |
for (int indeks_start = indeks; indeks_start < end; ++indeks_start) | |
{ | |
auto found = m_maps.find({ &m_buffer[indeks_start], sub_len }); | |
if (found == m_maps.end()) | |
m_maps.insert(std::make_pair(key(&m_buffer[indeks_start], sub_len), cvalue(new value(indeks_start, sub_len)))); | |
else | |
{ | |
// as long as it is not intersect with previous value | |
// for example with this string (len == 3) | |
// 0xFF 0FF 0xFF 0xFF ... | |
// 1st: 0xFF 0FF 0xFF (0xFF) | |
// 2nd: (0xFF) 0xFF 0FF 0xFF (intersected, hence not valid sequence) | |
// but I believe this case is (might be) rare. | |
if (!(found->second.top()->intersect(&value(indeks_start, sub_len)))) | |
found->second.push(new value(indeks_start, sub_len)); | |
} | |
} | |
} | |
} | |
for (auto& item : m_maps) | |
{ | |
if (item.second.size() > 1) | |
{ | |
char* buffer = new char[item.first.len + 1]; | |
buffer[item.first.len] = '\0'; | |
memcpy(buffer, item.first.start, item.first.len); | |
json_object_object_add(m_json, buffer, json_object_new_int(item.second.size())); | |
delete[] buffer; | |
} | |
} | |
} | |
key_value_map m_maps; | |
size_t m_vals[256]; // hold values | |
byte* m_buffer; | |
size_t m_len; | |
size_t m_maxSubLen; | |
json_object* m_json; | |
}; | |
int main() | |
{ | |
byte val[] = "this this a test"; | |
byte_str bytestr(val, strlen((char*)val)); | |
auto json = bytestr.process(); | |
printf(json_object_to_json_string(json)); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Byte processing looks to be O(n^2)