Last active
March 23, 2017 17:44
-
-
Save ivan-ushakov/2d048afba76cc2e108eea01be306e644 to your computer and use it in GitHub Desktop.
Calculate frequency of words in text. Version without std containers.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// requires C++11, compile with -std=c++11 flag | |
#include <stdio.h> | |
#include <string.h> | |
#include <ctype.h> | |
#include <algorithm> | |
namespace t1 | |
{ | |
template <typename T> | |
class Vector | |
{ | |
T *data_; | |
size_t size_; | |
size_t cursor_; | |
public: | |
Vector() : data_(nullptr), size_(0), cursor_(0) | |
{ | |
} | |
Vector(size_t size) : Vector() | |
{ | |
data_ = new T[size]; | |
size_ = size; | |
} | |
Vector(const Vector<T> &other) | |
{ | |
data_ = new T[other.size_]; | |
size_ = other.size_; | |
cursor_ = other.cursor_; | |
for (size_t i = 0; i < other.size(); i++) | |
{ | |
data_[i] = other.data_[i]; | |
} | |
} | |
Vector<T> &operator=(Vector<T> other) | |
{ | |
swap(*this, other); | |
return *this; | |
} | |
void push_back(const T &value) | |
{ | |
if (cursor_ == size_) | |
{ | |
Vector<T> t(size_t(1.2 * size_)); | |
for (size_t i = 0; i < size_; i++) | |
{ | |
t.data_[i] = data_[i]; | |
} | |
t.cursor_ = cursor_; | |
swap(*this, t); | |
} | |
data_[cursor_] = value; | |
cursor_ = cursor_ + 1; | |
} | |
T &operator[](size_t index) | |
{ | |
return data_[index]; | |
} | |
const T &operator[](size_t index) const | |
{ | |
return data_[index]; | |
} | |
void clear() | |
{ | |
cursor_ = 0; | |
} | |
size_t size() const | |
{ | |
return cursor_; | |
} | |
T* data() | |
{ | |
return data_; | |
} | |
~Vector() | |
{ | |
if (data_ != nullptr) | |
{ | |
delete[] data_; | |
} | |
} | |
private: | |
void swap(Vector<T> &a, Vector<T> &b) | |
{ | |
{ | |
T *t = a.data_; | |
a.data_ = b.data_; | |
b.data_ = t; | |
} | |
{ | |
size_t t = a.size_; | |
a.size_ = b.size_; | |
b.size_ = t; | |
} | |
{ | |
size_t t = a.cursor_; | |
a.cursor_ = b.cursor_; | |
b.cursor_ = t; | |
} | |
} | |
}; | |
struct String | |
{ | |
char *data; | |
size_t size; | |
String() : data(nullptr), size(0) | |
{ | |
} | |
String(char *data, size_t size) : data(data), size(size) | |
{ | |
} | |
}; | |
bool operator==(const String &a, const String &b) | |
{ | |
if (a.size != b.size) | |
{ | |
return false; | |
} | |
return memcmp(a.data, b.data, a.size) == 0; | |
} | |
bool operator<(const String &a, const String &b) | |
{ | |
size_t size = a.size < b.size ? a.size : b.size; | |
int r = memcmp(a.data, b.data, size); | |
if (r < 0) | |
{ | |
return true; | |
} | |
if (r > 0) | |
{ | |
return false; | |
} | |
return a.size < b.size; | |
} | |
struct Counter | |
{ | |
String word; | |
int value; | |
void increase() | |
{ | |
value = value + 1; | |
} | |
}; | |
class File | |
{ | |
FILE *file_; | |
bool error_; | |
char *buffer_; | |
size_t begin_; | |
size_t end_; | |
static const size_t BufferSize = 2 * 4096; | |
public: | |
File(const char *name, const char *mode) : file_(NULL), error_(false) | |
{ | |
file_ = fopen(name, mode); | |
buffer_ = new char[BufferSize]; | |
begin_ = 0; | |
end_ = 0; | |
} | |
char get() | |
{ | |
if (begin_ == end_) | |
{ | |
begin_ = 0; | |
end_ = fread(buffer_, sizeof(char), BufferSize, file_); | |
if (end_ == 0) | |
{ | |
error_ = true; | |
return 0; | |
} | |
} | |
return buffer_[begin_++]; | |
} | |
bool fail() const | |
{ | |
return file_ == NULL || error_; | |
} | |
void write(t1::Counter &counter) | |
{ | |
fprintf(file_, "%d ", counter.value); | |
fwrite(counter.word.data, sizeof(char), counter.word.size, file_); | |
fprintf(file_, "\n"); | |
} | |
~File() | |
{ | |
if (file_ != NULL) | |
{ | |
fclose(file_); | |
} | |
if (buffer_ != nullptr) | |
{ | |
delete[] buffer_; | |
} | |
} | |
}; | |
class Guard | |
{ | |
const Vector<String> &vector_; | |
public: | |
Guard(const Vector<String> &vector) : vector_(vector) | |
{ | |
} | |
~Guard() | |
{ | |
for (size_t i = 0; i < vector_.size(); i++) | |
{ | |
if (vector_[i].data != nullptr) | |
{ | |
delete[] vector_[i].data; | |
} | |
} | |
} | |
}; | |
} | |
int main(int argc, char const *argv[]) | |
{ | |
if (argc != 3) | |
{ | |
printf("usage: freqs in.txt out.txt\n"); | |
return 0; | |
} | |
t1::File input(argv[1], "r"); | |
if (input.fail()) | |
{ | |
printf("error: can't read input file\n"); | |
return -1; | |
} | |
t1::Vector<t1::String> words(8); | |
t1::Guard guard(words); | |
t1::Vector<char> buffer(16); | |
while (!input.fail()) | |
{ | |
int c = input.get(); | |
if (isalpha(c)) | |
{ | |
buffer.push_back(tolower(c)); | |
} | |
else | |
{ | |
if (buffer.size() > 0) | |
{ | |
char *word = new char[buffer.size()]; | |
memcpy(word, buffer.data(), buffer.size()); | |
words.push_back(t1::String(word, buffer.size())); | |
buffer.clear(); | |
} | |
} | |
} | |
std::sort(words.data(), words.data() + words.size(), [](const t1::String &a, const t1::String &b) { | |
return a < b; | |
}); | |
t1::Vector<t1::Counter> counters(8); | |
t1::Counter counter; | |
counter.word = words[0]; | |
counter.value = 1; | |
for (size_t i = 1; i < words.size(); i++) | |
{ | |
t1::String &s = words[i]; | |
if (counter.word == s) | |
{ | |
counter.increase(); | |
} | |
else | |
{ | |
counters.push_back(counter); | |
counter.word = s; | |
counter.value = 1; | |
} | |
} | |
counters.push_back(counter); | |
std::sort(counters.data(), counters.data() + counters.size(), [](const t1::Counter &a, const t1::Counter &b) { | |
if (a.value < b.value) | |
{ | |
return false; | |
} | |
if (a.value > b.value) | |
{ | |
return true; | |
} | |
return a.word < b.word; | |
}); | |
t1::File output(argv[2], "w"); | |
if (output.fail()) | |
{ | |
printf("error: can't create output file\n"); | |
return -1; | |
} | |
for (size_t i = 0; i < counters.size(); i++) | |
{ | |
output.write(counters[i]); | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment