Skip to content

Instantly share code, notes, and snippets.

@yuikns
Created November 10, 2015 13:19
Show Gist options
  • Save yuikns/4d6cfe38b084fd903f0c to your computer and use it in GitHub Desktop.
Save yuikns/4d6cfe38b084fd903f0c to your computer and use it in GitHub Desktop.
// Copyright 2014 Yu Jing<[email protected]>
#include <cstdio>
#include <syslog.h> // syslog
#include <vector>
#include <string>
#include <map>
#include <set>
#include "argcv/argcv.hh"
#include "argcv/alg/alg.hh"
#include "argcv/ir/ir.hh"
#include "argcv/ir/index/index.hh"
#include "argcv/ir/index/analyzer/basic_analyzer.hh"
#include "argcv/ir/index/analyzer/basic_tokenlizer.hh"
#include "argcv/ir/index/analyzer/util.hh"
#include "argcv/ml/ml.hh"
#include "argcv/ml/perceptron.hh"
#include "argcv/ml/aprf.hh"
#include "argcv/ml/logistic_regression.hh"
#include "argcv/ml/naive_bayes.hh" //
#include "argcv/ml/sgd.hh" //
#include "argcv/ml/svm.hh" //
#include "argcv/nio/tcp_listen.hh"
#include "argcv/random/random.hh"
#include "argcv/storage/hd_storage.hh"
#include "argcv/string/string.hh"
#include "argcv/string/hash.hh"
#include "argcv/string/uuid.hh"
#include "argcv/sys/daemon.h"
#include "argcv/timer/timer.hh"
#include "argcv/thread/threads.hh"
#include "argcv/type/opt.hh"
#include "argcv/wrapper/leveldb_wr.hh"
#include "argcv/wrapper/ini_wr.hh"
//#include "argcv/wrapper/curl_wr.hh"
using argcv::argcv_info;
using argcv::string::blz_hasher;
using argcv::ir::tf_idf;
using argcv::ml::dataset_d;
using namespace argcv::string;
using namespace argcv::alg;
using namespace argcv::random;
using namespace argcv::nio;
using namespace argcv::ml;
using namespace argcv::storage;
using namespace argcv::type;
using namespace argcv::wrapper::leveldb;
using namespace argcv::wrapper::ini;
// using namespace argcv::wrapper::curl;
using namespace argcv::ir::index;
using namespace argcv::ir::index::analyzer;
using namespace argcv::thread;
using namespace std;
// Visitor callback handed to the storage iteration in ldb_test().
//
// Prints one line per visited entry, prefixed with a running counter.
//   k, v  - key and value of the entry being visited
//   data  - must point to an int; its current value is printed and the int is
//           then incremented by one
// Always returns true.
// NOTE(review): presumably a false return would stop the iteration early -
// confirm against the storage API.
bool ldb_test_key_value_printer(const std::string& k, const std::string& v, void* data) {
    int* const counter = static_cast<int*>(data);
    const int position = *counter;
    *counter = position + 1;
    printf("%d key: %s \t value: %s \n", position, k.c_str(), v.c_str());
    return true;
}
void ldb_test() {
// const char* ddir = "test_case_leveldb_wr.data";
// ldb_wr lw(ddir, 0, true);
hd_storage lw("config.ini");
lw.conn();
lw.put("a", "00");
lw.put("a01", "01");
lw.put("a02", "02");
// lw.put("a03", "03");
// printf("destroy status : %d \n", ldb_wr::destroy(ddir));
lw.put("a04", "04");
lw.put("b01", "04");
lw.put("b03", "05");
printf("exist a? %d \n", lw.exist("a") ? 1 : 0);
std::map<std::string, std::pair<std::string, std::string>> kvs_to_put;
kvs_to_put["a07"] = std::make_pair("x07", "a07");
kvs_to_put["a08"] = std::make_pair("x08", "a07");
kvs_to_put["a09"] = std::make_pair("x09", "a07");
kvs_to_put["a10"] = std::make_pair("", "a07");
std::set<std::pair<std::string, std::string>> keys_to_del;
keys_to_del.insert(std::make_pair("a04", "a04"));
// lw.batch_put(kvs_to_put);
// lw.batch_rm(keys_to_del);
lw.rm("a02");
// lw["a80"] = "80";
// printf("single get : a80: %s \n", static_cast<std::string>(lw["a80"]).c_str());
// printf("single get : a81: %s \n", static_cast<std::string>(lw["a81"]).c_str());
// printf("single exist : a81: %d \n", static_cast<bool>(lw["a81"]));
auto bh = lw.batch_writer();
bh->put("a90", "901");
bh->put("a91", "911");
bh->rm("a91");
bh->put("a92", "921");
bh->commit();
delete bh;
int i = 0;
lw.start_with("a", ldb_test_key_value_printer, &i);
// printf("destroy status : %d \n", ldb_wr::destroy(ddir));
printf("is closed ? %d \n", lw.is_closed() ? 1 : 0);
fflush(NULL);
lw.close();
printf("is closed ? %d \n", lw.is_closed() ? 1 : 0);
fflush(NULL);
}
// Length-prefixed echo loop built on tcp_listen.
//
// Per connection id the loop runs a two-step protocol:
//   1. header: pull 4 bytes and fold them, most significant byte first, into
//      an int32_t payload size that is remembered in id_2_sz;
//   2. body: pull that many bytes and write the received data back.
// When poll() reports nothing ready the loop sleeps with a doubling back-off
// (capped at sz_max_sleep) and resets the back-off as soon as an id is ready.
// The function never returns once the pool is established (for(;;) has no
// break).
void echo_server() {
// NOTE(review): 9527 is presumably the listen port and 200000 a capacity or
// limit - confirm against tcp_listen's constructor.
tcp_listen pool(9527, 200000);
// Back-off bounds passed to usleep(), i.e. microseconds.
size_t sz_min_sleep = 100;
size_t sz_max_sleep = 300000;
size_t sz_sleep = sz_min_sleep;
if (pool._error_no() != 0) {
printf("pool establish failed .. %d \n", pool._error_no());
} else {
printf("pool established .. %d \n", pool._error_no());
// Connection id -> payload size parsed from its header. An id being present
// here means the header was already read and the body is still pending.
std::map<int, int32_t> id_2_sz;
for (;;) {
int id = pool.poll(0);
if (id != -1) {
// Something is ready: reset the idle back-off.
sz_sleep = sz_min_sleep;
// printf("#### id: %d\n", id);
tcp_listen::conn& c = pool[id];
if (id_2_sz.find(id) == id_2_sz.end()) {
// Header phase: wait until the 4 size bytes are available.
int32_t st = pool.pull(id, 4);
if (st >= 4) {
// printf("data:[%s] %lu \n",c.to_str().c_str(), c.to_str().length());
std::string s = c.to_str();
// for (size_t i = 0; i < c.to_str().length(); i++) {
// printf("%lu %d %c\n", i, c.to_str()[i], c.to_str()[i]);
//}
// sleep(3);
// c.write(c.to_str(), c.to_str().length());
// Fold the received bytes into one integer, first byte most significant.
// NOTE(review): the size comes straight from the peer and is not validated;
// a first byte >= 0x80 makes the value overflow int32_t on the last shift.
int32_t ssz = 0;
for (size_t i = 0; i < s.length(); i++) {
// printf("%d add: %d\n", ssz, s[i]);
ssz = (ssz << 8) | (0xff & s[i]);
}
// printf("size: %d \n", ssz);
id_2_sz[id] = ssz;
c.flush();
c.clear();
} else {
// Header not complete yet; forget the id if the peer has gone away.
if (c.closed()) {
id_2_sz.erase(id);
// printf("is closed .. \n");
} else {
// printf("not prepared ? \n status : %hhu\n", c._status());
}
c.flush();
}
} else {
// Body phase: the expected size is already known for this id.
// printf("let's get file size : %d \n", id_2_sz[id]);
int32_t st = pool.pull(id, id_2_sz[id]);
// printf("size : %d vs %d %s\n", st, id_2_sz[id],
// st >= id_2_sz[id] ? "all prepared" : "not fully prepared");
if (st >= id_2_sz[id]) {
// printf("data:[%s] %lu \n",c.to_str().c_str(), c.to_str().length());
// NOTE(review): s (payload + "[got]") is built but never used; the reply
// written below is the unmodified c.to_str().
std::string s = c.to_str() + "[got]";
// for (size_t i = 0; i < c.to_str().length(); i++) {
// printf("%lu %d %c\n", i, c.to_str()[i], c.to_str()[i]);
//}
// sleep(3);
// c.write(c.to_str(), c.to_str().length());
// printf("handle : [%s]\n", s.c_str());
c.write(c.to_str(), c.to_str().length());
c.flush();
c.clear();
// Request finished: the next data on this id starts with a new header.
id_2_sz.erase(id);
} else {
// printf("continue ... \n");
// Body not complete yet; drop the pending size if the peer has gone away.
if (c.closed()) {
id_2_sz.erase(id);
// printf("is closed .. \n");
} else {
// printf("not prepared ? \n status : %hhu\n", c._status());
}
c.flush();
}
}
} else {
// Nothing ready: double the sleep (capped) before sleeping, so the first
// idle sleep is 2 * sz_min_sleep.
// printf("sleep ...[%lu] time %lu\n",loop++,sz_sleep);
// fflush(NULL);
sz_sleep *= 2;
if (sz_sleep > sz_max_sleep) {
sz_sleep = sz_max_sleep;
}
usleep(sz_sleep);
}
}
// NOTE(review): unreachable - the for(;;) above contains no break or return.
printf("stop..... \n");
}
}
// Hand-written decision rule over a feature vector.
//
// Reads x[0] through x[4], so x must hold at least five elements. Returns
// true when the fixed-weight score below is strictly greater than 0.5.
// Note that the first term multiplies x[0] and x[1] together.
bool y(std::vector<double> x) {
    // Same terms, same left-to-right evaluation order as a single expression.
    const double score =
        x[0] * 0.1 * x[1] * 0.8 - x[2] * 0.3 - x[3] * 0.1 + x[4] * 0.2 + 0.2;
    return score > 0.5;
}
// Packs one labelled sample: the two feature strings become a two-element
// vector (x1 first, x2 second) and y becomes the label in .second.
std::pair<std::vector<std::string>, std::string> get_pair(const std::string& x1, const std::string& x2,
                                                          const std::string& y) {
    std::pair<std::vector<std::string>, std::string> sample;
    sample.first.push_back(x1);
    sample.first.push_back(x2);
    sample.second = y;
    return sample;
}
// Trains a naive_bayes model on fifteen hard-coded (feature1, feature2, label)
// samples and prints the label predicted for the query {"1", "S"}.
void naive_bayse_test() {
    // Training rows, fed to the model in exactly this order.
    static const char* const kSamples[][3] = {
        {"1", "S", "-1"}, {"1", "M", "-1"}, {"1", "M", "1"}, {"1", "S", "1"}, {"1", "S", "-1"},
        {"2", "S", "-1"}, {"2", "M", "-1"}, {"2", "M", "1"}, {"2", "L", "1"}, {"2", "L", "1"},
        {"3", "L", "1"},  {"3", "M", "1"},  {"3", "M", "1"}, {"3", "L", "1"}, {"3", "L", "-1"},
    };

    naive_bayes nb;
    for (const auto& row : kSamples) {
        nb.add(get_pair(row[0], row[1], row[2]));
    }
    nb.learn();

    std::vector<std::string> query = {"1", "S"};
    std::string label = nb.predict(query);
    printf("final result : %s \n", label.c_str());
}
// Linear model used to generate targets for sgd_test():
//   f(vx) = sum_i vx[i] * wx[i] + b
//
//   wx - weights; its size decides how many terms are summed
//   b  - bias added after the sum
//   vx - inputs; must have at least wx.size() elements (not checked)
double sgd_f(const std::vector<double>& wx, const double b, const std::vector<double>& vx) {
    double acc = 0;
    size_t idx = 0;
    for (const double weight : wx) {
        acc += vx[idx] * weight;
        ++idx;
    }
    return acc + b;
}
void sgd_test() {
std::vector<double> rwx;
double rb = 29;
/*
rwx.push_back(1);
rwx.push_back(3);
rwx.push_back(5);
rwx.push_back(7);
rwx.push_back(9);
rwx.push_back(11);
rwx.push_back(13);
rwx.push_back(17);
rwx.push_back(19);
rwx.push_back(23);*/
/*
for (size_t ix = 0; ix < 500; ix++) {
rwx.push_back(ix);
}*/
rwx.push_back(9);
rwx.push_back(5);
rwx.push_back(2);
rwx.push_back(7);
// rwx.push_back(3);
//dataset<double, double> data;
sgd m;
for (size_t ix = 0; ix < 10000; ix++) {
std::vector<double> vx;
for (size_t jx = 0; jx < rwx.size(); jx++) {
vx.push_back(random_int() % 10000 * 1.0 / 100);
}
// printf("%.2f * %.2f + %.2f * %.2f ^ 2 + %.2f * %.2f ^ 3 + %.2f * %.2f ^ 4 + %.2f = %.2f \n",
// rwx[0],
// vx[0], rwx[1], vx[1], rwx[2], vx[2], rwx[3], vx[3], rb, sgd_f(rwx, rb, vx));
m.add(vx, sgd_f(rwx, rb, vx));
}
printf("f(x) = %.2f * x + %.2f * x ^ 2 + %.2f * x ^ 3 + %.2f * x ^ 4 + ... + %.2f \n", rwx[0], rwx[1],
rwx[2], rwx[3], rb);
bool status = m.learn();
printf("learn status : %s \n", status ? "ok" : "failed");
}
// Instrumented value holder used to observe object lifetime in opt_test().
//
// Each instance owns one heap-allocated size_t and prints a line when it is
// constructed from a value, default-constructed, or destroyed.
//
// Ownership: the allocation is released in the destructor. Because the class
// owns a raw pointer, copying is defined explicitly as a deep copy so that two
// instances never share (and later double-free) the same allocation. No move
// operations are declared; moves therefore fall back to the copy operations,
// which keeps _val non-null for every live object.
class opt_obj {
public:
    // Creates a holder with value 0.
    opt_obj() : _val(new size_t(0)) { printf("a empty ~~ \n"); }

    // Creates a holder with the given value. Left non-explicit so existing
    // implicit conversions from size_t keep compiling.
    opt_obj(size_t val) : _val(new size_t(val)) { printf("init : %zu \n", *_val); }

    // Deep copy: the new object gets its own allocation. Prints nothing.
    opt_obj(const opt_obj& other) : _val(new size_t(*other._val)) {}

    // Copies the stored value into the existing allocation; safe for
    // self-assignment because nothing is freed or reallocated.
    opt_obj& operator=(const opt_obj& other) {
        *_val = *other._val;
        return *this;
    }

    // Reports the value being destroyed, then frees the allocation.
    ~opt_obj() {
        printf("delete : %zu \n", *_val);
        delete _val;
    }

    // Returns the stored value.
    size_t data() const { return *_val; }

public:
    // Owned storage for the value; never null while the object is alive.
    size_t* _val;
};
void opt_test() {
std::vector<opt<opt_obj>> optv;
optv.push_back(opt<opt_obj>(nullptr));
optv.push_back(opt<opt_obj>(new opt_obj(1)));
optv.push_back(opt<opt_obj>(new opt_obj(2)));
optv.push_back(opt<opt_obj>(new opt_obj(3)));
printf("my copy start \n");
opt<opt_obj> v;
v = optv[2];
printf("my copy end 1 %zu\n", v.nref());
opt<opt_obj> v2 = optv[2];
printf("my copy end 2 %zu\n", v.nref());
for (size_t ix = 1; ix < 3; ix++) printf("%zu \n", optv[ix].get().data());
printf("%zu \n", v.get().data());
printf("~~~~~~~~~~~\n");
opt<int> io;
io.get() = 10;
printf("io : %d", io.get());
std::vector<opt<int>> optv2;
optv2.push_back(opt<int>());
}
// Entry point of the scratch driver: runs the currently enabled experiments
// (opt_test, then sgd_test). Everything else below is kept as disabled code
// that can be switched back on by hand. Command-line arguments are ignored.
int main(int argc, char* argv[]) {
    (void)argc;  // unused
    (void)argv;  // unused

    opt_test();
    sgd_test();

    // --- disabled: other test drivers ---
    // naive_bayse_test();
    // ldb_test();
    // echo_server();

    // --- disabled: utf8split check ---
    // std::vector<std::string> elems = utf8split("abcd\u00A0你好世界123\n");
    // for(size_t i = 0 ; i < elems.size(); i ++ ) {
    // printf("%zu (%s):%lu\n",i,elems[i].c_str(),elems[i].length());
    //}

    // --- disabled: index / field serialisation check ---
    // hd_storage stg("config.ini");
    // index::index idx(&stg);
    // text_field f("abc");
    // long_field f(0xfff);
    // std::string v = f.to_str();
    // for (int i = 0; i < v.length(); i++) {
    // printf("[%0x] [ %c ]\n", (0xff & v[i]), v[i]);
    //}

    // --- disabled: k_th_find check ---
    /*
    for (size_t i = 0; i < rand_vector1.size(); i++) {
    if (i % 1000 == 0) {
    std::vector<int> rand_vector_tmp = rand_vector;
    assert(k_th_find(rand_vector_tmp, i) == rand_vector1[i]);
    printf("%3zu %6d vs %6d \t%s \n", i, k_th_find(rand_vector_tmp, i), rand_vector1[i],
    k_th_find(rand_vector_tmp, i) == rand_vector1[i] ? "ok" : "failed");
    }
    }*/

    // --- disabled: curl wrapper check ---
    /*
    const char * url = "https://ssl.argcv.com";
    curl_wr cinfo(url);
    cinfo.referer("http://argcv.com");
    cinfo.version(CURL_VERSION::HTTP_VERSION_1_1);
    cinfo.useragent("argcv/0.1");
    //cinfo.useragent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_4) AppleWebKit/537.36 (KHTML, like Gecko)
    Chrome/43.0.2357.134 Safari/537.36");
    curl_wr_resp resp = cinfo.get();
    for(size_t i = 0 ; i < resp.headers.size(); i ++ ) {
    printf("%zu > %s",i,resp.headers[i].c_str());
    }
    printf("body:[%s]\n",resp.body.c_str());
    */
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment