Created
November 10, 2015 13:19
-
-
Save yuikns/4d6cfe38b084fd903f0c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright 2014 Yu Jing<[email protected]> | |
#include <cstdio> | |
#include <syslog.h> // syslog | |
#include <vector> | |
#include <string> | |
#include <map> | |
#include <set> | |
#include "argcv/argcv.hh" | |
#include "argcv/alg/alg.hh" | |
#include "argcv/ir/ir.hh" | |
#include "argcv/ir/index/index.hh" | |
#include "argcv/ir/index/analyzer/basic_analyzer.hh" | |
#include "argcv/ir/index/analyzer/basic_tokenlizer.hh" | |
#include "argcv/ir/index/analyzer/util.hh" | |
#include "argcv/ml/ml.hh" | |
#include "argcv/ml/perceptron.hh" | |
#include "argcv/ml/aprf.hh" | |
#include "argcv/ml/logistic_regression.hh" | |
#include "argcv/ml/naive_bayes.hh" // | |
#include "argcv/ml/sgd.hh" // | |
#include "argcv/ml/svm.hh" // | |
#include "argcv/nio/tcp_listen.hh" | |
#include "argcv/random/random.hh" | |
#include "argcv/storage/hd_storage.hh" | |
#include "argcv/string/string.hh" | |
#include "argcv/string/hash.hh" | |
#include "argcv/string/uuid.hh" | |
#include "argcv/sys/daemon.h" | |
#include "argcv/timer/timer.hh" | |
#include "argcv/thread/threads.hh" | |
#include "argcv/type/opt.hh" | |
#include "argcv/wrapper/leveldb_wr.hh" | |
#include "argcv/wrapper/ini_wr.hh" | |
//#include "argcv/wrapper/curl_wr.hh" | |
using argcv::argcv_info; | |
using argcv::string::blz_hasher; | |
using argcv::ir::tf_idf; | |
using argcv::ml::dataset_d; | |
using namespace argcv::string; | |
using namespace argcv::alg; | |
using namespace argcv::random; | |
using namespace argcv::nio; | |
using namespace argcv::ml; | |
using namespace argcv::storage; | |
using namespace argcv::type; | |
using namespace argcv::wrapper::leveldb; | |
using namespace argcv::wrapper::ini; | |
// using namespace argcv::wrapper::curl; | |
using namespace argcv::ir::index; | |
using namespace argcv::ir::index::analyzer; | |
using namespace argcv::thread; | |
using namespace std; | |
/**
 * Iteration callback used by ldb_test(): prints one key/value pair prefixed
 * with a running index.
 *
 * @param k    key of the current entry
 * @param v    value of the current entry
 * @param data caller-supplied pointer to an `int` counter. The counter is
 *             printed and then incremented. When null, a local counter is used
 *             so the entry is printed with index 0 instead of dereferencing null.
 * @return always true.
 *         NOTE(review): the previously commented-out alternative
 *         `(*_offset) < 2` suggests the return value tells the caller whether
 *         to keep iterating — confirm against the storage API.
 */
bool ldb_test_key_value_printer(const std::string& k, const std::string& v, void* data) {
    int fallback_counter = 0;
    int* offset = (data != nullptr) ? static_cast<int*>(data) : &fallback_counter;
    printf("%d key: %s \t value: %s \n", (*offset)++, k.c_str(), v.c_str());
    return true;
}
void ldb_test() { | |
// const char* ddir = "test_case_leveldb_wr.data"; | |
// ldb_wr lw(ddir, 0, true); | |
hd_storage lw("config.ini"); | |
lw.conn(); | |
lw.put("a", "00"); | |
lw.put("a01", "01"); | |
lw.put("a02", "02"); | |
// lw.put("a03", "03"); | |
// printf("destroy status : %d \n", ldb_wr::destroy(ddir)); | |
lw.put("a04", "04"); | |
lw.put("b01", "04"); | |
lw.put("b03", "05"); | |
printf("exist a? %d \n", lw.exist("a") ? 1 : 0); | |
std::map<std::string, std::pair<std::string, std::string>> kvs_to_put; | |
kvs_to_put["a07"] = std::make_pair("x07", "a07"); | |
kvs_to_put["a08"] = std::make_pair("x08", "a07"); | |
kvs_to_put["a09"] = std::make_pair("x09", "a07"); | |
kvs_to_put["a10"] = std::make_pair("", "a07"); | |
std::set<std::pair<std::string, std::string>> keys_to_del; | |
keys_to_del.insert(std::make_pair("a04", "a04")); | |
// lw.batch_put(kvs_to_put); | |
// lw.batch_rm(keys_to_del); | |
lw.rm("a02"); | |
// lw["a80"] = "80"; | |
// printf("single get : a80: %s \n", static_cast<std::string>(lw["a80"]).c_str()); | |
// printf("single get : a81: %s \n", static_cast<std::string>(lw["a81"]).c_str()); | |
// printf("single exist : a81: %d \n", static_cast<bool>(lw["a81"])); | |
auto bh = lw.batch_writer(); | |
bh->put("a90", "901"); | |
bh->put("a91", "911"); | |
bh->rm("a91"); | |
bh->put("a92", "921"); | |
bh->commit(); | |
delete bh; | |
int i = 0; | |
lw.start_with("a", ldb_test_key_value_printer, &i); | |
// printf("destroy status : %d \n", ldb_wr::destroy(ddir)); | |
printf("is closed ? %d \n", lw.is_closed() ? 1 : 0); | |
fflush(NULL); | |
lw.close(); | |
printf("is closed ? %d \n", lw.is_closed() ? 1 : 0); | |
fflush(NULL); | |
} | |
void echo_server() { | |
tcp_listen pool(9527, 200000); | |
size_t sz_min_sleep = 100; | |
size_t sz_max_sleep = 300000; | |
size_t sz_sleep = sz_min_sleep; | |
if (pool._error_no() != 0) { | |
printf("pool establish failed .. %d \n", pool._error_no()); | |
} else { | |
printf("pool established .. %d \n", pool._error_no()); | |
std::map<int, int32_t> id_2_sz; | |
for (;;) { | |
int id = pool.poll(0); | |
if (id != -1) { | |
sz_sleep = sz_min_sleep; | |
// printf("#### id: %d\n", id); | |
tcp_listen::conn& c = pool[id]; | |
if (id_2_sz.find(id) == id_2_sz.end()) { | |
int32_t st = pool.pull(id, 4); | |
if (st >= 4) { | |
// printf("data:[%s] %lu \n",c.to_str().c_str(), c.to_str().length()); | |
std::string s = c.to_str(); | |
// for (size_t i = 0; i < c.to_str().length(); i++) { | |
// printf("%lu %d %c\n", i, c.to_str()[i], c.to_str()[i]); | |
//} | |
// sleep(3); | |
// c.write(c.to_str(), c.to_str().length()); | |
int32_t ssz = 0; | |
for (size_t i = 0; i < s.length(); i++) { | |
// printf("%d add: %d\n", ssz, s[i]); | |
ssz = (ssz << 8) | (0xff & s[i]); | |
} | |
// printf("size: %d \n", ssz); | |
id_2_sz[id] = ssz; | |
c.flush(); | |
c.clear(); | |
} else { | |
if (c.closed()) { | |
id_2_sz.erase(id); | |
// printf("is closed .. \n"); | |
} else { | |
// printf("not prepared ? \n status : %hhu\n", c._status()); | |
} | |
c.flush(); | |
} | |
} else { | |
// printf("let's get file size : %d \n", id_2_sz[id]); | |
int32_t st = pool.pull(id, id_2_sz[id]); | |
// printf("size : %d vs %d %s\n", st, id_2_sz[id], | |
// st >= id_2_sz[id] ? "all prepared" : "not fully prepared"); | |
if (st >= id_2_sz[id]) { | |
// printf("data:[%s] %lu \n",c.to_str().c_str(), c.to_str().length()); | |
std::string s = c.to_str() + "[got]"; | |
// for (size_t i = 0; i < c.to_str().length(); i++) { | |
// printf("%lu %d %c\n", i, c.to_str()[i], c.to_str()[i]); | |
//} | |
// sleep(3); | |
// c.write(c.to_str(), c.to_str().length()); | |
// printf("handle : [%s]\n", s.c_str()); | |
c.write(c.to_str(), c.to_str().length()); | |
c.flush(); | |
c.clear(); | |
id_2_sz.erase(id); | |
} else { | |
// printf("continue ... \n"); | |
if (c.closed()) { | |
id_2_sz.erase(id); | |
// printf("is closed .. \n"); | |
} else { | |
// printf("not prepared ? \n status : %hhu\n", c._status()); | |
} | |
c.flush(); | |
} | |
} | |
} else { | |
// printf("sleep ...[%lu] time %lu\n",loop++,sz_sleep); | |
// fflush(NULL); | |
sz_sleep *= 2; | |
if (sz_sleep > sz_max_sleep) { | |
sz_sleep = sz_max_sleep; | |
} | |
usleep(sz_sleep); | |
} | |
} | |
printf("stop..... \n"); | |
} | |
} | |
/**
 * Synthetic binary label for a 5-feature sample.
 *
 * Evaluates
 *   x0*0.1*x1*0.8 - x2*0.3 - x3*0.1 + x4*0.2 + 0.2 > 0.5
 * Note that the first term is the product of x0 and x1, not a sum.
 *
 * @param x feature vector; only the first five entries are read.
 * @return true when the expression above holds; false when `x` has fewer than
 *         five entries (previously that case read out of bounds).
 */
bool y(const std::vector<double>& x) {
    if (x.size() < 5) {
        return false;
    }
    return (x[0] * 0.1 * x[1] * 0.8 - x[2] * 0.3 - x[3] * 0.1 + x[4] * 0.2 + 0.2 > 0.5);
}
/**
 * Packs two feature strings and a label into the (features, label) pair shape
 * that naive_bayse_test() passes to naive_bayes::add().
 *
 * @param x1 first feature value
 * @param x2 second feature value
 * @param y  class label
 * @return pair whose `first` is {x1, x2} and whose `second` is y. The feature
 *         vector is built as a temporary so it is moved, not copied, into the
 *         pair.
 */
std::pair<std::vector<std::string>, std::string> get_pair(const std::string& x1, const std::string& x2,
                                                          const std::string& y) {
    return {std::vector<std::string>{x1, x2}, y};
}
/**
 * Trains a naive_bayes model on a fixed 15-row toy data set (two categorical
 * features, label "1" or "-1") and prints the prediction for ("1", "S").
 */
void naive_bayse_test() {
    // Rows are {feature 1, feature 2, label}, fed to the model in this order.
    static const char* const samples[][3] = {
        {"1", "S", "-1"}, {"1", "M", "-1"}, {"1", "M", "1"}, {"1", "S", "1"}, {"1", "S", "-1"},
        {"2", "S", "-1"}, {"2", "M", "-1"}, {"2", "M", "1"}, {"2", "L", "1"}, {"2", "L", "1"},
        {"3", "L", "1"},  {"3", "M", "1"},  {"3", "M", "1"}, {"3", "L", "1"}, {"3", "L", "-1"},
    };

    naive_bayes nb;
    for (const auto& row : samples) {
        nb.add(get_pair(row[0], row[1], row[2]));
    }
    nb.learn();

    std::vector<std::string> query = {"1", "S"};
    std::string val = nb.predict(query);
    printf("final result : %s \n", val.c_str());
}
/**
 * Linear target function used to generate training data: dot(wx, vx) + b.
 *
 * @param wx weights
 * @param b  bias added after the weighted sum
 * @param vx feature values, paired index-by-index with `wx`
 * @return sum of wx[i] * vx[i] over the indices both vectors share, plus b.
 *         If the vectors differ in length, the extra entries of the longer one
 *         are ignored (previously a shorter `vx` was read out of bounds).
 */
double sgd_f(const std::vector<double>& wx, const double b, const std::vector<double>& vx) {
    const size_t n = wx.size() < vx.size() ? wx.size() : vx.size();
    double fx = 0;
    for (size_t i = 0; i < n; i++) {
        fx += vx[i] * wx[i];
    }
    return fx + b;
}
void sgd_test() { | |
std::vector<double> rwx; | |
double rb = 29; | |
/* | |
rwx.push_back(1); | |
rwx.push_back(3); | |
rwx.push_back(5); | |
rwx.push_back(7); | |
rwx.push_back(9); | |
rwx.push_back(11); | |
rwx.push_back(13); | |
rwx.push_back(17); | |
rwx.push_back(19); | |
rwx.push_back(23);*/ | |
/* | |
for (size_t ix = 0; ix < 500; ix++) { | |
rwx.push_back(ix); | |
}*/ | |
rwx.push_back(9); | |
rwx.push_back(5); | |
rwx.push_back(2); | |
rwx.push_back(7); | |
// rwx.push_back(3); | |
//dataset<double, double> data; | |
sgd m; | |
for (size_t ix = 0; ix < 10000; ix++) { | |
std::vector<double> vx; | |
for (size_t jx = 0; jx < rwx.size(); jx++) { | |
vx.push_back(random_int() % 10000 * 1.0 / 100); | |
} | |
// printf("%.2f * %.2f + %.2f * %.2f ^ 2 + %.2f * %.2f ^ 3 + %.2f * %.2f ^ 4 + %.2f = %.2f \n", | |
// rwx[0], | |
// vx[0], rwx[1], vx[1], rwx[2], vx[2], rwx[3], vx[3], rb, sgd_f(rwx, rb, vx)); | |
m.add(vx, sgd_f(rwx, rb, vx)); | |
} | |
printf("f(x) = %.2f * x + %.2f * x ^ 2 + %.2f * x ^ 3 + %.2f * x ^ 4 + ... + %.2f \n", rwx[0], rwx[1], | |
rwx[2], rwx[3], rb); | |
bool status = m.learn(); | |
printf("learn status : %s \n", status ? "ok" : "failed"); | |
} | |
/**
 * Small tracing object used by opt_test(): holds one heap-allocated size_t and
 * prints a line when it is constructed and when it is destroyed, so the
 * lifetime management of opt<opt_obj> can be observed on stdout.
 *
 * The value is owned by the object: it is released in the destructor, and
 * copies get their own allocation so two objects never free the same pointer.
 */
class opt_obj {
public:
    /** Creates an object holding 0. */
    opt_obj() : _val(new size_t(0)) { printf("a empty ~~ \n"); }

    /** Creates an object holding `val`. */
    opt_obj(size_t val) : _val(new size_t(val)) { printf("init : %zu \n", *_val); }

    /** Deep copy: allocates separate storage holding the same value (silent). */
    opt_obj(const opt_obj& other) : _val(new size_t(*other._val)) {}

    /** Copies the value into the existing storage; safe for self-assignment. */
    opt_obj& operator=(const opt_obj& other) {
        *_val = *other._val;
        return *this;
    }

    /** Announces the held value, then frees it. */
    ~opt_obj() {
        printf("delete : %zu \n", *_val);
        delete _val;
    }

    /** @return the held value. */
    size_t data() const { return *_val; }

public:
    size_t* _val;  // owned; never null while the object is alive
};
void opt_test() { | |
std::vector<opt<opt_obj>> optv; | |
optv.push_back(opt<opt_obj>(nullptr)); | |
optv.push_back(opt<opt_obj>(new opt_obj(1))); | |
optv.push_back(opt<opt_obj>(new opt_obj(2))); | |
optv.push_back(opt<opt_obj>(new opt_obj(3))); | |
printf("my copy start \n"); | |
opt<opt_obj> v; | |
v = optv[2]; | |
printf("my copy end 1 %zu\n", v.nref()); | |
opt<opt_obj> v2 = optv[2]; | |
printf("my copy end 2 %zu\n", v.nref()); | |
for (size_t ix = 1; ix < 3; ix++) printf("%zu \n", optv[ix].get().data()); | |
printf("%zu \n", v.get().data()); | |
printf("~~~~~~~~~~~\n"); | |
opt<int> io; | |
io.get() = 10; | |
printf("io : %d", io.get()); | |
std::vector<opt<int>> optv2; | |
optv2.push_back(opt<int>()); | |
} | |
/**
 * Entry point of the scratch harness: runs the currently enabled experiments
 * (opt_test, then sgd_test) and exits with status 0. Command-line arguments
 * are ignored.
 */
int main(int argc, char* argv[]) {
    (void)argc;
    (void)argv;

    opt_test();
    sgd_test();

    // Experiments that are currently switched off:
    // naive_bayse_test();
    // ldb_test();
    // echo_server();
    //
    // Earlier scratch snippets (utf8split dump, index/text_field/long_field
    // byte dump, k_th_find check against a sorted vector, curl_wr GET of
    // https://ssl.argcv.com) were disabled as well and are not run.

    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment