This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Redirect stderr into an in-memory buffer before parsing the CSV.
# With error_bad_lines=False pandas skips malformed rows instead of raising;
# presumably the redirect exists to collect the per-line warnings the parser
# prints for those rows -- TODO confirm against the pandas version in use.

# somewhere to store output
err = StringIO.StringIO()
# save a reference to real stderr so we can restore later
# NOTE(review): the restore is not visible in this excerpt; make sure
# `sys.stderr = oldstderr` runs even if parsing fails (try/finally).
oldstderr = sys.stderr
# set stderr to our StringIO instance
sys.stderr = err
# chunksize makes read_csv return an iterator of 1000-row DataFrames, so the
# actual parsing happens while iterating over `tp`; stderr has to stay
# redirected for the whole iteration, not just for this call.
tp = pd.read_csv(f_in, sep=',', chunksize=1000, encoding='utf-8', quotechar='"', error_bad_lines=False)
for chunk in tp: | |
chunk |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* http://en.wikipedia.org/wiki/GBK */ | |
size_t fixGBK(const char *str, size_t len) | |
{ | |
const unsigned char *string = (const unsigned char *)str; | |
size_t idx = 0; | |
for (idx = 0; idx < len; idx++) { | |
int val = string[idx], val2; | |
if (val < 128) | |
continue; | |
if (idx + 1 >= len) |
NewerOlder