Skip to content

Instantly share code, notes, and snippets.

@rblaze
Created July 31, 2012 11:56
Show Gist options
  • Save rblaze/3216437 to your computer and use it in GitHub Desktop.
Save rblaze/3216437 to your computer and use it in GitHub Desktop.
Соревнования на скорость парсинга
{-# LANGUAGE BangPatterns #-}
module Main where
import qualified Data.ByteString.Lazy.Char8 as BS
import Data.IntMap as M
import Data.Maybe
import Data.Bits
import Data.Char
import Debug.Trace
-- | Live allocations: maps an allocation address to the size recorded for it.
type AllocMap = M.IntMap Int
-- | Parse an unprefixed hexadecimal number.
--
-- Both lowercase and uppercase digits are accepted. An empty string yields 0.
-- Any character that is not a hexadecimal digit raises an 'error' naming the
-- offending character and the whole input, instead of silently producing a
-- wrong number.
readHex :: BS.ByteString -> Int
readHex s = BS.foldl' step 0 s
  where
    -- Shift the accumulator by one nibble and merge in the next digit.
    step acc c
      | isHexDigit c = shiftL acc 4 .|. digitToInt c
      | otherwise =
          error $ "readHex: invalid hex digit " ++ show c ++ " in " ++ show s
-- | Replay an allocation log and report the peak usage.
--
-- Arguments, in order: peak bytes so far, peak chunks so far, bytes currently
-- live, chunks currently live, the map of live allocations, and the remaining
-- log lines.
--
-- Recognised lines:
--
-- * first word starting with @a@, followed by an address and a size (any
--   further words are ignored): records an allocation;
-- * first word starting with @f@, followed by exactly one address: releases
--   that allocation; address 0 is ignored.
--
-- Returns @(peak bytes, peak chunks)@.
--
-- Calls 'error' on a line of any other shape (including a blank line) and on
-- a free of an address that is not in the map.
getMax :: Int -> Int -> Int -> Int -> AllocMap -> [BS.ByteString] -> (Int, Int)
getMax mb mc _ _ _ [] = (mb, mc)
getMax !mb !mc !b !c !allocs (l:ls) =
  -- 'BS.words' never yields an empty word, so 'BS.head' on a word is safe.
  case BS.words l of
    (w:saddr:ssize:_)
      | BS.head w == 'a' ->
          let size = readHex ssize
              nb = b + size
              nc = c + 1
          in getMax (max mb nb) (max mc nc) nb nc
               (M.insert (readHex saddr) size allocs) ls
    [w, fsaddr]
      | BS.head w == 'f' -> release (readHex fsaddr)
    _ -> error $ "invalid log line " ++ show l
  where
    -- A free of address 0 changes nothing.
    release 0 = getMax mb mc b c allocs ls
    release faddr =
      case M.lookup faddr allocs of
        Just fsize ->
          getMax mb mc (b - fsize) (c - 1) (M.delete faddr allocs) ls
        Nothing ->
          error $ "free of untracked address in log line " ++ show l
-- | Read the log at @/tmp/memlog.1003@, replay it from empty state and print
-- the peak byte count followed by the peak chunk count, one per line.
main :: IO ()
main =
  BS.readFile "/tmp/memlog.1003" >>= \contents -> do
    let peaks = getMax 0 0 0 0 M.empty (BS.lines contents)
    print (fst peaks)
    print (snd peaks)
#include <inttypes.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include "tree.h"
#define __unused
/*
 * One tracked allocation. Nodes live in the `alloctree` red-black tree and
 * are ordered by `ptr` (see ptrcmp()).
 */
struct node {
/* Tree linkage used by the RB_* macros. */
RB_ENTRY(node) chain;
/* Address of the allocation; this is the tree key. */
uint64_t ptr;
/* Size recorded for the allocation. */
uint64_t size;
};
/*
 * Three-way comparison of two nodes by address.
 * Returns -1 if n1 sorts before n2, 0 if the addresses are equal, 1 otherwise.
 */
static int
ptrcmp(struct node *n1, struct node *n2)
{
	const uint64_t lhs = n1->ptr;
	const uint64_t rhs = n2->ptr;

	/* Each comparison yields 0 or 1, so the difference is -1, 0 or 1. */
	return (lhs > rhs) - (lhs < rhs);
}
/* Head of the red-black tree holding every currently tracked allocation. */
RB_HEAD(alloctree, node) allocs;
/* Declare and generate the alloctree functions, ordered by ptrcmp(). */
RB_PROTOTYPE(alloctree, node, chain, ptrcmp);
RB_GENERATE(alloctree, node, chain, ptrcmp);
/* Highest values that `bytes` and `chunks` have reached so far. */
uint64_t maxbytes = 0;
uint64_t maxchunks = 0;
/* Running totals, raised by allocate() and lowered by release(). */
uint64_t bytes = 0;
uint64_t chunks = 0;
/*
 * Convert the hexadecimal text at `p` to a 64-bit unsigned value.
 * Parsing is delegated to strtoull() in base 16, so it stops at the first
 * character that is not part of the number. `p` must not be NULL.
 */
static uint64_t
gethex(const char *p)
{
	unsigned long long value = strtoull(p, NULL, 16);

	return (uint64_t)value;
}
/*
 * Handle an allocation record.
 *
 * Continues the strtok() scan started by the caller: the next two tokens are
 * the address and the size, both hexadecimal. The allocation is stored in the
 * tree, the running totals are raised and the maxima are updated.
 *
 * Aborts on a record with fewer than two fields or when memory runs out.
 */
static void
allocate(void)
{
	char *sptr = strtok(NULL, " ");
	char *ssize = strtok(NULL, " ");
	if (sptr == NULL || ssize == NULL) {
		fprintf(stderr, "malformed allocation record\n");
		abort();
	}

	struct node *p = malloc(sizeof(*p));
	if (p == NULL) {
		perror("malloc");
		abort();
	}
	p->ptr = gethex(sptr);
	p->size = gethex(ssize);

	/*
	 * RB_INSERT() refuses to insert a duplicate key and returns the node
	 * already in the tree. Keep that node, give it the new size and drop
	 * the fresh one so it is not leaked. The counters below are still
	 * raised, as they were before this check existed.
	 */
	struct node *existing = RB_INSERT(alloctree, &allocs, p);
	if (existing != NULL) {
		existing->size = p->size;
		free(p);
		p = existing;
	}

	bytes += p->size;
	chunks += 1;
	if (bytes > maxbytes) {
		maxbytes = bytes;
	}
	if (chunks > maxchunks) {
		maxchunks = chunks;
	}
}
static void
release() {
char *s = strtok(NULL, " ");
struct node n;
n.ptr = gethex(s);
if (n.ptr == 0) {
return;
}
struct node *p = RB_FIND(alloctree, &allocs, &n);
RB_REMOVE(alloctree, &allocs, p);
bytes -= p->size;
chunks -= 1;
free(p);
}
/*
 * Replay the allocation log named by argv[1] and print the peak number of
 * live chunks and bytes.
 *
 * Each line is dispatched on the first character of its first token: 'a' goes
 * to allocate(), 'f' to release(); anything else aborts.
 *
 * Returns 0 on success, 1 when no log file was named or it cannot be opened.
 */
int
main(int argc, char **argv)
{
	char buf[1024];

	if (argc < 2) {
		fprintf(stderr, "usage: %s <logfile>\n",
		    argc > 0 ? argv[0] : "memlog");
		return 1;
	}

	FILE *f = fopen(argv[1], "r");
	if (f == NULL) {
		perror(argv[1]);
		return 1;
	}

	/* A line longer than the buffer is read, and parsed, in pieces. */
	while (fgets(buf, sizeof(buf), f) != NULL) {
		char *s = strtok(buf, " ");
		if (s == NULL) {
			/* Nothing but separators on this line. */
			abort();
		}
		if (*s == 'a') {
			allocate();
		} else if (*s == 'f') {
			release();
		} else {
			abort();
		}
	}
	fclose(f);

	printf("chunks: %" PRIu64 "\n", maxchunks);
	printf("bytes: %" PRIu64 "\n", maxbytes);
	return 0;
}
{-# LANGUAGE BangPatterns #-}
module Main where
import Data.IntMap as M
import Data.Text(pack, empty)
import Data.Text.Read
import Data.Maybe
import Debug.Trace
-- | Live allocations: maps an allocation address to the size recorded for it.
type AllocMap = M.IntMap Int
-- | Parse a hexadecimal number with 'hexadecimal' from "Data.Text.Read".
--
-- The whole string must be consumed. A parse failure or leftover input calls
-- 'error' with the shown parser result.
readHex :: String -> Int
readHex str =
  case hexadecimal (pack str) of
    Right (value, rest)
      | rest == Data.Text.empty -> value
    other -> error (show other)
-- | Replay an allocation log and report the peak usage.
--
-- The first two arguments are the byte and chunk counts in effect before the
-- first line; they seed both the running totals and the peaks. The third is
-- the map of live allocations and the fourth the log lines.
--
-- Recognised lines:
--
-- * @a ADDR SIZE ...@ records an allocation (further words are ignored);
-- * @f ADDR@ releases one; address 0 is ignored.
--
-- Returns @(peak bytes, peak chunks)@. Running totals and peaks are tracked
-- separately, so frees late in the log do not lower the reported peaks.
--
-- Calls 'error' on a line of any other shape (including a blank line) and on
-- a free of an address that is not in the map.
getMax :: Int -> Int -> AllocMap -> [String] -> (Int, Int)
getMax mb mc allocs0 logLines = go mb mc mb mc allocs0 logLines
  where
    -- go peakBytes peakChunks liveBytes liveChunks liveAllocations remaining
    go !peakB !peakC _ _ _ [] = (peakB, peakC)
    go !peakB !peakC !curB !curC !allocs (l:ls) =
      case words l of
        ("a":saddr:ssize:_) ->
          let size = readHex ssize
              nb = curB + size
              nc = curC + 1
          in go (max peakB nb) (max peakC nc) nb nc
               (M.insert (readHex saddr) size allocs) ls
        ["f", fsaddr] ->
          case readHex fsaddr of
            -- A free of address 0 changes nothing.
            0 -> go peakB peakC curB curC allocs ls
            faddr ->
              case M.lookup faddr allocs of
                Just fsize ->
                  go peakB peakC (curB - fsize) (curC - 1)
                    (M.delete faddr allocs) ls
                Nothing ->
                  error $ "free of untracked address in log line " ++ l
        _ -> error $ "invalid log line " ++ l
-- | Read the log at @/tmp/memlog.1003@, replay it from empty state and print
-- the two components of the result of 'getMax', one per line.
main :: IO ()
main = do
  contents <- readFile "/tmp/memlog.1003"
  let result = getMax 0 0 M.empty (lines contents)
  print (fst result)
  print (snd result)
allocs = {}
used = 0
chunks = 0
maxused = 0
maxchunks = 0
with open("/var/tmp/memlog.1003") as f:
for l in f:
try:
if l[0] == 'a':
_, sptr, ssize, _ = l.strip().split(' ', 3)
# ptr = int(sptr, 16)
size = int(ssize, 16)
allocs[sptr] = size
chunks += 1
used += size
maxchunks = max(chunks, maxchunks)
maxused = max(used, maxused)
else:
try:
_, sptr = l.strip().split(' ')
# ptr = int(sptr, 16)
# if ptr == 0:
if sptr == "0000000000000000":
continue
size = allocs[sptr]
del allocs[sptr]
used -= size
chunks -= 1
except KeyError:
print allocs
raise
except ValueError:
print l
raise
print "max chunks: ", maxchunks
print "max bytes: ", maxused
a 00000001082002d0 0000000000000029 0000000000f1cf67 0000000000f1cf8d 0000000000a47e12 0000000000a497e3 00000000009cf739
a 0000000107b00000 0000000000000080 0000000000f1a92d 0000000000bce745 0000000000c0eb4b 0000000000c0c30a 0000000000c0a15c
f 0000000107f00100
a 0000000107d00c00 0000000000000148 0000000000f182f3 0000000000c129c9 0000000000c0eb61 0000000000c0c30a 0000000000c0a15c
@rblaze
Copy link
Author

rblaze commented Jul 31, 2012

Питон - 16m 30s
Хаскель со String - 30m и не дождался завершения

@rblaze
Copy link
Author

rblaze commented Jul 31, 2012

Хаскель с ByteString - 14m30s

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment