Created
July 31, 2012 11:56
-
-
Save rblaze/3216437 to your computer and use it in GitHub Desktop.
Соревнования на скорость парсинга
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE BangPatterns #-} | |
module Main where | |
import qualified Data.ByteString.Lazy.Char8 as BS | |
import Data.IntMap as M | |
import Data.Maybe | |
import Data.Bits | |
import Data.Char | |
import Debug.Trace | |
type AllocMap = M.IntMap Int | |
-- | Parse an unsigned hexadecimal number (no @0x@ prefix) from a lazy
-- 'BS.ByteString'.
--
-- Accepts the digits @0-9@, @a-f@ and @A-F@; the empty string yields 0.
-- Any other character aborts with 'error' instead of being silently folded
-- into a meaningless value.
readHex :: BS.ByteString -> Int
readHex = BS.foldl' digit 0
  where
    -- Shift the accumulator one nibble to the left and add the next digit.
    digit acc c = shiftL acc 4 + toVal c
    toVal v
      | v >= '0' && v <= '9' = ord v - ord '0'
      | v >= 'a' && v <= 'f' = ord v - ord 'a' + 10
      | v >= 'A' && v <= 'F' = ord v - ord 'A' + 10
      | otherwise = error $ "invalid hex digit " ++ show v
-- | Replay an allocation log and return the peak usage as
-- @(peak live bytes, peak live chunks)@.
--
-- Arguments, in order: peak bytes so far, peak chunks so far, currently live
-- bytes, currently live chunks, the map from address to size of every live
-- allocation, and the remaining log lines.
--
-- A line is dispatched on the first character of its first word. All numbers
-- are hexadecimal; extra trailing fields on allocation lines are ignored:
--
-- > a <addr> <size> ...
-- > f <addr>
--
-- A free of address 0 is a no-op. Malformed lines and frees of addresses that
-- are not currently tracked abort with 'error', naming the offending line.
getMax :: Int -> Int -> Int -> Int -> AllocMap -> [BS.ByteString] -> (Int, Int)
getMax mb mc _ _ _ [] = (mb, mc)
getMax !mb !mc !b !c !allocs (l:ls) =
  case BS.words l of
    (w:ws) | Just (tag, _) <- BS.uncons w -> step tag ws
    _ -> invalid
  where
    invalid = error $ "invalid log line " ++ show l

    -- Allocation: grow the live totals and raise the peaks if exceeded.
    step 'a' (saddr:ssize:_) =
      let size = readHex ssize
          nb = b + size
          nc = c + 1
       in getMax (max mb nb) (max mc nc) nb nc (M.insert (readHex saddr) size allocs) ls
    -- Release: shrink the live totals by the size recorded at allocation time.
    step 'f' [fsaddr] =
      case readHex fsaddr of
        0 -> getMax mb mc b c allocs ls
        faddr -> case M.lookup faddr allocs of
          Just fsize -> getMax mb mc (b - fsize) (c - 1) (M.delete faddr allocs) ls
          Nothing -> error $ "free of untracked address in log line " ++ show l
    step _ _ = invalid
-- | Read the allocation log from its fixed location and print the peak number
-- of live bytes followed by the peak number of live chunks, one per line.
main :: IO ()
main = do
  contents <- BS.readFile "/tmp/memlog.1003"
  let peak = getMax 0 0 0 0 M.empty (BS.lines contents)
  print (fst peak)
  print (snd peak)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <inttypes.h> | |
#include <stdint.h> | |
#include <string.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include "tree.h" | |
#define __unused | |
struct node { | |
RB_ENTRY(node) chain; | |
uint64_t ptr; | |
uint64_t size; | |
}; | |
static int | |
ptrcmp(struct node *n1, struct node *n2) { | |
if (n1->ptr < n2->ptr) { | |
return -1; | |
} else if (n1->ptr == n2->ptr) { | |
return 0; | |
} | |
return 1; | |
} | |
RB_HEAD(alloctree, node) allocs; | |
RB_PROTOTYPE(alloctree, node, chain, ptrcmp); | |
RB_GENERATE(alloctree, node, chain, ptrcmp); | |
uint64_t maxbytes = 0; | |
uint64_t maxchunks = 0; | |
uint64_t bytes = 0; | |
uint64_t chunks = 0; | |
/*
 * Parse a base-16 number from the start of `p` with strtoull(); parsing stops
 * at the first character that is not part of the number.
 */
static uint64_t gethex(const char *p) {
    unsigned long long parsed = strtoull(p, NULL, 16);

    return (uint64_t)parsed;
}
static void | |
allocate() { | |
struct node *p = malloc(sizeof(*p)); | |
char *s = strtok(NULL, " "); | |
p->ptr = gethex(s); | |
s = strtok(NULL, " "); | |
p->size = gethex(s); | |
RB_INSERT(alloctree, &allocs, p); | |
bytes += p->size; | |
chunks += 1; | |
if (bytes > maxbytes) { | |
maxbytes = bytes; | |
} | |
if (chunks > maxchunks) { | |
maxchunks = chunks; | |
} | |
} | |
static void | |
release() { | |
char *s = strtok(NULL, " "); | |
struct node n; | |
n.ptr = gethex(s); | |
if (n.ptr == 0) { | |
return; | |
} | |
struct node *p = RB_FIND(alloctree, &allocs, &n); | |
RB_REMOVE(alloctree, &allocs, p); | |
bytes -= p->size; | |
chunks -= 1; | |
free(p); | |
} | |
int | |
main(int argc, char **argv) { | |
char buf[1024]; | |
FILE *f = fopen(argv[1], "r"); | |
for (;;) { | |
if (fgets(buf, 1000, f) == NULL) { | |
break; | |
} | |
char *s = strtok(buf, " "); | |
if (*s == 'a') { | |
allocate(); | |
} else if (*s == 'f') { | |
release(); | |
} else abort(); | |
} | |
printf("chunks: %"PRIu64"\n", maxchunks); | |
printf("bytes: %"PRIu64"\n", maxbytes); | |
return 0; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE BangPatterns #-} | |
module Main where | |
import Data.IntMap as M | |
import Data.Text(pack, empty) | |
import Data.Text.Read | |
import Data.Maybe | |
import Debug.Trace | |
type AllocMap = M.IntMap Int | |
-- | Parse a hexadecimal number that must span the whole string.
--
-- Calls 'error' with the shown parser result when parsing fails or when
-- unparsed input is left over.
readHex :: String -> Int
readHex s =
  case hexadecimal (pack s) of
    Right (value, rest) | rest == Data.Text.empty -> value
    other -> error (show other)
-- | Replay an allocation log and return the peak usage as
-- @(peak live bytes, peak live chunks)@.
--
-- The first two arguments seed both the live totals and the peaks (pass 0 and
-- 0 for a fresh run); the map holds the size of every live allocation keyed
-- by address.
--
-- Line formats (numbers are hexadecimal; extra trailing fields on allocation
-- lines are ignored):
--
-- > a <addr> <size> ...
-- > f <addr>
--
-- A free of address 0 is a no-op. Malformed lines and frees of addresses that
-- are not currently tracked abort with 'error', naming the offending line.
getMax :: Int -> Int -> AllocMap -> [String] -> (Int, Int)
getMax mb mc = go mb mc mb mc
  where
    -- Peaks are carried separately from the live totals; otherwise every free
    -- would lower the reported "maximum" again.
    go !pb !pc _ _ _ [] = (pb, pc)
    go !pb !pc !b !c !allocs (l:ls) =
      case words l of
        ("a" : saddr : ssize : _) ->
          let size = readHex ssize
              nb = b + size
              nc = c + 1
           in go (max pb nb) (max pc nc) nb nc (M.insert (readHex saddr) size allocs) ls
        ["f", fsaddr] ->
          case readHex fsaddr of
            0 -> go pb pc b c allocs ls
            faddr -> case M.lookup faddr allocs of
              Just fsize -> go pb pc (b - fsize) (c - 1) (M.delete faddr allocs) ls
              Nothing -> error $ "free of untracked address in log line " ++ l
        _ -> error $ "invalid log line " ++ l
-- | Read the allocation log from its fixed location, replay it with 'getMax'
-- starting from empty state, and print the two components of the result on
-- separate lines.
main :: IO ()
main = do
  contents <- readFile "/tmp/memlog.1003"
  let result = getMax 0 0 M.empty (lines contents)
  print (fst result)
  print (snd result)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Replay an allocation log and report the peak number of live chunks and
# live bytes.
#
# Line formats (numbers are hexadecimal):
#   a <addr> <size> <at least one more field>
#   <anything not starting with 'a'> <addr>      -- treated as a free
#
# Output uses print() calls that behave identically on Python 2 and 3.

allocs = {}    # address (kept as its hex string) -> size of a live allocation
used = 0       # bytes currently live
chunks = 0     # allocations currently live
maxused = 0    # peak of `used`
maxchunks = 0  # peak of `chunks`

with open("/var/tmp/memlog.1003") as f:
    for l in f:
        try:
            if l[0] == 'a':
                # Everything after the size is lumped into the last field and
                # ignored; the unpacking requires that field to be present.
                _, sptr, ssize, _ = l.strip().split(' ', 3)
                size = int(ssize, 16)
                # Addresses are never converted to integers; the raw string
                # serves as the dictionary key.
                allocs[sptr] = size
                chunks += 1
                used += size
                maxchunks = max(chunks, maxchunks)
                maxused = max(used, maxused)
            else:
                try:
                    _, sptr = l.strip().split(' ')
                    # A null address is recognised by its exact 16-digit
                    # spelling and skipped.
                    if sptr == "0000000000000000":
                        continue
                    size = allocs[sptr]
                    del allocs[sptr]
                    used -= size
                    chunks -= 1
                except KeyError:
                    # Free of an untracked address: dump the table, then fail.
                    print(allocs)
                    raise
        except ValueError:
            # Wrong field count or non-hex size: show the line, then fail.
            print(l)
            raise

print("%s %s" % ("max chunks: ", maxchunks))
print("%s %s" % ("max bytes: ", maxused))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
a 00000001082002d0 0000000000000029 0000000000f1cf67 0000000000f1cf8d 0000000000a47e12 0000000000a497e3 00000000009cf739 | |
a 0000000107b00000 0000000000000080 0000000000f1a92d 0000000000bce745 0000000000c0eb4b 0000000000c0c30a 0000000000c0a15c | |
f 0000000107f00100 | |
a 0000000107d00c00 0000000000000148 0000000000f182f3 0000000000c129c9 0000000000c0eb61 0000000000c0c30a 0000000000c0a15c |
Хаскель с ByteString - 14m30s
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Питон - 16m 30s
Хаскель со String - 30m и не дождался завершения