Runs on an Intel Core i7-13700K
- code: https://github.com/dundee/pybenchmarks/blob/master/bencher/programs/pystone/pystone.python3
- run for 1_000_000
install from apt
: 3.12.3
# Base image: Ubuntu 24.04.
FROM ubuntu:24.04

# Build argument named PGRX_VERSION; override with --build-arg PGRX_VERSION=...
# NOTE(review): presumably selects the pgrx release installed later — its use
# is outside this view, confirm against the rest of the file.
ARG PGRX_VERSION=0.12.6

# Build-time only: keeps apt non-interactive during RUN steps without leaking
# the setting into the environment of containers started from this image.
ARG DEBIAN_FRONTEND=noninteractive

# Locale settings that persist into the running container.
ENV LANG=en_US.UTF-8 \
    LC_ALL=en_US.UTF-8
RUN apt update && \ | |
apt install -y --no-install-recommends \ |
Runs on an Intel Core i7-13700K
install from apt
: 3.12.3
# sequential write
sudo fio --name=write_throughput --directory=. --numjobs=4 \
--size=2G --time_based --runtime=5m --ramp_time=2s --ioengine=libaio \
# Use the -devel CUDA image because vLLM needs to compile the paged attention.
ARG base=nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04
FROM ${base}

# Build-time only: keeps apt non-interactive during RUN steps without leaking
# the setting into the environment of containers started from this image.
ARG DEBIAN_FRONTEND=noninteractive

# Locale settings that persist into the running container.
ENV LANG=en_US.UTF-8 \
    LC_ALL=en_US.UTF-8

# Build arguments consumed by later steps (outside this view).
# NOTE(review): `commit` presumably selects the source revision to build and
# CONDA_VERSION the conda installer release — confirm where they are used.
ARG commit=main
ARG CONDA_VERSION=py310_23.3.1-0
RUN apt update && \ |
import json
import base64
import msgpack
import numpy as np
# Number of elements in the generated vector.
dim = 1536
# Random vector of length `dim`: np.random.rand draws uniformly from [0, 1),
# and the result is cast to float32.
emb = np.random.rand(dim).astype(np.float32)
# The same values as a plain Python list of floats.
# NOTE(review): presumably prepared for the json/base64/msgpack encodings
# imported above — the code that uses it is outside this view.
emb_list = emb.tolist()
requires:
# terminal 1
python mosec_websocket.py
# terminal 2
package main | |
import ( | |
"fmt" | |
"sync" | |
"go.starlark.net/starlark" | |
"go.starlark.net/starlarkstruct" | |
) |
from flask import Flask, request

app = Flask(__name__)


@app.route("/inference", methods=["POST"])
def inference():
    """Return the raw body of a POST to /inference unchanged."""
    return request.get_data()