Skip to content

Instantly share code, notes, and snippets.

View lemire's full-sized avatar
🚀
working hard and fast

Daniel Lemire lemire

🚀
working hard and fast
View GitHub Profile
#################
# This starts a web server listening on port 8001, with debugging turned on.
# This should not be used to run the chatbot on a public website: it is meant
# for testing purposes only.
#################
from flask import Flask, request, jsonify
from flask import Flask, render_template, request, url_for
from langchain.chat_models import ChatOpenAI
from langchain.docstore.document import Document
@lemire
lemire / embedding.py
Created December 6, 2023 03:12
generate embeddings
###############
# You should basically never use this program. It is only for generating the embeddings for your ChatBot.
# If you want to run the ChatBot, see web_app.py
###############
import os
from openai.embeddings_utils import get_embedding, cosine_similarity
import pandas
import openai
import glob
import time
@lemire
lemire / testcurl.c
Created November 21, 2023 19:20
C program to test curl URL normalization
// cc test.c -lcurl -o testcurl && ./testcurl
#include <curl/curl.h>
#include <stdio.h>
int main() {
CURLU *url = curl_url();
CURLUcode rc = curl_url_set(
url, CURLUPART_URL, "https://www.7‑Eleven.com/Home/Privacy/Montréal", 0);
// Returns a CURLUcode error value, which is (0) if everything went fine.
if (rc == 0) {
char *buffer;
@lemire
lemire / demosimdjson.cpp
Last active October 25, 2023 20:44
demosimdjson
#include "simdjson.h"
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
using namespace simdjson;
std::string read_file(const std::string &filename) {
std::ifstream file(filename);
@lemire
lemire / test.js
Created September 29, 2023 22:23
"use strict";
import { bench, run } from "mitata";
import { existsSync, createWriteStream, readFileSync, mkdirSync } from "node:fs";
import path from "node:path";
import axios from "axios";
const fixturesFolderPath = new URL('fixtures', import.meta.url).pathname;
const urls = [
"https://github.com/ada-url/url-various-datasets/blob/main/files/isaacs_files.txt",
// Wrapper around a single volatile 64-bit counter. The struct's alignment
// requirement is taken from the `align` template argument via alignas.
// NOTE(review): presumably used to place counters at different alignments for
// a benchmark — confirm against the driver code, which is not visible here.
template <int align> struct alignas(align) A {
// Declared volatile; the struct provides no initializer, so the user of A
// has to assign a value before reading it.
volatile uint64_t count;
};
// Upper bound of the loop in counter(): how many times the counter is updated.
constexpr size_t iterations = 100'000'000;
void counter(volatile uint64_t *counterpt) {
for (size_t i = 0; i < iterations; i++) {
*counterpt = 3 * *counterpt * *counterpt + 1;
@lemire
lemire / Ascii.cs
Created August 29, 2023 16:51
Démo
using System;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
// Ideally, we would want to implement something that looks like
// https://learn.microsoft.com/en-us/dotnet/api/system.text.asciiencoding?view=net-7.0
@lemire
lemire / timings.txt
Created May 24, 2023 23:56
Measurements from https://github.com/lemire/talks/tree/master/2023/performance/code (first column is timings, each row is a measure, other columns are performance counters)
52791.000000 164108.000000 353080.000000
43083.000000 134047.000000 342931.000000
42750.000000 132955.000000 342934.000000
42792.000000 133068.000000 342947.000000
42625.000000 132742.000000 342930.000000
42667.000000 132701.000000 342934.000000
42541.000000 132560.000000 342938.000000
42583.000000 132513.000000 342930.000000
42667.000000 132687.000000 342938.000000
42583.000000 132509.000000 342938.000000
@lemire
lemire / long_way.cpp
Created May 22, 2023 20:32
long way to do object/string to string
#include "simdjson.h"
#include <iostream>
using namespace simdjson; // optional
int main() {
padded_string json = R"( {"exports": {}} )"_padded;
ondemand::parser parser;
ondemand::document doc = parser.iterate(json);
ondemand::object obj;
auto err = doc.get_object().get(obj);
@lemire
lemire / script.js
Created May 8, 2023 23:33
roaring vs set
const Benchmark = require('benchmark');
const { TypedFastBitSet } = require('typedfastbitset');
const { RoaringBitmap32 } = require('roaring/RoaringBitmap32');
// Set utils
/**
 * Build a new Set containing every element of `set1` followed by every
 * element of `set2` (duplicates collapse, first-seen order is kept).
 * Neither input is modified.
 *
 * @param {Iterable} set1 - first collection to merge
 * @param {Iterable} set2 - second collection to merge
 * @returns {Set} a fresh Set holding the combined elements
 */
function union(set1, set2) {
  const merged = new Set();
  // Explicit loops keep the same insertion order as spreading both inputs,
  // and still throw a TypeError when an argument is not iterable.
  for (const element of set1) {
    merged.add(element);
  }
  for (const element of set2) {
    merged.add(element);
  }
  return merged;
}
function difference(set1, set2) {