Skip to content

Instantly share code, notes, and snippets.

View davidgilbertson's full-sized avatar

David Gilbertson davidgilbertson

  • Sydney, Australia
View GitHub Profile
@davidgilbertson
davidgilbertson / embed.py
Last active January 15, 2025 04:32
A function to embed texts with OpenAI, with concurrency and allowance for max inputs and max token length
from typing import Literal
import asyncio
import numpy as np
from openai import AsyncOpenAI
import tiktoken
def embed(
texts: list[str],
model: Literal[
import pandas as pd
import numpy as np
import streamlit as st
st.set_page_config(
page_title="SDC selector",
layout="wide",
)
import pandas as pd
from datasets import Dataset
# Requires transformers 4.48
from transformers import (
AutoTokenizer,
AutoModelForSequenceClassification,
TrainingArguments,
Trainer,
DataCollatorWithPadding,
)
from io import StringIO
import re
from time import perf_counter
import json
import yaml
import pandas as pd
import tomlkit
from openai import OpenAI
client = OpenAI()
@davidgilbertson
davidgilbertson / markdown_to_unicode.py
Last active July 15, 2024 23:26
Convert markdown text to unicode characters where possible
# Licensed under the MIT-0 License. See https://opensource.org/licenses/MIT-0 for details.
import re
def markdown_to_unicode(md_text):
"""
Converts Markdown text to Unicode by transforming Markdown syntax for
bold and italic text into their corresponding Unicode characters,
handling lists and headings, and preserving the formatting within code blocks.
/* Obsidian CSS reference: https://docs.obsidian.md/Reference/CSS+variables/Editor/Code */
body {
/* Code syntax highlighting */
--jetbrains-grey-code: #BCBEC4;
--code-normal: --jetbrains-grey-code;
--code-comment: #7A7E85;
--code-function: --jetbrains-grey-code;
--code-important: hotpink;
--code-keyword: #CF8E6D;
@davidgilbertson
davidgilbertson / Python-List.py.groovy
Last active August 23, 2023 20:37
Custom extractor for PyCharm - copy tables to a list of dicts
import static com.intellij.openapi.util.text.StringUtil.escapeStringCharacters as escapeStr
SEPARATOR = ", "
QUOTE = "\""
NEWLINE = System.getProperty("line.separator")
def record(columns, dataRow) {
OUT.append(" {").append(NEWLINE)
columns.eachWithIndex { column, idx ->
# MIT License
#
# Copyright (c) 2024 David Gilbertson
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
Word Deconstruction
it's it ~is
years year ~s
going go ~ing
that's that ~is
i'm i ~am
things thing ~s
states state ~s
including include ~ing
called call ~ed
print('Hello from testing.py')