Skip to content

Instantly share code, notes, and snippets.

View xiabingquan's full-sized avatar

xiabingquan xiabingquan

  • Beijing, China
View GitHub Profile
@xiabingquan
xiabingquan / visualize_edit_distance.py
Last active December 12, 2022 06:10
Visualize errors of Edit Distance (Insertion, Deletion and Replacement)
from argparse import ArgumentParser
import editdistance
from rich.text import Text
from rich.console import Console
def edit_dist_dp(gt, hyp):
"""
A Dynamic Programming based Python program for edit distance problem
@xiabingquan
xiabingquan / transformer_all_in_one.py
Created December 6, 2023 14:52
Implement Transformer from scratch. All modules included in one file!
# coding=utf-8
# Contact: [email protected]
import numpy as np
import torch
import torch.nn as nn
def get_len_mask(b: int, max_len: int, feat_lens: torch.Tensor, device: torch.device) -> torch.Tensor:
@xiabingquan
xiabingquan / flash_attention_in_numpy.py
Last active April 11, 2025 02:03
A toy example of flash attention implemented in NumPy.
# A minimal example of flash attention implemented in NumPy
# Contact: bingquanxia AT qq.com
import unittest
from typing import List
import numpy as np
import torch
@xiabingquan
xiabingquan / bpe_tokenizer_from_scratch.py
Last active April 13, 2025 04:41
Building a BPE (Byte-Pair Encoding) tokenizer from scratch.
# A minimal example of how to implement byte-pair encoding (BPE) tokenizer from scratch in Python.
# Reference: https://github.com/karpathy/minbpe
# Contact: [email protected]
def get_stats(byte_arr):
# get the frequency of each byte pair in the text
count = {}
for pair in zip(byte_arr[:-1], byte_arr[1:]): # e.g. pair: (b'a', b' ')
count[pair] = count.get(pair, 0) + 1
@xiabingquan
xiabingquan / insert_white_spaces.py
Last active January 21, 2025 02:38
A super easy (but useful) script to insert whitespace between English words and adjacent Chinese characters.
import re
import sys
def insert_spaces(text):
text = re.sub(r'([a-zA-Z])([\u4e00-\u9fa5])', r'\1 \2', text)
text = re.sub(r'([\u4e00-\u9fa5])([a-zA-Z])', r'\1 \2', text)
return text
if __name__ == '__main__':
@xiabingquan
xiabingquan / TrieTree.py
Created January 21, 2025 02:40
A minimal (not fully tested) example of Trie Tree
from __future__ import annotations
from copy import deepcopy
from typing import List, Optional
class TrieTree(object):
# Char-level Trie Tree
def __init__(self, is_end: bool, val: str = '', parent: Optional[TrieTree] = None):
"""
A Trie Tree
@xiabingquan
xiabingquan / heap.py
Created January 24, 2025 01:53
A minimal example of heap
from __future__ import annotations
import heapq
import random
from copy import deepcopy
from typing import List, Callable
class Heap(object):
def __init__(self, arr: List[int], func: Callable[[int, int], bool]):
self.arr = arr
@xiabingquan
xiabingquan / merge_sort.py
Created January 24, 2025 16:11
A minimal example of merge sort
from __future__ import annotations
import random
from typing import List
def merge_sort(arr: List[int]) -> List[int]:
if len(arr) == 1 or len(arr) == 0:
return arr
mid = len(arr) // 2
@xiabingquan
xiabingquan / merge_sort.rs
Created February 10, 2025 15:50
A minimal example of merge sort in Rust
use rand::Rng;
fn merge_sort<T: Ord + Clone + Copy>(arr: &mut [T]) {
let len = arr.len();
if len < 2 {
return;
}
let mid = len / 2;
merge_sort(&mut arr[0..mid]);
merge_sort(&mut arr[mid..]);
@xiabingquan
xiabingquan / bubble_sort.rs
Created February 11, 2025 14:24
A minimal example of bubble sort in Rust
use rand::Rng;
fn bubble_sort<T: Ord>(arr: &mut [T]) {
let n = arr.len();
for i in 0..n {
for j in 0..n-1-i {
if arr[j] > arr[j + 1] {
arr.swap(j, j + 1);
}