This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
def calc_num_token_of_bin(filesize: str) -> float:
    """Convert a size string such as ``'10G'`` or ``'1T'`` into a token count.

    The string must contain exactly one ``<digits><unit>`` group, where the
    unit is ``G`` or ``T``. The result is
    ``digits / 4 * (1024 / 1000) ** p`` with ``p = 2`` for ``G`` and
    ``p = 3`` for ``T``.

    NOTE(review): presumably the ``/ 4`` is bytes-per-token and the power
    term converts binary to decimal units -- confirm against the caller.

    Args:
        filesize: Text containing the size, e.g. ``'10G'`` or ``'1T'``.

    Returns:
        The computed value as a float (true division is used, so the result
        is never an ``int``).

    Raises:
        AssertionError: If ``filesize`` does not contain exactly one
            ``<digits>G`` / ``<digits>T`` group.
    """
    # `[GT]`, not `[G|T]`: inside a character class `|` is a literal pipe,
    # so the old pattern accepted inputs like '10|' and treated them as 'T'.
    matches = re.findall(r"(\d+)([GT])", filesize)
    # Raised explicitly (instead of `assert`) so the validation survives
    # `python -O`, while callers catching AssertionError keep working.
    if len(matches) != 1:
        raise AssertionError(f"Expect string like '10G', '1T', but got {filesize}")
    digits, unit = matches[0]
    exponent = 2 if unit == 'G' else 3
    return int(digits) / 4 * (1024 / 1000) ** exponent
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# References: https://github.com/huggingface/picotron_tutorial/blob/master/step1_modeling/utils.py | |
def print(*args, is_print_rank=True, **kwargs):
    """Print under an exclusive file lock so multi-process output does not interleave.

    Args:
        *args: Positional arguments forwarded to ``builtins.print``.
        is_print_rank: When false, the call returns immediately without
            printing or taking the lock.
        **kwargs: Keyword arguments forwarded to ``builtins.print``.
            ``flush`` defaults to ``True`` here; pass ``flush=False`` to
            restore the builtin default.

    Returns:
        None.
    """
    # Imported locally so this function does not depend on module-level
    # imports being present.
    import builtins
    import fcntl

    if not is_print_rank:
        return
    # Flush while the lock is still held; otherwise buffered text can be
    # written after the lock is released and interleave with other processes.
    kwargs.setdefault("flush", True)
    # This source file doubles as the lock file shared by all processes.
    with open(__file__, "r") as fh:
        fcntl.flock(fh, fcntl.LOCK_EX)
        try:
            builtins.print(*args, **kwargs)
        finally:
            fcntl.flock(fh, fcntl.LOCK_UN)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
import logging | |
from typing import Callable | |
from functools import wraps | |
# Reference: https://github.com/NVIDIA/Megatron-LM/blob/9a496c976e12a62ce8e39e14496e52a985588730/megatron/core/dist_checkpointing/strategies/two_stage.py#L35 | |
def timed(verbose=True) -> Callable: | |
def timed_dec(fn): | |
name = fn.__name__ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fn insert_sort<T: Ord + Clone>(arr: &mut [T]) { | |
for i in 0..arr.len() - 1 { | |
let val = arr[i + 1].clone(); | |
let mut j = i as isize; | |
while j >= 0 && val > arr[j as usize] { | |
j -= 1; | |
} | |
for k in ((j+1) as usize..i+1).rev() { | |
arr[k + 1] = arr[k].clone(); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fn selective_sort<T: Ord + Clone>(arr: &mut [T]) { | |
let n = arr.len(); | |
for i in 0..n { | |
let mut max_val = arr[i].clone(); | |
let mut max_idx = i; | |
for j in i..n { | |
if arr[j] > max_val { | |
max_val = arr[j].clone(); | |
max_idx = j; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use rand::Rng; | |
fn bubble_sort<T: Ord>(arr: &mut [T]) { | |
let n = arr.len(); | |
for i in 0..n { | |
for j in 0..n-1-i { | |
if arr[j] > arr[j + 1] { | |
arr.swap(j, j + 1); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use rand::Rng; | |
fn merge_sort<T: Ord + Clone + Copy>(arr: &mut [T]) { | |
let len = arr.len(); | |
if len < 2 { | |
return; | |
} | |
let mid = len / 2; | |
merge_sort(&mut arr[0..mid]); | |
merge_sort(&mut arr[mid..]); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import annotations | |
import random | |
from typing import List | |
def merge_sort(arr: List[int]) -> List[int]: | |
if len(arr) == 1 or len(arr) == 0: | |
return arr | |
mid = len(arr) // 2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import annotations | |
import heapq | |
import random | |
from copy import deepcopy | |
from typing import List, Callable | |
class Heap(object): | |
def __init__(self, arr: List[int], func: Callable[[int, int], bool]): | |
self.arr = arr |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import annotations | |
from copy import deepcopy | |
from typing import List, Optional | |
class TrieTree(object): | |
# Char-level Trie Tree | |
def __init__(self, is_end: bool, val: str = '', parent: Optional[TrieTree] = None): | |
""" | |
A Trie Tree |
NewerOlder