billju’s gists

billju / finetune.py

Created September 23, 2023 08:53

	import torch
	from datasets import load_dataset
	from transformers import AutoTokenizer, AutoModelForCausalLM
	from peft import LoraConfig, get_peft_model, prepare_model_for_int8_training
	from trl import SFTTrainer
	# Load the tokenizer published with the 'TinyPixel/Llama-2-7B-bf16-sharded' checkpoint.
	tokenizer = AutoTokenizer.from_pretrained('TinyPixel/Llama-2-7B-bf16-sharded')
	# Use token id 0 as the padding token and pad on the left side of each sequence.
	tokenizer.pad_token_id = 0
	tokenizer.padding_side = 'left'
	# Read the local JSON file 'alpaca_gpt4_data_zh.json' as the 'train' split.
	train_dataset = load_dataset('json', data_files='alpaca_gpt4_data_zh.json', split='train')
	model = AutoModelForCausalLM.from_pretrained(

billju / convert-to-alpaca.py

Last active September 23, 2023 08:53

	import os, json
	from glob import glob
	# Prompt template for samples that come with extra context: it contains
	# {instruction} and {input} placeholders and ends right after the
	# "### Response:" marker, with no trailing newline.
	prompt_input = (
	"Below is an instruction that describes a task, paired with an input that provides further context. "
	"Write a response that appropriately completes the request.\n\n"
	"### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:"
	)
	prompt_no_input = (
	"Below is an instruction that describes a task. "
	"Write a response that appropriately completes the request.\n\n"

billju / autotrain.sh

Created September 23, 2023 08:46

自動訓練

billju / lora-chat.py

Created September 23, 2023 08:31

peft lora 測試

	import os, torch
	from glob import glob
	from peft import PeftModel
	from transformers import PreTrainedTokenizerFast, AutoTokenizer, AutoModelForCausalLM, GenerationConfig, TextStreamer
	# List the 'models--*' directories in ~/.cache/huggingface/hub and turn each
	# directory name into a model id by removing 'models--' and replacing '--' with '/'.
	pretrains = [os.path.basename(path).replace('models--','').replace('--','/') for path in glob(os.path.expanduser('~/.cache/huggingface/hub/models--*'))]
	# Print an indexed menu of the models that were found.
	for i,pretrain in enumerate(pretrains): print(i,pretrain)
	# Ask for an index and pick that entry; a non-integer answer raises ValueError
	# and an out-of-range index raises IndexError (neither is handled here).
	pretrain = pretrains[int(input('選擇預訓練'))]
	# Load the tokenizer and the model for the chosen id; the model is requested
	# in float16 with device_map='auto'.
	tokenizer = AutoTokenizer.from_pretrained(pretrain)
	model = AutoModelForCausalLM.from_pretrained(pretrain,device_map='auto',torch_dtype=torch.float16)
	# Ask for the LoRA path (how it is used is outside the visible part of the script).
	lora = input('LoRA路徑')

billju / cpp-chat.py

Created September 23, 2023 08:31

llama-cpp-python 測試

	from llama_cpp import Llama
	from glob import glob
	# Request 50 GPU layers only when the answer is 'Y' or 'y'; anything else gives 0.
	# NOTE(review): the prompt shows "(Y/n)", but an empty answer selects 0 (no GPU) — confirm the intended default.
	n_gpu_layers = 50 if input('使用GPU嗎(Y/n)').upper()=='Y' else 0
	# Leave the thread count unset (None) when GPU layers are used, otherwise use 4 threads.
	n_threads = None if n_gpu_layers else 4
	# NOTE(review): the pattern 'ggml.bin' has no wildcard, so it matches at most a file
	# with exactly that name in the current directory — confirm whether '*ggml*.bin' was intended.
	ggmls = glob('ggml.bin')
	# Print an indexed menu of the matched model files.
	for i,ggml in enumerate(ggmls): print(i, ggml)
	# Ask for an index and pick that file; invalid input raises ValueError/IndexError.
	ggml = ggmls[int(input('選擇模型'))]
	# Create the Llama instance for the chosen file with the settings above and verbose output off.
	llm = Llama(ggml, n_gpu_layers=n_gpu_layers, n_threads=n_threads, verbose=False)
	while True:
	prompt = 'Q:{Q}?\nA:'.format(Q=input('\nyou>'))

billju / git-release.js

Created January 11, 2022 01:20

	const fs = require('fs');
	const { execSync } = require('child_process');
	// Read the version number: parse package.json from the current working directory.
	const package = JSON.parse(fs.readFileSync('package.json'));
	let version = process.argv[2]; // command-line argument (first one after the script path)
	if (!version) {
	// 自動增加版號
	version = parseInt(package.version.replace(/\./g, ''));
	version = (version + 1).toString();
	version = '0'.repeat(Math.max(3 - version.length)) + version;

billju / tesseract_ocr訓練.js

Last active February 4, 2022 00:19

	const puppeteer = require('puppeteer-core');
	const fs = require('fs');
	const path = require('path');
	const glob = require('glob').sync;
	const { execSync } = require('child_process');
	// Hard-coded path to the Microsoft Edge executable (Windows, x86 Program Files).
	// NOTE(review): presumably handed to puppeteer-core as the browser to launch — confirm against the rest of the script.
	const edgeExe = 'C:/Program Files (x86)/Microsoft/Edge/Application/msedge.exe';
	/**
	* Crawler for the national electronic bulletin board
	* https://www.odbbs.gov.tw/odbbs/html/announce.jsp
	*/

billju / tesseract_ocr_train.py

Last active December 9, 2021 10:33

	# Download: https://tesseract-ocr.github.io/tessdoc/Downloads.html
	# Weights: https://github.com/tesseract-ocr/tessdata_best
	# .box file format (one line per character), e.g.: 字 6 394 45 410 0
	import os
	import shutil
	from PIL import Image
	from glob import glob
	# Script-wide settings; how they are used is outside the visible part of the file.
	# NOTE(review): 'chi_tra' looks like the Tesseract language code for Traditional Chinese — confirm.
	lang = 'chi_tra'
	# NOTE(review): presumably the font name used for the training data — confirm.
	font = 'ocrb'
	# NOTE(review): presumably the output directory for the trained data — confirm.
	dist = 'tessdata'

billju / KMeans群集.py

Created December 1, 2021 04:02

	import numpy as np
	import pandas as pd
	k = 2 # number of clusters (K)
	max_iter = 10 # maximum number of iterations
	# One-dimensional sample data to cluster.
	data = np.array([1, 2, 3, 4, 11, 12])
	last = np.random.choice(data, k, replace=False) # initialise the K centres: k distinct values drawn at random from data
	func = lambda x,y: np.abs(x-y) # distance function: absolute difference
	# Starts empty; it is filled outside the visible part of the script.
	steps = []
	for _ in range(max_iter):
	# 對每筆資料求出最近的中心點

billju / 關聯規則.py

Created December 1, 2021 03:23

	import io
	import pandas as pd
	from itertools import permutations, combinations
	CSV = """
	交易紀錄牛奶(A) 麵包(B) 餅乾(C) 柳橙汁(D) 汽水(E) 泡麵(F) 水果(G)
	101 1 1 1 1 0 0 0
	102 0 1 1 0 1 1 0
	103 1 0 1 0 0 0 1
	104 1 1 0 1 0 1 1
	105 0 0 1 0 1 0 1

ChuBoy billju