This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
from datasets import load_dataset | |
from transformers import AutoTokenizer, AutoModelForCausalLM | |
from peft import LoraConfig, get_peft_model, prepare_model_for_int8_training | |
from trl import SFTTrainer | |
tokenizer = AutoTokenizer.from_pretrained('TinyPixel/Llama-2-7B-bf16-sharded') | |
tokenizer.pad_token_id = 0 | |
tokenizer.padding_side = 'left' | |
train_dataset = load_dataset('json', data_files='alpaca_gpt4_data_zh.json', split='train') | |
model = AutoModelForCausalLM.from_pretrained( |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os, json | |
from glob import glob | |
prompt_input = ( | |
"Below is an instruction that describes a task, paired with an input that provides further context. " | |
"Write a response that appropriately completes the request.\n\n" | |
"### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:" | |
) | |
prompt_no_input = ( | |
"Below is an instruction that describes a task. " | |
"Write a response that appropriately completes the request.\n\n" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
autotrain llm --train \ | |
--data-path data \ | |
--epochs 3 \ | |
--block-size 1024 \ | |
--gradient-accumulation 4 \ | |
--logging_steps 500 \ | |
--lora-alpha 32 \ | |
--lora-dropout 0.05 \ | |
--lora-r 8 \ | |
--lr 2e-4 \ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os, torch | |
from glob import glob | |
from peft import PeftModel | |
from transformers import PreTrainedTokenizerFast, AutoTokenizer, AutoModelForCausalLM, GenerationConfig, TextStreamer | |
pretrains = [os.path.basename(path).replace('models--','').replace('--','/') for path in glob(os.path.expanduser('~/.cache/huggingface/hub/models--*'))] | |
for i,pretrain in enumerate(pretrains): print(i,pretrain) | |
pretrain = pretrains[int(input('選擇預訓練'))] | |
tokenizer = AutoTokenizer.from_pretrained(pretrain) | |
model = AutoModelForCausalLM.from_pretrained(pretrain,device_map='auto',torch_dtype=torch.float16) | |
lora = input('LoRA路徑') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from llama_cpp import Llama | |
from glob import glob | |
n_gpu_layers = 50 if input('使用GPU嗎(Y/n)').upper()=='Y' else 0 | |
n_threads = None if n_gpu_layers else 4 | |
ggmls = glob('*ggml*.bin') | |
for i,ggml in enumerate(ggmls): print(i, ggml) | |
ggml = ggmls[int(input('選擇模型'))] | |
llm = Llama(ggml, n_gpu_layers=n_gpu_layers, n_threads=n_threads, verbose=False) | |
while True: | |
prompt = 'Q:{Q}?\nA:'.format(Q=input('\nyou>')) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const fs = require('fs'); | |
const { execSync } = require('child_process'); | |
// 讀取版號 | |
const package = JSON.parse(fs.readFileSync('package.json')); | |
let version = process.argv[2]; // 指令引數 | |
if (!version) { | |
// 自動增加版號 | |
version = parseInt(package.version.replace(/\./g, '')); | |
version = (version + 1).toString(); | |
version = '0'.repeat(Math.max(3 - version.length)) + version; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const puppeteer = require('puppeteer-core'); | |
const fs = require('fs'); | |
const path = require('path'); | |
const glob = require('glob').sync; | |
const { execSync } = require('child_process'); | |
const edgeExe = 'C:/Program Files (x86)/Microsoft/Edge/Application/msedge.exe'; | |
/** | |
* 全國電子公佈欄爬蟲 | |
* https://www.odbbs.gov.tw/odbbs/html/announce.jsp | |
*/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 下載 https://tesseract-ocr.github.io/tessdoc/Downloads.html | |
# 權重 https://github.com/tesseract-ocr/tessdata_best | |
# .box檔案格式:字 6 394 45 410 0 | |
import os | |
import shutil | |
from PIL import Image | |
from glob import glob | |
lang = 'chi_tra' | |
font = 'ocrb' | |
dist = 'tessdata' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
k = 2 # K個群集 | |
max_iter = 10 # 最大迭代 | |
data = np.array([1, 2, 3, 4, 11, 12]) | |
last = np.random.choice(data, k, replace=False) # 初始化K個中心點 | |
func = lambda x,y: np.abs(x-y) # 計算距離方法 | |
steps = [] | |
for _ in range(max_iter): | |
# 對每筆資料求出最近的中心點 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import io | |
import pandas as pd | |
from itertools import permutations, combinations | |
CSV = """ | |
交易紀錄 牛奶(A) 麵包(B) 餅乾(C) 柳橙汁(D) 汽水(E) 泡麵(F) 水果(G) | |
101 1 1 1 1 0 0 0 | |
102 0 1 1 0 1 1 0 | |
103 1 0 1 0 0 0 1 | |
104 1 1 0 1 0 1 1 | |
105 0 0 1 0 1 0 1 |
NewerOlder