Bootstrap knowledge of LLMs as quickly as possible, with a bias/focus toward GPT.
Avoid being a link dump; aim to provide only valuable, well-tuned information.
Cover neural-network fundamentals (links first) before starting with transformers.
| { | |
| "name": "Apple Silicon", | |
| "load_params": { | |
| "n_ctx": 2048, | |
| "n_batch": 512, | |
| "rope_freq_base": 10000, | |
| "rope_freq_scale": 1, | |
| "n_gpu_layers": 1, | |
| "use_mlock": false, | |
| "main_gpu": 0, |
| source ~/miniconda3/bin/activate allen | |
| LANG=en | |
| TASK=qa_en_small | |
| for SPLIT in train valid | |
| do | |
| python -m examples.roberta.multiprocessing_bpe_encoder \ | |
| --encoder-json encoder.json \ | |
| --vocab-bpe vocab.bpe \ | |
| --inputs "$TASK/$SPLIT.$LANG" \ |
| fairseq-train qa_en_small-bin \ | |
| --log-interval=10 \ | |
| --log-format=json \ | |
| --tensorboard-logdir=/users/tom/ed/sp/pretrain/tests/fairseq/bart_en_small/logs \ | |
| --seed=1 \ | |
| --cpu \ | |
| --min-loss-scale=0.0001 \ | |
| --model-parallel-size=1 \ | |
| --criterion=cross_entropy \ |
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## Created by: Hang Zhang, Rutgers University, Email: [email protected]
## Modified by Thomas Wolf, HuggingFace Inc., Email: [email protected]
## Copyright (c) 2017-2018
##
## This source code is licensed under the MIT-style license found in the
## LICENSE file in the root directory of this source tree
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
"""Encoding Data Parallel"""
# Standard library
import logging
import multiprocessing

# Third-party (gensim): corpus reader and vectorization models.
# NOTE(review): these gensim imports do not obviously match the module
# docstring above — confirm this prologue belongs to a single file.
from gensim.corpora.wikicorpus import WikiCorpus
from gensim.models import TfidfModel
from gensim.models.word2vec import Word2Vec

# logging is important to get the state of the functions:
# configure the root logger once at import time with a timestamped format
# and INFO verbosity so downstream module loggers inherit it.
logging.basicConfig(format='%(asctime)s: %(levelname)s: %(message)s')
logging.root.setLevel(level=logging.INFO)