Last active
May 25, 2024 10:19
-
-
Save dacr/abe41c96e15ece7f32d60493dae1f556 to your computer and use it in GitHub Desktop.
playing with GPT2 / published by https://github.com/dacr/code-examples-manager #296ca1e1-ede9-4d57-9f1a-6f0f14fb2fbc/acde99e98c6101c6faf62606d11dfeb86273f263
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// summary : playing with GPT2
// keywords : djl, machine-learning, gpt, ai, @testable
// publish : gist
// authors : David Crosson
// license : Apache NON-AI License Version 2.0 (https://raw.githubusercontent.com/non-ai-licenses/non-ai-licenses/main/NON-AI-APACHE2)
// id : 296ca1e1-ede9-4d57-9f1a-6f0f14fb2fbc
// created-on : 2024-02-03T08:18:12+01:00
// managed-by : https://github.com/dacr/code-examples-manager
// run-with : scala-cli $file
// ---------------------
//> using scala "3.4.2"
//> using dep "org.slf4j:slf4j-api:2.0.13"
//> using dep "org.slf4j:slf4j-simple:2.0.13"
//> using dep "net.java.dev.jna:jna:5.14.0"
//> using dep "ai.djl:api:0.28.0"
//> using dep "ai.djl:basicdataset:0.28.0"
//> using dep "ai.djl:model-zoo:0.28.0"
//> using dep "ai.djl.huggingface:tokenizers:0.28.0"
//> using dep "ai.djl.mxnet:mxnet-engine:0.28.0"
//> using dep "ai.djl.mxnet:mxnet-model-zoo:0.28.0"
//> using dep "ai.djl.pytorch:pytorch-engine:0.28.0"
//> using dep "ai.djl.pytorch:pytorch-model-zoo:0.28.0"
//> using dep "ai.djl.tensorflow:tensorflow-engine:0.28.0"
//> using dep "ai.djl.tensorflow:tensorflow-model-zoo:0.28.0"
////> using dep "ai.djl.paddlepaddle:paddlepaddle-engine:0.28.0"
////> using dep "ai.djl.paddlepaddle:paddlepaddle-model-zoo:0.28.0"
//> using dep "ai.djl.onnxruntime:onnxruntime-engine:0.28.0"
// ---------------------
// Restrict slf4j-simple output to errors. This is the first statement of the
// script, so the property is set before any of the code below runs.
locally {
  val logLevelKey = "org.slf4j.simpleLogger.defaultLogLevel"
  System.setProperty(logLevelKey, "error")
}
import ai.djl.Application
import ai.djl.engine.Engine
import ai.djl.huggingface.tokenizers.HuggingFaceTokenizer
import ai.djl.huggingface.translator.QuestionAnsweringTranslatorFactory
import ai.djl.modality.nlp.generate.{CausalLMOutput, SearchConfig, TextGenerator}
import ai.djl.ndarray.NDList
import ai.djl.repository.zoo.Criteria
import ai.djl.training.util.ProgressBar
import ai.djl.translate.DeferredTranslatorFactory

import scala.io.AnsiColor.{BLUE, BOLD, CYAN, GREEN, MAGENTA, RED, RESET, UNDERLINED, YELLOW}
import scala.util.Using
// inspired by https://github.com/deepjavalibrary/djl/blob/master/examples/src/main/java/ai/djl/examples/inference/nlp/TextGeneration.java
// Model-loading criteria: an NDList -> CausalLMOutput model fetched from the
// archive URL below, run on the PyTorch engine with a deferred translator
// factory and a progress bar attached to the builder.
val criteria = {
  val modelArchiveUrl = "https://djl-misc.s3.amazonaws.com/test/models/gpt2/gpt2_pt.zip"
  val engineName      = "PyTorch"

  Criteria
    .builder()
    .setTypes(classOf[NDList], classOf[CausalLMOutput])
    .optModelUrls(modelArchiveUrl)
    .optEngine(engineName)
    .optTranslatorFactory(DeferredTranslatorFactory())
    .optProgress(ProgressBar())
    .build()
}
// Search settings handed to the TextGenerator.
val config = {
  val searchConfig = SearchConfig()
  searchConfig.setMaxSeqLength(200)
  // NOTE(review): 220 is presumably the GPT-2 token id used for padding in the
  // DJL example this script is based on; confirm against the tokenizer vocabulary.
  searchConfig.setPadTokenId(220)
  searchConfig
}

// Search algorithm name passed to TextGenerator; the alternatives are kept for experimentation.
//val searchName = "greedy"
val searchName = "contrastive"
//val searchName = "beam"

// Prompt that the model is asked to continue.
val input = "Being president of the Earth. My name is Joe Doe"

// Loads the model, generates a continuation of `input` and decodes it to text.
//
// The model, predictor, NDManager and tokenizer are all AutoCloseable. They are
// registered with Using.Manager so that each of them is closed, in reverse order
// of acquisition, whether generation succeeds or throws. The original script
// never closed any of them.
val output: String =
  Using.Manager { use =>
    val model     = use(criteria.loadModel())
    val predictor = use(model.newPredictor())
    val manager   = use(model.getNDManager().newSubManager())
    val tokenizer = use(HuggingFaceTokenizer.newInstance("gpt2"))

    val generator = TextGenerator(predictor, searchName, config)

    // Encode the prompt and add a leading axis with expandDims(0) before generation.
    val inputIds      = tokenizer.encode(input).getIds
    val inputIdArray  = manager.create(inputIds).expandDims(0)
    val outputIdArray = generator.generate(inputIdArray)

    // Copy the ids out of the NDArray before the manager is closed, then decode.
    val outputIds = outputIdArray.toLongArray
    tokenizer.decode(outputIds)
  }.get // rethrow any failure, as the unmanaged version of the script would have

println(output)
/*
Example of generated output:

Being president of the Earth. My name is Joe Doe.
I'm a scientist who works on climate change issues.
I'm also a member of the National Academy of Sciences.
I'm a professor of chemistry at Columbia University.
I'm a co-author of the book Climate Change: How Humans Are Changing the Earth's Atmosphere and Water Volumes,
published by Oxford University Press.
I'm also a member of the advisory board of the American Geophysical Union.
AMY GOODMAN:
So you're president of the Earth Institute, which is based in Boulder, Colorado.
What do you think about climate change?
JOE DUANE:
Well, I think it's really important to understand that we are living in a global warming era.
And I think that the best way to address this is to take action now. We need to act now.
We need to do something about it.
And I think that we're going to have to start thinking about how we respond to climate change.
*/
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment