Created
August 11, 2020 20:20
-
-
Save svpino/c4d1978f9eac47c59e61c34125c8fad9 to your computer and use it in GitHub Desktop.
ai-generated-content.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "ai-generated-content.ipynb", | |
"provenance": [], | |
"authorship_tag": "ABX9TyM8YEp0mpwiuT92OlguqohJ", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/svpino/c4d1978f9eac47c59e61c34125c8fad9/ai-generated-content.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "HBMCqUqiDMpE", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 102 | |
}, | |
"outputId": "81b0aea6-fcdb-4c35-a2a2-0bc8eee57d5f" | |
}, | |
"source": [ | |
"# This will install the Transformers repository directly\n", | |
"# from Github.\n", | |
"!pip install -q git+https://github.com/huggingface/transformers.git" | |
], | |
"execution_count": 1, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"\u001b[K |████████████████████████████████| 3.0MB 2.8MB/s \n", | |
"\u001b[K |████████████████████████████████| 1.1MB 26.0MB/s \n", | |
"\u001b[K |████████████████████████████████| 890kB 38.9MB/s \n", | |
"\u001b[?25h Building wheel for transformers (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
" Building wheel for sacremoses (setup.py) ... \u001b[?25l\u001b[?25hdone\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "PUvscABODQ3q", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"import tensorflow as tf\n", | |
"import textwrap\n", | |
"from transformers import TFGPT2LMHeadModel, GPT2Tokenizer" | |
], | |
"execution_count": 12, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "BaYoChPhDhJT", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 85 | |
}, | |
"outputId": "61fff41e-8009-4bed-9e7d-a90efb859a9b" | |
}, | |
"source": [ | |
"# We are going to be using GPT-2 Medium. The first time we run this\n", | |
"# cell, the models will be downloaded. They are large.\n", | |
"tokenizer = GPT2Tokenizer.from_pretrained(\"gpt2-medium\")\n", | |
"model = TFGPT2LMHeadModel.from_pretrained(\"gpt2-medium\")" | |
], | |
"execution_count": 14, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"All model checkpoint weights were used when initializing TFGPT2LMHeadModel.\n", | |
"\n", | |
"All the weights of TFGPT2LMHeadModel were initialized from the model checkpoint at gpt2-medium.\n", | |
"If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.\n" | |
], | |
"name": "stderr" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "8az6BrxLDTSC", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"# This is the seed text. You can put here some text that will \n", | |
"# help the AI focus on a specific topic. The AI will generate text\n", | |
"# that's directly related to this seed.\n", | |
"SEED_TEXT = \"\"\"\n", | |
" Clean Code should be as enjoyable as reading a good novel. Clean Code tells a story.\n", | |
" Clean Code is a pleasure to read. It's orderly and elegant. Everything is there for a reason. \n", | |
"\"\"\"\n", | |
"\n", | |
"# This represents the maximum number of tokens we want to generate.\n", | |
"MAX_LENGTH = 250" | |
], | |
"execution_count": 27, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "bki6iMqjDsc7", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "056b4187-2d79-4d62-f5f9-367967643cbf" | |
}, | |
"source": [ | |
"# Here we encode the seed text and generate the text.\n", | |
"encoded_text = tokenizer.encode(SEED_TEXT, return_tensors='tf')\n", | |
"tf.random.set_seed(1)\n", | |
"\n", | |
"output = model.generate(\n", | |
" encoded_text,\n", | |
" do_sample=True, \n", | |
" max_length=MAX_LENGTH, \n", | |
" top_k=50, \n", | |
" top_p=0.95, \n", | |
" num_return_sequences=1\n", | |
")" | |
], | |
"execution_count": 28, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Setting `pad_token_id` to 50256 (first `eos_token_id`) to generate sequence\n" | |
], | |
"name": "stderr" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "mb07t44BEMIm", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 153 | |
}, | |
"outputId": "9d6c8be3-fece-4a3a-99a0-1cf056cfd52e" | |
}, | |
"source": [ | |
"# Printing out the generated text.\n", | |
"for sample in output:\n", | |
" text = tokenizer.decode(sample, skip_special_tokens=True) \n", | |
" text = text[len(SEED_TEXT):].strip()\n", | |
" print(textwrap.fill(text, 80))" | |
], | |
"execution_count": 29, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Clean Code takes some ideas that you have from other languages and adds some\n", | |
"extra stuff for a more functional look. You'll also notice some interesting\n", | |
"features such as object-oriented design and an introduction to functional\n", | |
"programming. And of course, you can look at how the author was able to use these\n", | |
"features. I would like to see such a book printed around the world. I'd have\n", | |
"loved to have seen the book from the perspective of a programmer, but to read in\n", | |
"clean and simple style? That would have been quite nice. The editor might have\n", | |
"just been wrong, right? Anyway, thank you for making this review!\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "EGmiZOHhFRrv", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": null, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment