import random
import string


class MarkovModel:
    def __init__(self):
        self.model = None

    def learn(self, tokens, n=2):
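The embedded snippet is cut off at learn(). As a rough, self-contained sketch of what an n-gram Markov model of this shape usually does (everything below beyond the learn() signature is my assumption, not the original code):

import random


class MarkovModelSketch:
    """Minimal n-gram Markov chain: learn transition counts, then sample text."""

    def __init__(self):
        self.model = None

    def learn(self, tokens, n=2):
        # Map each n-gram to every token observed immediately after it.
        self.model = {}
        for i in range(len(tokens) - n):
            gram = tuple(tokens[i:i + n])
            self.model.setdefault(gram, []).append(tokens[i + n])
        return self.model

    def generate(self, length=20):
        # Start from a random n-gram and repeatedly sample a continuation.
        gram = random.choice(list(self.model))
        out = list(gram)
        for _ in range(length):
            followers = self.model.get(tuple(out[-len(gram):]))
            if not followers:
                break
            out.append(random.choice(followers))
        return " ".join(out)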
from pyspark.ml.feature import NGram
import PreProcess
import random


class MarkovModelSpark:
    def __init__(self, spark_session, n=2):
        self.spark_session = spark_session
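This embed is truncated as well, and PreProcess is the author's own helper module. As a hedged, self-contained sketch of how pyspark's NGram transformer is typically used to build the transition counts such a class relies on (the toy data and column names are mine):

from pyspark.sql import SparkSession, functions as F
from pyspark.ml.feature import NGram

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(["the", "cat", "sat", "on", "the", "mat"],)], ["tokens"])

# NGram turns each token list into overlapping n-grams ("the cat", "cat sat", ...).
ngram = NGram(n=2, inputCol="tokens", outputCol="ngrams")
bigram_counts = (
    ngram.transform(df)
    .select(F.explode("ngrams").alias("ngram"))
    .groupBy("ngram")
    .count()
)
bigram_counts.show()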
""" | |
Now load the data line by line | |
""" | |
from sklearn.model_selection import train_test_split | |
with open('<path to text file>', 'r') as data: | |
dataset = ["<|title|>" + x.strip() for x in data.readlines()] | |
train, eval = train_test_split(dataset, train_size=.9, random_state=2020) | |
print("training size:" + len(train)) |
# setup imports to use the model
from transformers import TFGPT2LMHeadModel, GPT2Tokenizer

# load the saved model weights (from_pt=True converts PyTorch weights) and the stock GPT-2 tokenizer
model = TFGPT2LMHeadModel.from_pretrained("<path to model directory>", from_pt=True)
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
input_ids = tokenizer.encode("Some text to encode", return_tensors='tf')
generated_text_samples = model.generate(
    input_ids,
    max_length=150,           # cap the total length of each generated sequence
    num_return_sequences=5,   # return five independent samples
    no_repeat_ngram_size=2,   # never repeat the same bigram verbatim
    repetition_penalty=1.5,
    top_p=0.92,               # nucleus sampling threshold
    temperature=.85,
    do_sample=True,           # sample instead of greedy decoding
    top_k=125,
)
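Each returned sequence can then be decoded back into text; a standard follow-up looks like this (not part of the original embed):

# Decode each of the five sampled sequences into a readable string.
for i, sample in enumerate(generated_text_samples):
    print(f"{i}: {tokenizer.decode(sample, skip_special_tokens=True)}")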
import gradio as gr
from operator import itemgetter

# langchain imports
from langchain.llms import HuggingFaceTextGenInference
from langchain.memory import ConversationBufferMemory
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough
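A minimal sketch of how these pieces are typically wired together, assuming the TGI server from the compose file below is reachable on localhost:8080; the prompt wording, generation parameters, and function names are my assumptions:

# Point the LangChain LLM wrapper at the running text-generation-inference server.
llm = HuggingFaceTextGenInference(
    inference_server_url="http://localhost:8080/",
    max_new_tokens=256,
    temperature=0.7,
    repetition_penalty=1.1,
)

# Chat prompt with a slot for the running conversation history.
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant."),
    MessagesPlaceholder(variable_name="history"),
    ("human", "{input}"),
])

memory = ConversationBufferMemory(return_messages=True)

# Inject the stored history into the prompt on every call, then run the LLM.
chain = (
    RunnablePassthrough.assign(
        history=RunnableLambda(memory.load_memory_variables) | itemgetter("history")
    )
    | prompt
    | llm
)

def respond(message, chat_history):
    # Gradio passes its own history, but state is kept in the LangChain memory above.
    answer = chain.invoke({"input": message})
    memory.save_context({"input": message}, {"output": answer})
    return answer

gr.ChatInterface(respond).launch()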
version: '3'
services:
  tgi:
    image: ghcr.io/huggingface/text-generation-inference:latest
    container_name: tgi
    ports:
      - 8080:80
    volumes:
      - ${LOCAL_MODEL_CACHE_DIR}:/model_cache
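The compose snippet is cut off here; the remaining service settings (for example the --model-id argument TGI needs, and pointing HUGGINGFACE_HUB_CACHE at the mounted /model_cache volume) are not shown. Once the container is up on port 8080, a quick smoke test against TGI's generate endpoint looks roughly like this (prompt and parameters are placeholders):

import requests

# Hit the TGI container mapped to port 8080 in the compose file above.
resp = requests.post(
    "http://localhost:8080/generate",
    json={
        "inputs": "What is a Markov chain?",
        "parameters": {"max_new_tokens": 64, "temperature": 0.7},
    },
)
print(resp.json()["generated_text"])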