Manu Suryavansh suryavanshi

Deep Learning Engineer, GenAI, LLM

suryavanshi / augment_generate.py

Created August 13, 2021 10:52

	# Text augmentation by continuation: GPT-2 extends a sample sentence by a
	# small, fixed budget and the first generated candidate is printed.
	from transformers import pipeline
	generator = pipeline('text-generation', model='gpt2')

	input_text = "I went to see a movie in the theater"
	# `max_length` is measured in model tokens and includes the prompt, so the
	# prompt is counted with the pipeline's own tokenizer. A whitespace word
	# count under-counts whenever a word splits into several sub-word tokens,
	# which would silently shrink (or remove) the room left for new text.
	input_length = len(generator.tokenizer(input_text)['input_ids'])
	# Budget of additional tokens to generate beyond the prompt.
	num_new_words = 5
	output_length = input_length + num_new_words
	# Five candidate continuations are sampled; only the first is used below.
	gpt_output = generator(input_text, max_length=output_length, num_return_sequences=5)
	augmented_text = gpt_output[0]['generated_text']
	print("Augmented text->",augmented_text)

suryavanshi / unmasker_replace.py

Last active March 13, 2022 23:25

	# Setup for word-replacement augmentation: builds a fill-mask pipeline and
	# splits a sample sentence into whitespace-separated words.
	from transformers import pipeline
	import random

	# Fill-mask pipeline loaded from the 'bert-base-cased' checkpoint.
	unmasker = pipeline('fill-mask', model='bert-base-cased')

	# Sample sentence to augment.
	input_text = "I went to see a movie in the theater"

	# Whitespace-split words of the sentence, and how many there are.
	orig_text_list = input_text.split()
	len_input = len(orig_text_list)
	#Random index where we want to replace the word

suryavanshi / unmasker_insert.py

Last active August 13, 2021 10:34

	# Setup for word-insertion augmentation: builds a fill-mask pipeline and
	# splits a sample sentence into whitespace-separated words.
	from transformers import pipeline
	import random

	# Fill-mask pipeline loaded from the 'bert-base-cased' checkpoint.
	unmasker = pipeline('fill-mask', model='bert-base-cased')

	# Sample sentence to augment.
	input_text = "I went to see a movie in the theater"

	# Whitespace-split words of the sentence, and how many there are.
	orig_text_list = input_text.split()
	len_input = len(orig_text_list)
	#Random index where we want to insert the word except at the start or end

suryavanshi / back_translate.py

Created August 13, 2021 09:43

back_translate.py

	# Setup for back-translation: one English->German translator and one
	# German->English tokenizer/model pair are loaded here.
	from transformers import pipeline
	from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

	#English to German using the Pipeline and T5
	translator_en_to_de = pipeline("translation_en_to_de", model='t5-base')

	#German to English using Bert2Bert model
	# The pad/eos/bos special tokens are passed explicitly when loading the
	# tokenizer for this checkpoint.
	tokenizer = AutoTokenizer.from_pretrained("google/bert2bert_L-24_wmt_de_en", pad_token="<pad>", eos_token="</s>", bos_token="<s>")
	# Seq2seq model loaded from the same checkpoint name as the tokenizer above.
	model_de_to_en = AutoModelForSeq2SeqLM.from_pretrained("google/bert2bert_L-24_wmt_de_en")

suryavanshi / presidio_pii.py

Last active May 19, 2021 08:09

presidio_pii.py

	#From - https://microsoft.github.io/presidio/getting_started/
	# Setup for a PII-detection example: imports the Presidio analyzer and
	# anonymizer engines and creates the analyzer.
	from presidio_analyzer import AnalyzerEngine
	from presidio_anonymizer import AnonymizerEngine

	# Sample input containing a phone number.
	text="My phone number is 212-555-5555"

	# Set up the engine, loads the NLP module (spaCy model by default)
	# and other PII recognizers
	analyzer = AnalyzerEngine()

suryavanshi / spacy_ner.py

Last active May 19, 2021 08:11

	# Named-entity recognition example: runs a spaCy model over a sample
	# sentence and prints every detected entity.
	import spacy #Using Spacy version 2.2.3
	nlp = spacy.load("en_core_web_lg")

	inp_text = "My name is John Wick, I live in California"
	doc = nlp(inp_text)

	# One line per entity: its text, start and end character offsets, and label.
	# The print must sit inside the loop body; at the same level as the `for`
	# the script does not parse.
	for ent in doc.ents:
	    print(ent.text, ent.start_char, ent.end_char, ent.label_)
	new_tokens = []
	for token in doc: