Om Surve gamingflexer

❤️

Chaos all around

ML Engineer [ Building E2E AI Products ] FreeLancer #sih2022winner

gamingflexer / main.py

Created July 19, 2023 09:02

Anthropic's tokenizer for Claude

	from transformers import PreTrainedTokenizerFast

	# Build a fast tokenizer from a local tokenizer JSON file (absolute, machine-specific path).
	fast_tokenizer = PreTrainedTokenizerFast(tokenizer_file="/home/ubuntu/LLM/module/claude-v1-tokenization.json")
	text = "Hello, this is a test input."
	# Tokenize the sample sentence.
	tokens = fast_tokenizer.tokenize(text)
	# Bare expression: echoes the result in a notebook/REPL; it has no effect when run as a script.
	tokens

gamingflexer / nllb_200.py

Created August 28, 2022 14:55

NLLB FB 200 Language Translator

	from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

	# Load the "facebook/nllb-200-distilled-600M" checkpoint and its tokenizer once, at module level;
	# translation() below reuses these two objects on every call.
	model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
	tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")

	# For the source and target language codes, see the FLORES-200 language list:
	# https://github.com/facebookresearch/flores/tree/main/flores200#languages-in-flores-200

	def translation(text, src_lang, tgt_lang, max_length=400):
	    """Translate ``text`` from ``src_lang`` to ``tgt_lang``.

	    A translation pipeline is built on each call from the module-level
	    ``model`` and ``tokenizer`` and then applied to ``text``.

	    Args:
	        text: Input handed unchanged to the translation pipeline.
	        src_lang: Source language code, as listed in the FLORES-200 table
	            linked above this function (e.g. "eng_Latn" -- verify against
	            that list).
	        tgt_lang: Target language code, from the same list.
	        max_length: Forwarded to ``pipeline`` as ``max_length``. Defaults
	            to 400, the value that was previously hard-coded.

	    Returns:
	        Whatever the pipeline call returns, unmodified (presumably a list
	        of dicts holding the translated text -- confirm against the
	        transformers documentation).
	    """
	    translator = pipeline(
	        'translation',
	        model=model,
	        tokenizer=tokenizer,
	        src_lang=src_lang,
	        tgt_lang=tgt_lang,
	        max_length=max_length,
	    )
	    return translator(text)

gamingflexer / classes.py

Created August 28, 2022 14:53

Detectron 2 Classes

gamingflexer / google_fact_check.py

Created August 28, 2022 14:49

Google Fact Check | Web Scraping

	import requests
	import urllib
	import pandas as pd
	from requests_html import HTML
	from requests_html import HTMLSession

	def get_source(url):
	"""Return the source code for the provided URL.

	Args:

gamingflexer / preprocess.py

Created August 28, 2022 14:47

Bunch of Cleaning Functions | ML & Backend Dev

	import os,re,string,json,emoji,csv
	import numpy as np
	import pandas as pd

	def clean_text(text):
	'''Clean emoji, Make text lowercase, remove text in square brackets,remove links,remove punctuation
	and remove words containing numbers.'''
	text = emoji.demojize(text)
	text = re.sub(r'\:(.*?)\:', '', text)
	text = str(text).lower() # Making Text Lowercase

gamingflexer / download_twitter_video_from_link.py

Created August 28, 2022 14:43

Download Twitter Video From Link

	import sys,json,re,logging
	import requests


	class getVideo():
	def __init__(self,video_url):
	logging.info("downloading video - ",str(video_url))
	video_id = video_url.split('/')[5].split('?')[0] if 's?=' in video_url else video_url.split('/')[5]
	self.log = {}
	sources = {

gamingflexer / top_headlines.py

Created August 28, 2022 14:41

Scrape Top Headlines for a Particular Hashtag

	import requests,json
	API_KEY = ""

	def headline_script():
	url = "https://contextualwebsearch-websearch-v1.p.rapidapi.com/api/search/NewsSearchAPI"

	querystring = {"q":"TOPIC NEEDED","pageNumber":"1","pageSize":"10","autoCorrect":"true","fromPublishedDate":"null","toPublishedDate":"null"}

	headers = {
	"X-RapidAPI-Key": API_KEY,

gamingflexer / instagram_video_from_link.py

Created August 28, 2022 14:39

Download Instagram Video from link

	def download_image_video(url):

	x = re.match(r'^(https:)[/][/]www.([^/]+[.])*instagram.com', url)

	try:
	if x:
	request_image = requests.get(url)
	src = request_image.content.decode('utf-8')
	check_type = re.search(r'<meta name="medium" content=[\'"]?([^\'" >]+)', src)
	check_type_f = check_type.group()

gamingflexer / twitter_scrapper.py

Created August 28, 2022 14:38

Twitter Scraper | Hashtags & User

	# pip3 install --user --upgrade git+https://github.com/twintproject/twint.git@origin/master#egg=twint

	import os
	import twint

	tempath = "add a temp path folder"

	def top_tweets(username):
	for user in username:
	c = twint.Config()

gamingflexer / detectron2_video_od.py

Last active August 29, 2022 06:21

Detectron 2 | Video Object Detection | Copy and Run

	# install detectron2
	# git clone https://github.com/facebookresearch/detectron2.git
	# cd detectron2
	# pip install -e .
	# cd ..


	import uuid
	from detectron2.engine import DefaultPredictor
	from detectron2.config import get_cfg