This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from transformers import PreTrainedTokenizerFast | |
# Build a fast tokenizer directly from a serialized tokenizer JSON file.
# NOTE(review): the path below is machine-specific; point it at wherever the
# tokenizer JSON actually lives before running.
fast_tokenizer = PreTrainedTokenizerFast(tokenizer_file="/home/ubuntu/LLM/module/claude-v1-tokenization.json")
text = "Hello, this is a test input."
# Tokenize the sample sentence.
tokens = fast_tokenizer.tokenize(text)
# A bare `tokens` expression is only displayed in a notebook/REPL; print it so
# the result is also visible when this runs as a script.
print(tokens)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline | |
# The model and its tokenizer are loaded from the same checkpoint, so the
# identifier is named once instead of being repeated in both calls.
NLLB_CHECKPOINT = "facebook/nllb-200-distilled-600M"
model = AutoModelForSeq2SeqLM.from_pretrained(NLLB_CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(NLLB_CHECKPOINT)
# For source and target language codes, see:
# https://github.com/facebookresearch/flores/tree/main/flores200#languages-in-flores-200

# Translation pipelines already built, keyed by (src_lang, tgt_lang), so that
# repeated calls for the same language pair reuse one pipeline object.
_translators = {}


def translation(text, src_lang, tgt_lang):
    """Translate ``text`` from ``src_lang`` to ``tgt_lang``.

    Uses the module-level ``model`` and ``tokenizer`` with a ``'translation'``
    pipeline capped at ``max_length=400``.

    Args:
        text: Input passed straight to the translation pipeline.
        src_lang: Source language code (see the FLORES-200 list linked above).
        tgt_lang: Target language code (see the FLORES-200 list linked above).

    Returns:
        Whatever the pipeline call returns for ``text``.
    """
    pair = (src_lang, tgt_lang)
    translator = _translators.get(pair)
    if translator is None:
        # First request for this language pair: build the pipeline once.
        translator = pipeline(
            'translation',
            model=model,
            tokenizer=tokenizer,
            src_lang=src_lang,
            tgt_lang=tgt_lang,
            max_length=400,
        )
        _translators[pair] = translator
    return translator(text)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
className = {0: u'__background__', | |
1: u'person', | |
2: u'bicycle', | |
3: u'car', | |
4: u'motorcycle', | |
5: u'airplane', | |
6: u'bus', | |
7: u'train', | |
8: u'truck', | |
9: u'boat', |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import urllib | |
import pandas as pd | |
from requests_html import HTML | |
from requests_html import HTMLSession | |
def get_source(url): | |
"""Return the source code for the provided URL. | |
Args: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os,re,string,json,emoji,csv | |
import numpy as np | |
import pandas as pd | |
def clean_text(text): | |
'''Clean emoji, Make text lowercase, remove text in square brackets,remove links,remove punctuation | |
and remove words containing numbers.''' | |
text = emoji.demojize(text) | |
text = re.sub(r'\:(.*?)\:', '', text) | |
text = str(text).lower() # Making Text Lowercase |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys,json,re,logging | |
import requests | |
class getVideo(): | |
def __init__(self,video_url): | |
logging.info("downloading video - ",str(video_url)) | |
video_id = video_url.split('/')[5].split('?')[0] if 's?=' in video_url else video_url.split('/')[5] | |
self.log = {} | |
sources = { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests,json | |
API_KEY = "" | |
def headline_script(): | |
url = "https://contextualwebsearch-websearch-v1.p.rapidapi.com/api/search/NewsSearchAPI" | |
querystring = {"q":"TOPIC NEEDED","pageNumber":"1","pageSize":"10","autoCorrect":"true","fromPublishedDate":"null","toPublishedDate":"null"} | |
headers = { | |
"X-RapidAPI-Key": API_KEY, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def download_image_video(url): | |
x = re.match(r'^(https:)[/][/]www.([^/]+[.])*instagram.com', url) | |
try: | |
if x: | |
request_image = requests.get(url) | |
src = request_image.content.decode('utf-8') | |
check_type = re.search(r'<meta name="medium" content=[\'"]?([^\'" >]+)', src) | |
check_type_f = check_type.group() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pip3 install --user --upgrade git+https://github.com/twintproject/twint.git@origin/master#egg=twint | |
import os | |
import twint | |
tempath = "add a temp path folder" | |
def top_tweets(username): | |
for user in username: | |
c = twint.Config() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# install detectron2 | |
# git clone https://github.com/facebookresearch/detectron2.git | |
# cd detectron2 | |
# pip install -e . | |
# cd .. | |
import uuid | |
from detectron2.engine import DefaultPredictor | |
from detectron2.config import get_cfg |