This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
All content here has been moved to https://github.com/Emekaborisama/100daysofdscode
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from magniv.core import task | |
from datetime import datetime | |
import urllib | |
import json | |
import tweepy as tp | |
#auth for twitter api | |
auth = tp.OAuthHandler('xxxxxxx', 'xxxxxxxx') | |
auth.set_access_token('xxxxx-xxxxx', 'xxxxxxxx') | |
api = tp.API(auth, wait_on_rate_limit=False) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import urllib | |
import json | |
def get_bitcoin_data(): | |
"""get btc info via messari api""" | |
main_result = {} | |
try: | |
url = "https://data.messari.io/api/v1/assets/btc/metrics" | |
resp = urllib.request.urlopen(url).read() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tweepy as tp | |
#auth for twitter api | |
auth = tp.OAuthHandler('xxxxxxxxx', 'xxxxxxx') | |
auth.set_access_token('xxxx-xxxxx', 'xxxxxx') | |
api = tp.API(auth, wait_on_rate_limit=False) | |
try: | |
api.verify_credentials() | |
print("Authentication done") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sentence_transformers import SentenceTransformer,util | |
from transformers import AutoTokenizer, AutoModel | |
import torch | |
import torch.nn.functional as F | |
#Mean Pooling - Take attention mask into account for correct averaging | |
def mean_pooling(model_output, attention_mask): | |
token_embeddings = model_output[0] | |
print(token_embeddings) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sentences we want sentence embeddings for | |
sentences = ['This is an example sentence', 'This is sample of the sentence'] | |
import time | |
start = time.time() | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
torch.onnx.export( | |
model, | |
tuple(encoded_input.values()), | |
f="torch-model.onnx", | |
input_names=['input_ids', 'attention_mask','token_type_ids'], | |
output_names=['logits'], | |
dynamic_axes={'input_ids': {0: 'batch_size', 1: 'sequence'}, | |
'attention_mask': {0: 'batch_size', 1: 'sequence'}, | |
'token_type_ids': {0: 'batch_size', 1: 'sequence'}, | |
'logits': {0: 'batch_size', 1: 'sequence'}}, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import onnxruntime | |
import time | |
ort_session = onnxruntime.InferenceSession("torch-model.onnx", providers=["CPUExecutionProvider"]) | |
def to_numpy(tensor):
    """Return the contents of a torch tensor as a NumPy array on the CPU.

    Args:
        tensor: A ``torch.Tensor``; it may live on any device and may or may
            not be tracking gradients.

    Returns:
        The NumPy array produced by ``tensor.cpu().numpy()``, taken from a
        detached view when the input requires grad.
    """
    # ``Tensor.numpy()`` cannot be called on a tensor that requires grad, so
    # such tensors are detached from the autograd graph first. ``detach`` is a
    # method and has to be *called*; accessing it as an attribute
    # (``tensor.detach.cpu()``) raises AttributeError.
    if tensor.requires_grad:
        tensor = tensor.detach()
    return tensor.cpu().numpy()
def run_inference(input): | |
tokenei= tokenizer(input, padding=True, truncation=True,return_tensors="pt") | |
attention_mask = tokenei['attention_mask'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
#read csv file | |
train_df = pd.read_csv("train.csv") | |
#print the len of the dataframe | |
print(len(train_df)) | |
#print the summary of the dataset | |
train_df.info() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#handle missing values | |
from sklearn.impute import SimpleImputer | |
imp_ = SimpleImputer(missing_values=np.nan, strategy='most_frequent') | |
new_train_df = imp_.fit_transform(train_df) | |
new_train_df = pd.DataFrame(new_train_df, columns = train_df.columns) |
OlderNewer