This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.datasets import fetch_20newsgroups | |
from sklearn.feature_extraction.text import TfidfVectorizer | |
from sklearn.naive_bayes import MultinomialNB | |
from sklearn import metrics | |
import numpy as np | |
import os | |
import pdb | |
from concurrent.futures import ThreadPoolExecutor, as_completed | |
from transformers import GPT2LMHeadModel, GPT2Tokenizer | |
import torch |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.datasets import fetch_20newsgroups | |
from sklearn.feature_extraction.text import TfidfVectorizer | |
from sklearn.naive_bayes import MultinomialNB | |
from sklearn import metrics | |
import numpy as np | |
import time | |
from anonLLM.llm import OpenaiLanguageModel | |
from keys import OPENAI_API_KEY | |
import os | |
import pdb |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.datasets import fetch_20newsgroups | |
from sklearn.feature_extraction.text import TfidfVectorizer | |
from sklearn.naive_bayes import MultinomialNB | |
from sklearn import metrics | |
import numpy as np | |
from langchain.chat_models import ChatOpenAI | |
from keys import OPENAI_API_KEY | |
import os | |
from langchain.schema import ( | |
HumanMessage, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.datasets import fetch_20newsgroups | |
from langchain.chat_models import ChatOpenAI | |
# Newsgroups the experiment is restricted to.
categories = ['alt.atheism', 'talk.religion.misc', 'comp.graphics', 'sci.space']
# Training split for those categories only; `remove` asks scikit-learn to
# drop headers, footers and quoted replies from each post.
newsgroups_train = fetch_20newsgroups(subset='train', remove=('headers', 'footers', 'quotes'), categories=categories)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Chat model client configured with max_tokens=200.
llm = ChatOpenAI(max_tokens=200)
# Starts empty; the visible snippet never appends to it.
newsgroups_train_enriched = []
data = newsgroups_train.data
for index in range(len(data)):
    # ... (Data preprocessing)
    # NOTE(review): `text` is not defined in the visible snippet; presumably
    # it is produced by the elided preprocessing step above -- confirm.
    messages = [
        SystemMessage(content="You are a helpful assistant that classifies texts."),
        HumanMessage(content=f"Give one category to the following text: {text}. The category should be among this list: {categories}")
    ]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.feature_extraction.text import TfidfVectorizer | |
from sklearn.naive_bayes import MultinomialNB | |
from sklearn import metrics | |
# Fit a TF-IDF vocabulary on the enriched training texts and vectorize them.
vectorizer = TfidfVectorizer()
vectors = vectorizer.fit_transform(newsgroups_train_enriched)
# Multinomial naive Bayes (alpha=.01) trained against the original
# training labels.
clf = MultinomialNB(alpha=.01)
clf.fit(vectors, newsgroups_train.target)
# Test split loaded with the same categories and `remove` settings as the
# training split.
newsgroups_test = fetch_20newsgroups(subset='test', remove=('headers', 'footers', 'quotes'), categories=categories)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
from pathlib import Path | |
from typing import Optional | |
import typer | |
from wasabi import msg | |
from spacy_llm.util import assemble | |
# Short alias for typer.Argument.
Arg = typer.Argument
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import Counter, defaultdict | |
import re | |
def get_stats(vocab):
    """Count how often each adjacent symbol pair occurs across a vocabulary.

    Args:
        vocab: Mapping from a word, written as whitespace-separated symbols
            (e.g. ``"l o w"``), to that word's frequency.

    Returns:
        A ``Counter`` keyed by ``(left, right)`` symbol tuples. Each value is
        the sum of the frequencies of the words in which that pair appears,
        counted once per occurrence.
    """
    pairs = Counter()
    for word, freq in vocab.items():
        symbols = word.split()
        # Pair every symbol with its right-hand neighbour; a word with fewer
        # than two symbols yields no pairs.
        for left, right in zip(symbols, symbols[1:]):
            pairs[left, right] += freq
    return pairs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from transformers import pipeline | |
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report | |
import pandas as pd | |
# Tab-separated file read with header=None; the two columns are named on load.
df = pd.read_csv('amazon_cells_labelled.txt', delimiter='\t', header=None, names=['Review', 'Sentiment'])
# Sentiment-analysis pipeline; no model is named in this call.
classifier = pipeline("sentiment-analysis")
predicted_sentiments = []
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from langchain import hub, LLMChain | |
from langchain.chat_models import ChatOpenAI | |
from keys import OPENAI_API_KEY | |
import os | |
# Publish the API key through the process environment.
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
# Pull the "smithing-gold/assumption-checker" prompt from the hub.
assumption_template = hub.pull("smithing-gold/assumption-checker")
llm = ChatOpenAI()