Skip to content

Instantly share code, notes, and snippets.

View fsndzomga's full-sized avatar

Franck Stéphane Ndzomga fsndzomga

View GitHub Profile
@fsndzomga
fsndzomga / data_enrichment_gpt2.py
Created September 11, 2023 16:23
Data enrichment case 1
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics
import numpy as np
import os
import pdb
from concurrent.futures import ThreadPoolExecutor, as_completed
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch
@fsndzomga
fsndzomga / data_enrichment_gpt3_anon.py
Created September 11, 2023 16:24
Data enrichment using GPT-3 via anonLLM
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics
import numpy as np
import time
from anonLLM.llm import OpenaiLanguageModel
from keys import OPENAI_API_KEY
import os
import pdb
@fsndzomga
fsndzomga / data_enrichment_gpt3_langch.py
Created September 11, 2023 16:25
Data enrichment using GPT-3 and LangChain
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics
import numpy as np
from langchain.chat_models import ChatOpenAI
from keys import OPENAI_API_KEY
import os
from langchain.schema import (
HumanMessage,
@fsndzomga
fsndzomga / import-train.py
Created September 12, 2023 11:35
Import train dataset
from sklearn.datasets import fetch_20newsgroups
from langchain.chat_models import ChatOpenAI
# The four newsgroups kept from the 20-newsgroups corpus.
categories = [
    'alt.atheism',
    'talk.religion.misc',
    'comp.graphics',
    'sci.space',
]

# Training split restricted to those groups, fetched with the
# headers, footers and quoted replies removed from every post.
newsgroups_train = fetch_20newsgroups(
    subset='train',
    remove=('headers', 'footers', 'quotes'),
    categories=categories,
)
@fsndzomga
fsndzomga / enrichment.py
Created September 12, 2023 11:37
Data enrichment
# Chat model used to label each training document (constructed with max_tokens=200).
llm = ChatOpenAI(max_tokens=200)

# Collects the enriched version of every training document.
newsgroups_train_enriched = []
data = newsgroups_train.data

for index in range(len(data)):
    # ... (Data preprocessing)
    # NOTE(review): `text` is not assigned anywhere in the visible code;
    # presumably the elided preprocessing derives it from data[index] — confirm.
    # Prompt asking the model to pick one label from `categories` for this text.
    messages = [
        SystemMessage(content="You are a helpful assistant that classifies texts."),
        HumanMessage(content=f"Give one category to the following text: {text}. The category should be among this list: {categories}"),
    ]
    # NOTE(review): the snippet ends here; the code that sends `messages` to
    # `llm` and appends to `newsgroups_train_enriched` is not visible.
@fsndzomga
fsndzomga / training-validation.py
Created September 12, 2023 11:39
Training and validation
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics
# Build TF-IDF features from the enriched training texts.
vectorizer = TfidfVectorizer()
vectors = vectorizer.fit_transform(newsgroups_train_enriched)

# Fit a multinomial naive Bayes classifier (alpha=0.01) on those features,
# using the original training labels as targets.
clf = MultinomialNB(alpha=0.01)
clf.fit(vectors, newsgroups_train.target)

# Test split, fetched with the same categories and the same
# header/footer/quote removal as the training split.
newsgroups_test = fetch_20newsgroups(
    subset='test',
    remove=('headers', 'footers', 'quotes'),
    categories=categories,
)
@fsndzomga
fsndzomga / relation_extraction.py
Created September 15, 2023 13:06
relation extraction example
import os
from pathlib import Path
from typing import Optional
import typer
from wasabi import msg
from spacy_llm.util import assemble
# Short module-level alias for typer.Argument.
Arg = typer.Argument
@fsndzomga
fsndzomga / bpe.py
Created September 18, 2023 22:20
Byte Pair Encoding Algorithm
from collections import Counter, defaultdict
import re
def get_stats(vocab):
    """Count adjacent symbol pairs across a vocabulary, weighted by frequency.

    Args:
        vocab: Mapping from a word, written as whitespace-separated symbols
            (for example ``"l o w"``), to that word's frequency.

    Returns:
        A ``Counter`` keyed by ``(left_symbol, right_symbol)`` tuples. Each
        occurrence of a pair inside a word adds that word's frequency to the
        pair's count. Words with fewer than two symbols contribute nothing.
    """
    pairs = Counter()
    for word, freq in vocab.items():
        symbols = word.split()
        # Zipping the list with itself shifted by one yields every adjacent
        # pair and naturally yields nothing for words shorter than two symbols.
        for left, right in zip(symbols, symbols[1:]):
            pairs[left, right] += freq
    return pairs
@fsndzomga
fsndzomga / transformers-sentiment-analysis.py
Created September 18, 2023 22:56
Using the transformers library for sentiment analysis
from transformers import pipeline
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import pandas as pd
# Load the tab-delimited review file; it is read without a header row,
# so the two column names are supplied explicitly.
df = pd.read_csv(
    'amazon_cells_labelled.txt',
    delimiter='\t',
    header=None,
    names=['Review', 'Sentiment'],
)

# Sentiment-analysis pipeline created without naming a specific model.
classifier = pipeline("sentiment-analysis")

# Starts empty; nothing in the visible snippet fills it yet.
predicted_sentiments = list()
@fsndzomga
fsndzomga / assumption-checker.py
Created September 19, 2023 14:04
Assumption checker using langchain
from langchain import hub, LLMChain
from langchain.chat_models import ChatOpenAI
from keys import OPENAI_API_KEY
import os
# Publish the key from the local `keys` module through the process
# environment before the chat model below is constructed.
os.environ.update({"OPENAI_API_KEY": OPENAI_API_KEY})

# Pull the assumption-checker prompt from the LangChain hub.
assumption_template = hub.pull(
    "smithing-gold/assumption-checker",
)

# Chat model constructed with no explicit arguments.
llm = ChatOpenAI()