Franck Stéphane Ndzomga (fsndzomga) - GitHub Gists
import dspy

class IdentifyNames(dspy.Signature):
    """Identify names in the text"""
    text = dspy.InputField()
    names = dspy.OutputField(desc="a list of names found in the text")

text = """Barack Hussein Obama II (/bəˈrɑːk huːˈseɪn oʊˈbɑːmə/ ⓘ bə-RAHK hoo-SAYN oh-BAH-mə;[1] born August 4, 1961) is an American politician who served as the 44th president of the United States from 2009 to 2017. A member of the Democratic Party, he was the first African-American president in U.S. history. Obama previously served as a U.S. senator representing Illinois from 2005 to 2008, as an Illinois state senator from 1997 to 2004, and as a civil rights lawyer and university lecturer.
Obama was born in Honolulu, Hawaii. He graduated from Columbia University in 1983 with a B.A. in political science and later worked as a community organizer in Chicago. In 1988, Obama enrolled in Harvard Law School, where he was the first black president of the Harvard Law Review. He became a civil rights attorney and an academic, teaching constitutional law at the University of Chicago Law School from 1992 to 2004. He also went into elective politics. Obama represented the 13th district in the Illinois Senate from 1997 u"""  # (truncated in the gist preview)

namesIdentifier = dspy.ChainOfThought(IdentifyNames)
result = namesIdentifier(text=text)
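# A minimal usage sketch (assumed, not part of the original snippet): the call above
# only runs once a language model has been configured. The model name mirrors the one
# used in the snippets below.
llm = dspy.OpenAI(model='gpt-3.5-turbo')
dspy.settings.configure(lm=llm)

result = namesIdentifier(text=text)
print(result.names)      # the extracted names
print(result.rationale)  # ChainOfThought also exposes its intermediate reasoning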
from dspy.datasets import HotPotQA
# Load the dataset
dataset = HotPotQA(train_seed=1, train_size=20, eval_seed=2024, dev_size=50, test_size=0)
# Tell DSPy that the 'question' field is the input. Any other fields are labels and/or metadata.
trainset = [x.with_inputs('question') for x in dataset.train]
devset = [x.with_inputs('question') for x in dataset.dev]
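# Quick sanity check (illustrative, not part of the original snippet): each HotPotQA
# example carries a question and its gold answer.
example = trainset[0]
print(example.question)
print(example.answer)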
class GenerateAnswer(dspy.Signature):
    """Answer questions with short factoid answers."""
    # The gist preview cuts the class off after its first line; the fields below follow
    # the DSPy intro-notebook pattern, and their descriptions are illustrative.
    context = dspy.InputField(desc="may contain relevant facts")
    question = dspy.InputField()
    answer = dspy.OutputField(desc="often between 1 and 5 words")
llm = dspy.OpenAI(model='gpt-3.5-turbo')
colbertv2_wiki = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')
dspy.settings.configure(lm=llm, rm=colbertv2_wiki)
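# A RAG-style module sketch (assumed; it follows the usual DSPy intro-notebook pattern
# and is not part of the gist preview). It wires the ColBERTv2 retriever configured
# above to the GenerateAnswer signature.
class RAG(dspy.Module):
    def __init__(self, num_passages=3):
        super().__init__()
        self.retrieve = dspy.Retrieve(k=num_passages)
        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)

    def forward(self, question):
        context = self.retrieve(question).passages
        prediction = self.generate_answer(context=context, question=question)
        return dspy.Prediction(context=context, answer=prediction.answer)

# Example call (uses the devset loaded earlier):
# RAG()(question=devset[0].question).answer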
%load_ext autoreload
%autoreload 2

import sys
import os

try:
    import google.colab
    repo_path = 'dspy'
    !git -C $repo_path pull origin || git clone https://github.com/stanfordnlp/dspy $repo_path
except ImportError:
    # Not running on Colab: assume the repo is already available locally.
    repo_path = '.'
import matplotlib.pyplot as plt
import seaborn as sns

# Creating a bar plot for the coefficients
# ('coefficients' holds the per-channel slopes of the fitted linear model,
# i.e. model.coef_; see the regression snippet further down)
channels = ['TikTok', 'Facebook', 'Google Ads']
impact_on_sales = coefficients

plt.figure(figsize=(10, 6))
sns.barplot(x=channels, y=impact_on_sales, palette="Blues_d")
plt.title('Impact of Advertising Spend on Sales by Channel')
plt.xlabel('Advertising Channels')
plt.ylabel('Increase in Sales per Unit Spend')
plt.show()
# Predicting on the entire dataset for a comprehensive comparison
y_all_pred = model.predict(X)
plt.figure(figsize=(10, 6))
plt.scatter(y, y_all_pred, alpha=0.5)
plt.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=2) # Line of perfect prediction
plt.xlabel('Actual Sales')
plt.ylabel('Predicted Sales')
plt.title('Actual vs. Predicted Sales')
plt.show()
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
# Data Preparation
X = marketing_data[['TikTok', 'Facebook', 'Google Ads']]
y = marketing_data['Sales']
# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
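# Model fitting is not shown in the gist previews; this sketch fills that step in,
# with variable names chosen to match the plotting snippets above.
model = LinearRegression()
model.fit(X_train, y_train)

# Hold-out evaluation
y_pred = model.predict(X_test)
print("MSE:", mean_squared_error(y_test, y_pred))
print("R^2:", r2_score(y_test, y_pred))

# Per-channel slopes, used as 'coefficients' in the bar-plot snippet above
coefficients = model.coef_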
import pandas as pd
# Load the dataset
file_path = '/marketing/data/marketing_mix.csv'
marketing_data = pd.read_csv(file_path)
# Display the first few rows of the dataset to understand its structure
marketing_data.head()
from anonLLM.llm import OpenaiLanguageModel as Brain
from typing import Callable
from dotenv import load_dotenv
from pydantic import BaseModel
import inspect
import datetime
import wikipedia
import os
load_dotenv()