Skip to content

Instantly share code, notes, and snippets.

@janduplessis883
Created December 30, 2024 14:12
Show Gist options
  • Save janduplessis883/100c59b13f501f6194351188b34936e9 to your computer and use it in GitHub Desktop.
Save janduplessis883/100c59b13f501f6194351188b34936e9 to your computer and use it in GitHub Desktop.
CrewAI FileReadTool() - error
import toml
import pandas as pd
import textwrap
import yaml
from crewai import Agent, Task, Crew, Process, LLM
from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource
from crewai.knowledge.source.pdf_knowledge_source import PDFKnowledgeSource
# Create a knowledge source
from crewai_tools import PDFSearchTool, FileReadTool
# --- Knowledge tools ---------------------------------------------------------
# Most documents under knowledge/ are wired up twice: a PDFSearchTool bound to
# the .pdf file and a FileReadTool bound to a .txt file of the same base name.

# NIHR guidelines (presumably "expression of interest" guidance — confirm).
nihr_guidelines_rag = PDFSearchTool(
    pdf="knowledge/eoi_guidelines_nihr.pdf",
)
nihr_guidelines_content = FileReadTool(
    file_path="knowledge/eoi_guidelines_nihr.txt",
)

# Profile of the research site.
site_profile_rag = PDFSearchTool(
    pdf="knowledge/thechelseapractice.pdf",
)
site_profile_content = FileReadTool(
    file_path="knowledge/thechelseapractice.txt",
)

# Site identification questionnaire (text file only, no PDF counterpart).
site_identification_questions = FileReadTool(
    file_path="knowledge/site_identification_questions.txt",
)

# 🅾️ TODO: update study.txt for each new study, or write a tool that extracts
# the text from the PDF version of the study document.
study_rag = PDFSearchTool(
    pdf="knowledge/study.pdf",
)
study_content = FileReadTool(
    file_path="knowledge/study.txt",
)
# LLM handle with temperature 0 (least-random sampling, so runs are as
# repeatable as the provider allows; this does not guarantee determinism).
# NOTE(review): `llm` is not passed to any Agent in this script — confirm
# whether the agents are expected to use it or take their model from YAML.
llm = LLM(model="gpt-4o-mini", temperature=0)

# Read API keys and service settings from the project's TOML secrets file.
# TOML documents are UTF-8 by specification, so decode explicitly instead of
# relying on the platform's default encoding.
with open("notioncrew/config_secrets.toml", "r", encoding="utf-8") as f:
    config_secrets = toml.load(f)

# Expose each setting as a module-level constant. A key missing from the file
# raises KeyError at this point.
# NOTE(review): these values are only bound to Python names here; nothing in
# this script exports them to os.environ — confirm the libraries can see them.
OPENAI_API_KEY = config_secrets["OPENAI_API_KEY"]
OPENAI_MODEL_NAME = config_secrets["OPENAI_MODEL_NAME"]
NOTION_ENDPOINT = config_secrets["NOTION_ENDPOINT"]
NOTION_VERSION = config_secrets["NOTION_VERSION"]
NOTION_TOKEN = config_secrets["NOTION_TOKEN"]
NOTION_DATABASE_ID = config_secrets["NOTION_DATABASE_ID"]
SERPER_API_KEY = config_secrets["SERPER_API_KEY"]
APPRAISAL_DATABASE_ID = config_secrets["APPRAISAL_DATABASE_ID"]
# Paths of the YAML files that describe the agents and the tasks.
files = {
    "agents": "notioncrew/config/agents.yaml",
    "tasks": "notioncrew/config/tasks.yaml",
}

# Parse each YAML file into `configs`, keyed by "agents" / "tasks".
# The files are decoded explicitly as UTF-8 so that non-ASCII text in the
# prompts is read the same way on every platform.
configs = {}
for config_type, file_path in files.items():
    with open(file_path, "r", encoding="utf-8") as file:
        configs[config_type] = yaml.safe_load(file)

# Convenience aliases used when building the agents and tasks.
agents_config = configs["agents"]
tasks_config = configs["tasks"]
# --- Agents ------------------------------------------------------------------
def _build_agent(config_key, tools):
    """Create an Agent from its ``agents_config`` entry with the given tools."""
    return Agent(config=agents_config[config_key], tools=tools)


# Works from the study PDF.
clinical_researcher_agent = _build_agent(
    "clinical_researcher_agent",
    [study_rag],
)

# Works from the site profile (text and PDF) plus the NIHR guidelines text.
research_coordinator_agent = _build_agent(
    "research_coordinator_agent",
    [site_profile_content, site_profile_rag, nihr_guidelines_content],
)

# Works from the NIHR guidelines text and the questionnaire text.
senior_research_writer_agent = _build_agent(
    "senior_research_writer_agent",
    [nihr_guidelines_content, site_identification_questions],
)

# Works from the NIHR guidelines (text and PDF).
research_auditor_agent = _build_agent(
    "research_auditor_agent",
    [nihr_guidelines_content, nihr_guidelines_rag],
)
# --- Tasks -------------------------------------------------------------------
def _build_task(config_key, agent, output_file, **extra):
    """Create a Task from its ``tasks_config`` entry.

    ``extra`` carries optional Task keyword arguments (for example ``tools``)
    and is forwarded unchanged.
    """
    return Task(
        config=tasks_config[config_key],
        agent=agent,
        output_file=output_file,
        **extra,
    )


review_study_specifications = _build_task(
    "review_study_specifications",
    clinical_researcher_agent,
    "01_the_study.md",
)

review_research_site = _build_task(
    "review_research_site",
    research_coordinator_agent,
    "02_site_profile.md",
)

# The only task that also sets `tools` at task level.
complete_site_identification_questionnaire = _build_task(
    "complete_site_identification_questionnaire",
    senior_research_writer_agent,
    "03_site_identification_questions.md",
    tools=[site_identification_questions],
)

audit_site_identification_questionnaire = _build_task(
    "audit_site_identification_questionnaire",
    research_auditor_agent,
    "04_submission_audit.md",
)
# --- Crew --------------------------------------------------------------------
# Bundle the four agents and their four tasks into one crew.
# Process.sequential: per the CrewAI docs, tasks execute one after another in
# the order they are listed.
crew = Crew(
    process=Process.sequential,
    verbose=True,
    agents=[
        clinical_researcher_agent,
        research_coordinator_agent,
        senior_research_writer_agent,
        research_auditor_agent,
    ],
    tasks=[
        review_study_specifications,
        review_research_site,
        complete_site_identification_questionnaire,
        audit_site_identification_questionnaire,
    ],
)
# Run-specific values. They are handed to kickoff() so that any
# `{study_identifier}` placeholders in the agent/task YAML can be filled in.
inputs = {"study_identifier": "CN012-0023"}
result = crew.kickoff(inputs=inputs)

# Rough cost estimate for the run. Input and output tokens are billed at
# different rates, so they are priced separately.
# NOTE(review): rates are the gpt-4o-mini list prices in USD per 1M tokens at
# the time of writing — confirm against current OpenAI pricing.
INPUT_USD_PER_MILLION_TOKENS = 0.150
OUTPUT_USD_PER_MILLION_TOKENS = 0.600
usage = crew.usage_metrics
costs = (
    INPUT_USD_PER_MILLION_TOKENS * usage.prompt_tokens
    + OUTPUT_USD_PER_MILLION_TOKENS * usage.completion_tokens
) / 1_000_000
print(f"💷 **Total costs**: ${costs:.4f}")

# Show the usage metrics as a one-row DataFrame. `model_dump()` is the
# Pydantic v2 replacement for the deprecated `.dict()`.
df_usage_metrics = pd.DataFrame([usage.model_dump()])
print(df_usage_metrics)

# Raw text of the crew's final output.
markdown_text = result.raw
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment