|
import json
from typing import List, Optional

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_groq import ChatGroq
|
|
|
class Specimen(BaseModel):
    # One specimen record with its collection details.
    #
    # Every field defaults to None, so a value that is missing from the
    # extraction result is simply left unset.
    #
    # Per-field guidance is passed as `description` rather than `title`.
    # NOTE(review): LangChain's tool-schema conversion appears to strip `title`
    # entries before the schema reaches the model, which would discard this
    # guidance (including the YYYY-MM-DD hint) -- confirm against the installed
    # langchain-core version.
    #
    # Documentation lives in `#` comments instead of a class docstring because
    # pydantic copies a model docstring into the generated JSON schema.

    country: Optional[str] = Field(
        None,
        description="The country in which the specimen was collected",
    )
    stateProvince: Optional[str] = Field(
        None,
        description="The state or province in which the specimen was collected",
    )
    locality: Optional[str] = Field(
        None,
        description="The locality in which the specimen was collected",
    )
    decimalLatitude: Optional[str] = Field(
        None,
        description="The decimal latitude of the collection point of the specimen",
    )
    decimalLongitude: Optional[str] = Field(
        None,
        description="The decimal longitude of the collection point of the specimen",
    )
    eventDate: Optional[str] = Field(
        None,
        description="The date on which the specimen was collected in YYYY-MM-DD format",
    )
    recordedBy: Optional[str] = Field(
        None,
        description="The names of the collectors who gathered the specimen",
    )
    recordNumber: Optional[str] = Field(
        None,
        description="The number allocated to the collection event by the collectors in the field",
    )
    catalogNumber: Optional[str] = Field(
        None,
        description="The catalog number assigned to the specimen on accession into an institutional specimen collection",
    )
    institutionCode: Optional[str] = Field(
        None,
        description="The alphabetic code of the institution in which the specimen is housed",
    )

    def toJson(self) -> str:
        """Return this specimen's field values serialized as a JSON object string."""
        # `.dict()` is the public pydantic-v1 export API; it replaces reaching
        # into `__dict__` through a `default=` hook.
        return json.dumps(self.dict())
|
|
|
class SpecimenList(BaseModel):
    # Wrapper model holding the Specimen records; `main` passes it to
    # `with_structured_output` as the target schema.
    # `specimen_list` falls back to None when no value is supplied.
    # Kept as `#` comments: a class docstring would be copied into the
    # generated JSON schema.
    specimen_list: List[Specimen] = Field(
        default=None,
        title="Specimens",
    )
|
|
|
# System prompt: sets the model's role and tells it not to add data that has no
# basis in the input. Built from adjacent string literals so the text is one
# continuous paragraph with no backslash continuation or stray characters.
system = (
    "You are an expert at reformatting scientific information about specimens "
    "from natural history collections. "
    "You only ever reformat data, you don't add data for which there is no basis in the input"
)
|
|
|
# Sample free-text specimen citations that `main` sends to the extraction
# chain. The line breaks inside the literal are part of the text; the backtick
# after each degree value is kept exactly as it appears in the data.
specimen_data = """Colombia. Antioquia: Mun. Urrao, between Urrao and Caicedo, 21 km E of Urrao, near high point on
road, 6°24`N, 76°02`W, 27 Feb 1989, MacDougal et al. 4251 (MO). Cundinamarca: 17 Feb 1950, von
Sneidern 5825 (S). Ecuador. Napo: Parroquia Cosanga, 6 kms de la carretera Cosanga-El Aliso, 23
Aug 1990, Jaramillo et al. 12110 (MO). Canton Quijos, Río Aliso, 8 km al suroeste de Cosanga,
0°37`S, 77°56`W, 15 Nov 1998, Vargas et al. 3043 (MO). Pichincha: km 59 de la carretera antigua
Quito-Santo Domingo de los Colorados, a 3.5 km al NE de la carretera, 28 Mar 1987, Zak 1857A (F, MO)"""
|
|
|
def main() -> None:
    """Extract structured specimen records from `specimen_data` and print them.

    Builds a Groq chat model, wraps it so that replies are parsed into a
    `SpecimenList`, sends the sample text as the human message, and prints one
    JSON object per extracted specimen.

    Raises:
        SystemExit: if the chain returns no structured result at all.
    """
    # NOTE(review): confirm this model id is still offered by Groq.
    llm = ChatGroq(
        model="mixtral-8x7b-32768",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
    )

    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system),
            # Optional slot for example messages; nothing is supplied below.
            MessagesPlaceholder("examples", optional=True),
            ("human", "{question}"),
        ]
    )

    specimen_chain = prompt | llm.with_structured_output(SpecimenList)

    # Name the template variable explicitly instead of relying on a bare
    # string being mapped onto the prompt's single required variable.
    specimens = specimen_chain.invoke({"question": specimen_data})

    # Guard in case the structured-output step yields nothing to parse.
    if specimens is None:
        raise SystemExit("The model returned no structured specimen data.")

    # `specimen_list` defaults to None, so fall back to an empty sequence.
    for specimen in specimens.specimen_list or ():
        print(specimen.toJson())
|
|
|
# Run the extraction only when this file is executed as a script.
if __name__ == "__main__":
    main()