Created
October 8, 2024 09:04
-
-
Save jexp/45e9902853feebba0a55b8736a59a7e6 to your computer and use it in GitHub Desktop.
Devoxx 2024 Schedule as Neo4j including Vector Embeddings and Vector Search
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Sample input: one talk record in the shape read by the import statement further down.
rows = [ | |
{ | |
"id": 12124, | |
"title": "Meet Chicory, exploit the power of WebAssembly on the server side!", | |
"description": "WebAssembly is a rapidly emerging technology that enables the execution of code written in various languages while providing strong sandboxing and safety guarantees.<br>Initially developed for the web to enhance browser capabilities, developers soon recognized the potential of reusing Wasm modules in server-side applications. wazero, a native Go runtime for Wasm, played a pivotal role in showcasing the versatility and power of this solution. With its widespread adoption and integration into diverse applications, wazero demonstrated the value of using Wasm modules beyond the web environment.<br>Inspired by the goals of wazero, we launched Chicory, a pure Java interpreter, with zero dependencies, for Wasm. Chicory empowers developers to load and execute Wasm modules with fine-grained control over their interactions with the system and memory allocation. Notably, Chicory seamlessly integrates with barebone JVM runtimes, eliminating any system dependencies.<br>In this presentation, we will explore the exciting possibilities that Chicory offers for the JVM ecosystem. Through practical, real-world examples, we will showcase how Chicory can be seamlessly integrated into your application, enabling you to run Wasm programs within minutes. Additionally, we will discuss the various approaches to designing integrations, exploring the trade-offs associated with each option.", | |
"summary": "WebAssembly\nWasm Modules\nChicory\nJVM Ecosystem", | |
"afterVideoURL": null, | |
"podcastURL": null, | |
"audienceLevel": "INTERMEDIATE", | |
"language": null, | |
"totalFavourites": 42, | |
"track": { | |
"id": 2758, | |
"name": "Mind the Geek", | |
"description": "Developer candy: stuff we want to know about but dont (generally) at work, Robotics, biological computing, cybernetics, AI, new toys, tomorrows world", | |
"imageURL": "https://devoxx-tracks.s3.eu-west-1.amazonaws.com/mind-the-geek.png" | |
}, | |
"sessionType": { | |
"id": 957, | |
"name": "Tools-in-Action", | |
"duration": 30, | |
"pause": false, | |
"description": "Half an hour sessions focused on demonstrating technical tools or solutions.", | |
"cssColor": null | |
}, | |
"speakers": [ | |
{ | |
"id": 5729, | |
"firstName": "Andrea", | |
"lastName": "Peruffo", | |
"fullName": "Andrea Peruffo", | |
"bio": "With nearly two decades of coding experience, I'm fueled by passion as I continue to type away daily.<br>As a Principal Software Engineer at Red Hat, I actively contribute to diverse Open Source projects, driven by both personal fulfillment and professional advancement. My not-so-secret passion lies in programming languages, developer tools, compilers, and beyond. Come and spot me on a project near you!", | |
"anonymizedBio": null, | |
"company": "Red Hat", | |
"imageUrl": "https://devoxxian-image-thumbnails.s3-eu-west-1.amazonaws.com/profile-ff9843a3-a619-41ad-bcd0-33196cc60504.jpeg", | |
"twitterHandle": "@and_prf", | |
"linkedInUsername": null | |
} | |
], | |
"keywords": [ | |
{ | |
"name": "WebAssembly" | |
}, | |
{ | |
"name": "JVM Ecosystem" | |
}, | |
{ | |
"name": "wazero" | |
}, | |
{ | |
"name": "Chicory" | |
} | |
], | |
"timeSlots": [] | |
} | |
] | |
*/ | |
// Uniqueness constraints on the key properties that the import MERGEs on.
// Each constraint is named and created with IF NOT EXISTS so the script can be
// re-run against an already initialised database without failing.
CREATE CONSTRAINT sessionId IF NOT EXISTS FOR (s:Session) REQUIRE s.id IS UNIQUE;
CREATE CONSTRAINT trackId IF NOT EXISTS FOR (t:Track) REQUIRE t.id IS UNIQUE;
CREATE CONSTRAINT sessionTypeId IF NOT EXISTS FOR (st:SessionType) REQUIRE st.id IS UNIQUE;
CREATE CONSTRAINT speakerId IF NOT EXISTS FOR (sp:Speaker) REQUIRE sp.id IS UNIQUE;
CREATE CONSTRAINT keywordName IF NOT EXISTS FOR (k:Keyword) REQUIRE k.name IS UNIQUE;
// Load the talks from the public CFP API and import them as a graph:
//   (:Session)-[:BELONGS_TO]->(:Track)
//   (:Session)-[:HAS_TYPE]->(:SessionType)
//   (:Session)-[:HAS_SPEAKER]->(:Speaker)
//   (:Session)-[:HAS_KEYWORD]->(:Keyword)
// Every write is a MERGE on the node's key property, so re-running the import
// updates existing nodes instead of duplicating them.
// Requires the APOC plugin (apoc.load.json).
CALL apoc.load.json("https://dvbe24.cfp.dev/api/public/talks?sort=name,asc") YIELD value AS row
MERGE (s:Session {id: row.id})
SET s += row {.title, .description, .summary, .afterVideoURL, .podcastURL, .audienceLevel, .language, .totalFavourites}
// MERGE raises an error when a key property is null. Optional nested objects are
// therefore wrapped in a zero-or-one element list, so a talk without a track or
// session type is still imported rather than aborting the whole load.
FOREACH (track IN CASE WHEN row.track.id IS NULL THEN [] ELSE [row.track] END |
  MERGE (t:Track {id: track.id})
  SET t += track {.name, .description, .imageURL}
  MERGE (s)-[:BELONGS_TO]->(t)
)
FOREACH (type IN CASE WHEN row.sessionType.id IS NULL THEN [] ELSE [row.sessionType] END |
  MERGE (st:SessionType {id: type.id})
  SET st += type {.name, .duration, .pause, .description}
  MERGE (s)-[:HAS_TYPE]->(st)
)
// A missing list is treated as empty; entries without a key value are skipped.
FOREACH (speaker IN [x IN coalesce(row.speakers, []) WHERE x.id IS NOT NULL] |
  MERGE (sp:Speaker {id: speaker.id})
  SET sp += speaker {.firstName, .lastName, .fullName, .bio, .anonymizedBio, .company, .imageUrl, .twitterHandle, .linkedInUsername}
  MERGE (s)-[:HAS_SPEAKER]->(sp)
)
FOREACH (keyword IN [x IN coalesce(row.keywords, []) WHERE x.name IS NOT NULL] |
  MERGE (k:Keyword {name: keyword.name})
  MERGE (s)-[:HAS_KEYWORD]->(k)
);
// Vector indexes on the `embedding` property of Speaker and Session nodes.
// Both are configured for 1536-dimensional vectors compared by cosine similarity.
CREATE VECTOR INDEX speakerEmbeddings IF NOT EXISTS
FOR (speaker:Speaker)
ON speaker.embedding
OPTIONS {
  indexConfig: {
    `vector.dimensions`: 1536,
    `vector.similarity_function`: 'cosine'
  }
};

CREATE VECTOR INDEX sessionEmbeddings IF NOT EXISTS
FOR (session:Session)
ON session.embedding
OPTIONS {
  indexConfig: {
    `vector.dimensions`: 1536,
    `vector.similarity_function`: 'cosine'
  }
};
// Text index on speaker names. Named and created with IF NOT EXISTS, like the
// vector indexes in this script, so that re-running the script does not fail.
CREATE TEXT INDEX speakerFullName IF NOT EXISTS FOR (sp:Speaker) ON (sp.fullName);
// Before running the statements below, provide your OpenAI API key as the $token parameter, e.g.:
// :param token="sk-..."
// Compute and store an embedding for every Session that does not have one yet.
// The embedded text is title, summary and description joined by newlines;
// missing parts are replaced by empty strings.
MATCH (session:Session)
WHERE session.embedding IS NULL
WITH session,
     genai.vector.encode(
       coalesce(session.title, "") + "\n" + coalesce(session.summary, "") + "\n" + coalesce(session.description, ""),
       "OpenAI",
       {token: $token, model: "text-embedding-3-small"}
     ) AS vector
CALL db.create.setNodeVectorProperty(session, 'embedding', vector);
// Compute and store an embedding for every Speaker that does not have one yet.
// The embedded text is full name, company and bio joined by newlines;
// missing parts are replaced by empty strings.
MATCH (speaker:Speaker)
WHERE speaker.embedding IS NULL
WITH speaker,
     genai.vector.encode(
       coalesce(speaker.fullName, "") + "\n" + coalesce(speaker.company, "") + "\n" + coalesce(speaker.bio, ""),
       "OpenAI",
       {token: $token, model: "text-embedding-3-small"}
     ) AS vector
CALL db.create.setNodeVectorProperty(speaker, 'embedding', vector);
// Vector search combined with graph traversal: embed the question, fetch the
// 5 nearest sessions from the sessionEmbeddings index, keep those scoring
// above 0.7, and return each keyword-session-speaker path with its score.
WITH genai.vector.encode(
       "What's new about Valhalla?",
       "OpenAI",
       {token: $token, model: "text-embedding-3-small"}
     ) AS queryVector
CALL db.index.vector.queryNodes('sessionEmbeddings', 5, queryVector)
YIELD node AS session, score
WHERE score > 0.7
MATCH path = (keyword)<-[:HAS_KEYWORD]-(session)-[:HAS_SPEAKER]->(speaker)
RETURN path, score;
// Same search as a table: embed the question, fetch the 5 nearest sessions from
// the sessionEmbeddings index, keep those scoring above 0.7, and return one row
// per session with its speakers and keywords aggregated into lists.
WITH "Talks about Kubernetes and Openshift?" AS question
WITH genai.vector.encode(question, "OpenAI", {token: $token, model: "text-embedding-3-small"}) AS questionEmbedding
CALL db.index.vector.queryNodes('sessionEmbeddings', 5, questionEmbedding) YIELD node AS s, score
WHERE score > 0.7
MATCH (kw)<-[:HAS_KEYWORD]-(s)-[:HAS_SPEAKER]->(sp)
RETURN score,
       s.title,
       // String concatenation with null yields null and collect() skips nulls, so the
       // company suffix is made optional; otherwise speakers without a company would
       // be dropped from the list.
       collect(DISTINCT sp.fullName + coalesce(", " + sp.company, "")) AS speakers,
       collect(DISTINCT kw.name) AS keywords,
       s.summary,
       s.description
// Row order after aggregation is unspecified; list the best matches first.
ORDER BY score DESC;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment