hideokamoto · May 18, 2024 14:19
diff --git a/hono.ts b/hono.ts
 import { Hono } from 'hono'
 import { CloudflareVectorizeStore } from "langchain/vectorstores/cloudflare_vectorize";
 import { OpenAIEmbeddings } from "langchain/embeddings/openai";
 import { ChatOpenAI } from "langchain/chat_models/openai";
 import { Document } from 'langchain/document';
 import {
    RunnablePassthrough,
    RunnableSequence,
  } from "langchain/schema/runnable";
  import { StringOutputParser } from "langchain/schema/output_parser";
  import {
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
  } from "langchain/prompts";

 /**
  * Environment bindings that handlers read through `c.env`.
  */
 type AppBindings = {
   // Note carried over from the original author (translated): the `Ai` class
   // typings are `any` as well, so this shape is considered good enough for now.
   OPENAI_API_KEY: string;
   WORKER_AI_API_KEY: string;
   VECTORIZE_INDEX: VectorizeIndex;
 };
 /** The Hono application, typed so that `c.env` exposes `AppBindings`. */
 const app = new Hono<{ Bindings: AppBindings }>();

 /** A post field whose value is delivered under a `rendered` key. */
 type RenderedField = { rendered: string };
 /**
  * The subset of a WordPress REST API post object (`/wp-json/wp/v2/posts`)
  * that this file declares.
  */
 type WPPost = {
   id: number;
   title: RenderedField;
   content: RenderedField;
   excerpt: RenderedField;
 };
 /**
  * POST /index — fetches up to 50 posts from the WordPress REST API and adds
  * them to the Vectorize-backed vector store, using each post ID as the
  * document ID.
  *
  * Each document's text is built from the post title and excerpt; the post ID
  * is also stored in the document metadata as `post_id`.
  *
  * Responds with `{ success: true }` on success. Any failure (non-2xx upstream
  * response, unparsable body, embedding or indexing error) is logged and
  * reported as HTTP 500 with `{ success: false, error }`.
  */
 app.post('/index', async c => {
   const embeddings = new OpenAIEmbeddings({
     openAIApiKey: c.env.OPENAI_API_KEY,
   })
   const store = new CloudflareVectorizeStore(embeddings, {
     index: c.env.VECTORIZE_INDEX,
   })
   try {
     const fetchResult = await fetch('https://{YOUR_WP_SITE_URL}/wp-json/wp/v2/posts?per_page=50')
     // A non-2xx response carries an error payload rather than a post list,
     // so fail early instead of trying to index it.
     if (!fetchResult.ok) {
       throw new Error(`Failed to fetch posts: ${fetchResult.status} ${fetchResult.statusText}`)
     }
     const posts = await fetchResult.json<WPPost[]>()
     if (!Array.isArray(posts)) {
       throw new Error('Unexpected response: expected an array of posts')
     }
     // Nothing to index; skip the embedding and vector store calls entirely.
     if (posts.length === 0) {
       return c.json({ success: true })
     }
     const documents = posts.map((post) => ({
       pageContent: `Title:\n${post.title.rendered}\nContent:\n${post.excerpt.rendered}`,
       metadata: {
         post_id: post.id.toString(),
       },
     }))
     const documentIndex = posts.map((post) => post.id.toString())
     await store.addDocuments(documents, { ids: documentIndex })
     return c.json({ success: true })
   } catch (e: unknown) {
     console.error(e)
     // Error instances serialize to `{}`, so send the message explicitly and
     // signal the failure through the status code.
     const message = e instanceof Error ? e.message : String(e)
     return c.json({ success: false, error: message }, 500)
   }
 })

 /**
  * POST /ask — answers a question with a chat model, using documents
  * retrieved from the vector store as context, and streams the answer back
  * as plain text.
  *
  * Expects a JSON body of the form `{ "question": string }`.
  * - A body that is not a JSON object yields HTTP 400.
  * - A missing, empty, or non-string `question` yields an empty text response.
  */
 app.post('/ask', async c => {
   // Malformed JSON makes `c.req.json()` reject; report it as a client error
   // instead of letting it surface as an unhandled exception.
   const body = await c.req.json<{ question?: unknown }>().catch(() => null)
   if (body === null || typeof body !== 'object') {
     return c.text('Invalid JSON body', 400)
   }
   const question = body.question
   if (typeof question !== 'string' || question === '') {
     return c.text('')
   }
   const model = new ChatOpenAI({
     temperature: 0,
     openAIApiKey: c.env.OPENAI_API_KEY,
     streaming: true,
     cache: true,
   });
   const embeddings = new OpenAIEmbeddings({
     openAIApiKey: c.env.OPENAI_API_KEY,
   })
   const store = new CloudflareVectorizeStore(embeddings, {
     index: c.env.VECTORIZE_INDEX,
   });
   // Joins the text of the retrieved documents into one context string.
   const serializedDocs = (docs: Array<Document>) =>
     docs.map((doc) => doc.pageContent).join("\n\n");
   // Initialize a retriever wrapper around the vector store
   const vectorStoreRetriever = store.asRetriever();
   // Create a system & human prompt for the chat model
   const SYSTEM_TEMPLATE = `Use the following pieces of context to answer the question at the end.
     If you don't know the answer, just say that you don't know, don't try to make up an answer.
     ----------------
     {context}`;
   const messages = [
     SystemMessagePromptTemplate.fromTemplate(SYSTEM_TEMPLATE),
     HumanMessagePromptTemplate.fromTemplate("{question}"),
   ];
   const prompt = ChatPromptTemplate.fromMessages(messages);
   // The question feeds both the retriever (to build `context`) and the
   // prompt's `question` slot.
   const chain = RunnableSequence.from([
     {
       context: vectorStoreRetriever.pipe(serializedDocs),
       question: new RunnablePassthrough(),
     },
     prompt,
     model,
     new StringOutputParser(),
   ]);
   const answerStream = await chain.stream(question, {
     callbacks: [
       {
         handleLLMNewToken(token: string) {
           console.log({ token });
         },
       },
     ],
   });
   // Forward each chunk to the client as it arrives.
   return c.streamText(async (stream) => {
     for await (const chunk of answerStream) {
       await stream.write(chunk)
       await stream.sleep(10)
     }
   })
 })

 /**
  * GET / — serves a minimal HTML page with a question form.
  *
  * On submit, the inline script posts the question to `/ask` and appends the
  * streamed answer to the `#ai-content` element as it arrives.
  */
 app.get('/', c => {
   return c.html(`
   <html>
   <head>
   </head>
   <body>
   <form id="input-form" autocomplete="off" method="post">
     <input type="text" name="query" style="width: 100%" />
     <button type="submit">Send</button>
   </form>
   <h2>AI</h2>
   <pre id="ai-content" style="white-space: pre-wrap"></pre>
   <script>
   let target
   let message = ''
   document.addEventListener('DOMContentLoaded', function () {
     target = document.getElementById('ai-content')
     document.getElementById('input-form').addEventListener('submit', function (event) {
       event.preventDefault()
       const formData = new FormData(event.target)
       message = formData.get('query')
       fetchChunked(target)
     })
   })
   function fetchChunked(target) {
     target.textContent = 'loading...'
     fetch('/ask', {
       method: 'post',
       headers: {
         'content-type': 'application/json'
       },
       body: JSON.stringify({ question: message })
     }).then((response) => {
       const reader = response.body.getReader()
       const decoder = new TextDecoder()
       target.textContent = ''
       return reader.read().then(function processText({ done, value }) {
         if (done) {
           // Flush any bytes still buffered by the streaming decoder.
           target.textContent += decoder.decode()
           return
         }
         // stream: true keeps multi-byte characters intact when they are
         // split across chunks; textContent avoids interpreting model
         // output as HTML.
         target.textContent += decoder.decode(value, { stream: true })
         return reader.read().then(processText)
       })
     }).catch((error) => {
       target.textContent = 'Error: ' + error.message
     })
   }
   </script>
   </body>
   </html>
   `)
 })
  
 // Default export: the Hono app with the routes registered above
 // (presumably the Worker entry point — confirm against the deployment config).
 export default app
diff --git a/todo.md b/todo.md
	import { Hono } from 'hono'
	import { CloudflareVectorizeStore } from "langchain/vectorstores/cloudflare_vectorize";
	import { OpenAIEmbeddings } from "langchain/embeddings/openai";
	import { ChatOpenAI } from "langchain/chat_models/openai";
	import { Document } from 'langchain/document';
	import {
	RunnablePassthrough,
	RunnableSequence,
	} from "langchain/schema/runnable";
	import { StringOutputParser } from "langchain/schema/output_parser";
	import {
	ChatPromptTemplate,
	HumanMessagePromptTemplate,
	SystemMessagePromptTemplate,
	} from "langchain/prompts";

	/**
	 * Environment bindings that handlers read through `c.env`.
	 */
	type AppBindings = {
	  // Note carried over from the original author (translated): the `Ai` class
	  // typings are `any` as well, so this shape is considered good enough for now.
	  OPENAI_API_KEY: string;
	  WORKER_AI_API_KEY: string;
	  VECTORIZE_INDEX: VectorizeIndex;
	};
	/** The Hono application, typed so that `c.env` exposes `AppBindings`. */
	const app = new Hono<{ Bindings: AppBindings }>();

	/** A post field whose value is delivered under a `rendered` key. */
	type RenderedField = { rendered: string };
	/**
	 * The subset of a WordPress REST API post object (`/wp-json/wp/v2/posts`)
	 * that this file declares.
	 */
	type WPPost = {
	  id: number;
	  title: RenderedField;
	  content: RenderedField;
	  excerpt: RenderedField;
	};
	/**
	 * POST /index — fetches up to 50 posts from the WordPress REST API and adds
	 * them to the Vectorize-backed vector store, using each post ID as the
	 * document ID.
	 *
	 * Each document's text is built from the post title and excerpt; the post ID
	 * is also stored in the document metadata as `post_id`.
	 *
	 * Responds with `{ success: true }` on success. Any failure (non-2xx upstream
	 * response, unparsable body, embedding or indexing error) is logged and
	 * reported as HTTP 500 with `{ success: false, error }`.
	 */
	app.post('/index', async c => {
	  const embeddings = new OpenAIEmbeddings({
	    openAIApiKey: c.env.OPENAI_API_KEY,
	  })
	  const store = new CloudflareVectorizeStore(embeddings, {
	    index: c.env.VECTORIZE_INDEX,
	  })
	  try {
	    const fetchResult = await fetch('https://{YOUR_WP_SITE_URL}/wp-json/wp/v2/posts?per_page=50')
	    // A non-2xx response carries an error payload rather than a post list,
	    // so fail early instead of trying to index it.
	    if (!fetchResult.ok) {
	      throw new Error(`Failed to fetch posts: ${fetchResult.status} ${fetchResult.statusText}`)
	    }
	    const posts = await fetchResult.json<WPPost[]>()
	    if (!Array.isArray(posts)) {
	      throw new Error('Unexpected response: expected an array of posts')
	    }
	    // Nothing to index; skip the embedding and vector store calls entirely.
	    if (posts.length === 0) {
	      return c.json({ success: true })
	    }
	    const documents = posts.map((post) => ({
	      pageContent: `Title:\n${post.title.rendered}\nContent:\n${post.excerpt.rendered}`,
	      metadata: {
	        post_id: post.id.toString(),
	      },
	    }))
	    const documentIndex = posts.map((post) => post.id.toString())
	    await store.addDocuments(documents, { ids: documentIndex })
	    return c.json({ success: true })
	  } catch (e: unknown) {
	    console.error(e)
	    // Error instances serialize to `{}`, so send the message explicitly and
	    // signal the failure through the status code.
	    const message = e instanceof Error ? e.message : String(e)
	    return c.json({ success: false, error: message }, 500)
	  }
	})

	/**
	 * POST /ask — answers a question with a chat model, using documents
	 * retrieved from the vector store as context, and streams the answer back
	 * as plain text.
	 *
	 * Expects a JSON body of the form `{ "question": string }`.
	 * - A body that is not a JSON object yields HTTP 400.
	 * - A missing, empty, or non-string `question` yields an empty text response.
	 */
	app.post('/ask', async c => {
	  // Malformed JSON makes `c.req.json()` reject; report it as a client error
	  // instead of letting it surface as an unhandled exception.
	  const body = await c.req.json<{ question?: unknown }>().catch(() => null)
	  if (body === null || typeof body !== 'object') {
	    return c.text('Invalid JSON body', 400)
	  }
	  const question = body.question
	  if (typeof question !== 'string' || question === '') {
	    return c.text('')
	  }
	  const model = new ChatOpenAI({
	    temperature: 0,
	    openAIApiKey: c.env.OPENAI_API_KEY,
	    streaming: true,
	    cache: true,
	  });
	  const embeddings = new OpenAIEmbeddings({
	    openAIApiKey: c.env.OPENAI_API_KEY,
	  })
	  const store = new CloudflareVectorizeStore(embeddings, {
	    index: c.env.VECTORIZE_INDEX,
	  });
	  // Joins the text of the retrieved documents into one context string.
	  const serializedDocs = (docs: Array<Document>) =>
	    docs.map((doc) => doc.pageContent).join("\n\n");
	  // Initialize a retriever wrapper around the vector store
	  const vectorStoreRetriever = store.asRetriever();
	  // Create a system & human prompt for the chat model
	  const SYSTEM_TEMPLATE = `Use the following pieces of context to answer the question at the end.
	If you don't know the answer, just say that you don't know, don't try to make up an answer.
	----------------
	{context}`;
	  const messages = [
	    SystemMessagePromptTemplate.fromTemplate(SYSTEM_TEMPLATE),
	    HumanMessagePromptTemplate.fromTemplate("{question}"),
	  ];
	  const prompt = ChatPromptTemplate.fromMessages(messages);
	  // The question feeds both the retriever (to build `context`) and the
	  // prompt's `question` slot.
	  const chain = RunnableSequence.from([
	    {
	      context: vectorStoreRetriever.pipe(serializedDocs),
	      question: new RunnablePassthrough(),
	    },
	    prompt,
	    model,
	    new StringOutputParser(),
	  ]);
	  const answerStream = await chain.stream(question, {
	    callbacks: [
	      {
	        handleLLMNewToken(token: string) {
	          console.log({ token });
	        },
	      },
	    ],
	  });
	  // Forward each chunk to the client as it arrives.
	  return c.streamText(async (stream) => {
	    for await (const chunk of answerStream) {
	      await stream.write(chunk)
	      await stream.sleep(10)
	    }
	  })
	})

	/**
	 * GET / — serves a minimal HTML page with a question form.
	 *
	 * On submit, the inline script posts the question to `/ask` and appends the
	 * streamed answer to the `#ai-content` element as it arrives.
	 */
	app.get('/', c => {
	  return c.html(`
	  <html>
	  <head>
	  </head>
	  <body>
	  <form id="input-form" autocomplete="off" method="post">
	    <input type="text" name="query" style="width: 100%" />
	    <button type="submit">Send</button>
	  </form>
	  <h2>AI</h2>
	  <pre id="ai-content" style="white-space: pre-wrap"></pre>
	  <script>
	  let target
	  let message = ''
	  document.addEventListener('DOMContentLoaded', function () {
	    target = document.getElementById('ai-content')
	    document.getElementById('input-form').addEventListener('submit', function (event) {
	      event.preventDefault()
	      const formData = new FormData(event.target)
	      message = formData.get('query')
	      fetchChunked(target)
	    })
	  })
	  function fetchChunked(target) {
	    target.textContent = 'loading...'
	    fetch('/ask', {
	      method: 'post',
	      headers: {
	        'content-type': 'application/json'
	      },
	      body: JSON.stringify({ question: message })
	    }).then((response) => {
	      const reader = response.body.getReader()
	      const decoder = new TextDecoder()
	      target.textContent = ''
	      return reader.read().then(function processText({ done, value }) {
	        if (done) {
	          // Flush any bytes still buffered by the streaming decoder.
	          target.textContent += decoder.decode()
	          return
	        }
	        // stream: true keeps multi-byte characters intact when they are
	        // split across chunks; textContent avoids interpreting model
	        // output as HTML.
	        target.textContent += decoder.decode(value, { stream: true })
	        return reader.read().then(processText)
	      })
	    }).catch((error) => {
	      target.textContent = 'Error: ' + error.message
	    })
	  }
	  </script>
	  </body>
	  </html>
	  `)
	})

	// Default export: the Hono app with the routes registered above
	// (presumably the Worker entry point — confirm against the deployment config).
	export default app