External API - AI Assistant
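
Two Lens Studio (Spectacles) TypeScript components. VisionOpenAI captures the texture shown in an Image component, sends it with a text prompt to OpenAI's chat completions endpoint, and displays and speaks the reply. TextToSpeechOpenAI calls OpenAI's speech endpoint and plays the returned PCM audio through an Audio Output asset. Save the second script as TextToSpeechOpenAI.ts so the first script's relative import resolves.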
VISION OPEN AI
import { Interactable } from "SpectaclesInteractionKit.lspkg/Components/Interaction/Interactable/Interactable";
import { InteractorEvent } from "SpectaclesInteractionKit.lspkg/Core/Interactor/InteractorEvent";
import { SIK } from "SpectaclesInteractionKit.lspkg/SIK";
import { TextToSpeechOpenAI } from "./TextToSpeechOpenAI";

@component
export class VisionOpenAI extends BaseScriptComponent {
  @input textInput: Text;
  @input textOutput: Text;
  @input image: Image;
  @input interactable: Interactable;
  @input ttsComponent: TextToSpeechOpenAI;

  apiKey: string = "Insert your Open AI Key";

  // Internet module for fetching data
  private internetModule: InternetModule = require("LensStudio:InternetModule");

  private isProcessing: boolean = false;

  onAwake() {
    this.createEvent("OnStartEvent").bind(() => {
      this.onStart();
    });
  }

  onStart() {
    let interactionManager = SIK.InteractionManager;

    // Define the desired callback logic for the relevant Interactable event.
    let onTriggerEndCallback = (event: InteractorEvent) => {
      this.handleTriggerEnd(event);
    };
    this.interactable.onInteractorTriggerEnd(onTriggerEndCallback);
  }
  async handleTriggerEnd(eventData: InteractorEvent) {
    if (this.isProcessing) {
      print("A request is already in progress. Please wait.");
      return;
    }

    if (!this.textInput.text || !this.image || !this.apiKey) {
      print("Text, Image, or API key input is missing");
      return;
    }

    try {
      this.isProcessing = true;

      // Access the texture from the image component
      const texture = this.image.mainPass.baseTex;
      if (!texture) {
        print("Texture not found in the image component.");
        return;
      }

      const base64Image = await this.encodeTextureToBase64(texture);

      const requestPayload = {
        model: "gpt-4o-mini",
        messages: [
          {
            role: "system",
            content:
              "You are a helpful AI assistant that works for Snapchat that has access to the view that the user is looking at using Augmented Reality Glasses." +
              " The user is asking for help with the following image and text. Keep it short like under 30 words. Be a little funny and keep it positive.",
          },
          {
            role: "user",
            content: [
              { type: "text", text: this.textInput.text },
              {
                type: "image_url",
                image_url: {
                  url: `data:image/jpeg;base64,${base64Image}`,
                },
              },
            ],
          },
        ],
      };

      const request = new Request(
        "https://api.openai.com/v1/chat/completions",
        {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
            Authorization: `Bearer ${this.apiKey}`,
          },
          body: JSON.stringify(requestPayload),
        }
      );

      // More about the fetch API: https://developers.snap.com/spectacles/about-spectacles-features/apis/fetch
      let response = await this.internetModule.fetch(request);
      if (response.status === 200) {
        let responseData = await response.json();
        this.textOutput.text = responseData.choices[0].message.content;
        print(responseData.choices[0].message.content);

        // Call TTS to generate and play speech from the response
        if (this.ttsComponent) {
          this.ttsComponent.generateAndPlaySpeech(
            responseData.choices[0].message.content
          );
        }
      } else {
        print("Failure: response not successful, status " + response.status);
      }
    } catch (error) {
      print("Error: " + error);
    } finally {
      this.isProcessing = false;
    }
  }
  // More about encodeTextureToBase64: https://platform.openai.com/docs/guides/vision or https://developers.snap.com/api/lens-studio/Classes/OtherClasses#Base64
  encodeTextureToBase64(texture): Promise<string> {
    return new Promise((resolve, reject) => {
      Base64.encodeTextureAsync(
        texture,
        resolve,
        reject,
        CompressionQuality.LowQuality,
        EncodingType.Jpg
      );
    });
  }
}
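
For reference, a minimal sketch of the response shape the script reads above; the actual chat completions response carries additional fields (id, usage, and so on) that this code ignores:

interface ChatCompletionResponse {
  choices: {
    message: {
      role: string;
      content: string; // Assigned to textOutput and forwarded to TTS above
    };
  }[];
}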
TTS OPEN AI
@component
export class TextToSpeechOpenAI extends BaseScriptComponent {
  @input audioComponent: AudioComponent;
  @input audioOutputAsset: Asset;

  @input
  @widget(
    new ComboBoxWidget()
      .addItem("Alloy", "alloy")
      .addItem("Echo", "echo")
      .addItem("Fable", "fable")
      .addItem("Onyx", "onyx")
      .addItem("Nova", "nova")
      .addItem("Shimmer", "shimmer")
  )
  voice: string = "alloy"; // Default voice selection

  apiKey: string = "Insert your Open AI Key";

  // Internet module for fetching data
  private internetModule: InternetModule = require("LensStudio:InternetModule");

  onAwake() {
    if (!this.internetModule || !this.audioComponent || !this.apiKey) {
      print("Internet Module, Audio Component, or API key is missing.");
      return;
    }

    if (!this.audioOutputAsset) {
      print(
        "Audio Output asset is not assigned. Please assign an Audio Output asset in the Inspector."
      );
      return;
    }

    this.generateAndPlaySpeech("TextToSpeechOpenAI Ready!");
  }
  public async generateAndPlaySpeech(inputText: string) {
    if (!inputText) {
      print("No text provided for speech synthesis.");
      return;
    }

    try {
      const requestPayload = {
        model: "tts-1",
        voice: this.voice,
        input: inputText,
        response_format: "pcm",
      };

      const request = new Request("https://api.openai.com/v1/audio/speech", {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          Authorization: `Bearer ${this.apiKey}`,
        },
        body: JSON.stringify(requestPayload),
      });

      print("Sending request to OpenAI...");
      let response = await this.internetModule.fetch(request);
      print("Response status: " + response.status);

      if (response.status === 200) {
        try {
          const audioData = await response.bytes();
          print("Received audio data, length: " + audioData.length);

          if (!this.audioOutputAsset) {
            throw new Error("Audio Output asset is not assigned");
          }

          const track = this.getAudioTrackFromData(audioData);
          this.audioComponent.audioTrack = track;
          this.audioComponent.play(1);
          print("Playing speech: " + inputText);
        } catch (processError) {
          print("Error processing audio data: " + processError);
        }
      } else {
        const errorText = await response.text();
        print("API Error: " + response.status + " - " + errorText);
      }
    } catch (error) {
      print("Error generating speech: " + error);
    }
  }
  getAudioTrackFromData = (audioData: Uint8Array): AudioTrackAsset => {
    let outputAudioTrack = this.audioOutputAsset as AudioTrackAsset; // Use the assigned asset
    if (!outputAudioTrack) {
      throw new Error("Failed to get Audio Output asset");
    }
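    // OpenAI's "pcm" response format is raw 16-bit signed little-endian mono
    // samples at 24 kHz, which is why the sample rate below is fixed at 24000
    // and every sample spans two bytes of the response body.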
    const sampleRate = 24000;
    const BUFFER_SIZE = audioData.length / 2;
    print("Processing buffer size: " + BUFFER_SIZE);

    var audioOutput = outputAudioTrack.control as AudioOutputProvider;
    if (!audioOutput) {
      throw new Error("Failed to get audio output control");
    }
    audioOutput.sampleRate = sampleRate;

    var data = new Float32Array(BUFFER_SIZE);

    // Convert PCM16 to Float32
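    // Each sample is reassembled from its low byte and high byte, then the
    // `<< 16 >> 16` shift pair sign-extends the 16-bit value within JavaScript's
    // 32-bit integer arithmetic. Worked example: bytes 0x00, 0x80 combine to
    // 0x8000, sign-extend to -32768, and normalize to -32768 / 32768 = -1.0.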
    for (let i = 0, j = 0; i < audioData.length; i += 2, j++) {
      const sample = ((audioData[i] | (audioData[i + 1] << 8)) << 16) >> 16;
      data[j] = sample / 32768;
    }

    const shape = new vec3(BUFFER_SIZE, 1, 1);
    shape.x = audioOutput.getPreferredFrameSize();

    // Enqueue audio frames in chunks
    let i = 0;
    while (i < BUFFER_SIZE) {
      try {
        const chunkSize = Math.min(shape.x, BUFFER_SIZE - i);
        shape.x = chunkSize;
        audioOutput.enqueueAudioFrame(data.subarray(i, i + chunkSize), shape);
        i += chunkSize;
      } catch (e) {
        throw new Error("Failed to enqueue audio frame - " + e);
      }
    }

    return outputAudioTrack;
  };
}
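
For quick testing outside the vision flow, any script with a reference to the component can trigger speech. A minimal sketch (the SpeechTest component and its tts input are illustrative, not part of this gist):

import { TextToSpeechOpenAI } from "./TextToSpeechOpenAI";

@component
export class SpeechTest extends BaseScriptComponent {
  // Hypothetical input: assign the TextToSpeechOpenAI component in the Inspector.
  @input tts: TextToSpeechOpenAI;

  onAwake() {
    this.createEvent("OnStartEvent").bind(() => {
      // Fire and forget; generateAndPlaySpeech reports its own errors via print.
      this.tts.generateAndPlaySpeech("Hello from Spectacles!");
    });
  }
}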