External API - AI Assistant
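
Two Lens Studio (Spectacles) TypeScript components. VisionOpenAI captures the texture shown in an Image component, sends it with a text prompt to OpenAI's chat completions endpoint, and displays and speaks the reply. TextToSpeechOpenAI calls OpenAI's speech endpoint and plays the returned PCM audio through an Audio Output asset. Save the second script as TextToSpeechOpenAI.ts so the first script's relative import resolves.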
VISION OPEN AI
import { Interactable } from "SpectaclesInteractionKit.lspkg/Components/Interaction/Interactable/Interactable";
import { InteractorEvent } from "SpectaclesInteractionKit.lspkg/Core/Interactor/InteractorEvent";
import { SIK } from "SpectaclesInteractionKit.lspkg/SIK";
import { TextToSpeechOpenAI } from "./TextToSpeechOpenAI";

@component
export class VisionOpenAI extends BaseScriptComponent {
  @input textInput: Text;
  @input textOutput: Text;
  @input image: Image;
  @input interactable: Interactable;
  @input ttsComponent: TextToSpeechOpenAI;

  apiKey: string = "Insert your Open AI Key";

  // Internet module for fetching data
  private internetModule: InternetModule = require("LensStudio:InternetModule");

  private isProcessing: boolean = false;

  onAwake() {
    this.createEvent("OnStartEvent").bind(() => {
      this.onStart();
    });
  }

  onStart() {
    let interactionManager = SIK.InteractionManager;

    // Define the desired callback logic for the relevant Interactable event.
    let onTriggerEndCallback = (event: InteractorEvent) => {
      this.handleTriggerEnd(event);
    };
    this.interactable.onInteractorTriggerEnd(onTriggerEndCallback);
  }
  async handleTriggerEnd(eventData: InteractorEvent) {
    if (this.isProcessing) {
      print("A request is already in progress. Please wait.");
      return;
    }

    if (!this.textInput.text || !this.image || !this.apiKey) {
      print("Text, Image, or API key input is missing");
      return;
    }

    try {
      this.isProcessing = true;

      // Access the texture from the image component
      const texture = this.image.mainPass.baseTex;
      if (!texture) {
        print("Texture not found in the image component.");
        return;
      }

      const base64Image = await this.encodeTextureToBase64(texture);

      const requestPayload = {
        model: "gpt-4o-mini",
        messages: [
          {
            role: "system",
            content:
              "You are a helpful AI assistant that works for Snapchat that has access to the view that the user is looking at using Augmented Reality Glasses." +
              " The user is asking for help with the following image and text. Keep it short like under 30 words. Be a little funny and keep it positive.",
          },
          {
            role: "user",
            content: [
              { type: "text", text: this.textInput.text },
              {
                type: "image_url",
                image_url: {
                  url: `data:image/jpeg;base64,${base64Image}`,
                },
              },
            ],
          },
        ],
      };

      const request = new Request(
        "https://api.openai.com/v1/chat/completions",
        {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
            Authorization: `Bearer ${this.apiKey}`,
          },
          body: JSON.stringify(requestPayload),
        }
      );

      // More about the fetch API: https://developers.snap.com/spectacles/about-spectacles-features/apis/fetch
      let response = await this.internetModule.fetch(request);
      if (response.status === 200) {
        let responseData = await response.json();
        this.textOutput.text = responseData.choices[0].message.content;
        print(responseData.choices[0].message.content);

        // Call TTS to generate and play speech from the response
        if (this.ttsComponent) {
          this.ttsComponent.generateAndPlaySpeech(
            responseData.choices[0].message.content
          );
        }
      } else {
        print("Failure: response not successful, status " + response.status);
      }
    } catch (error) {
      print("Error: " + error);
    } finally {
      this.isProcessing = false;
    }
  }
  // More about encodeTextureToBase64: https://platform.openai.com/docs/guides/vision or https://developers.snap.com/api/lens-studio/Classes/OtherClasses#Base64
  encodeTextureToBase64(texture): Promise<string> {
    return new Promise((resolve, reject) => {
      Base64.encodeTextureAsync(
        texture,
        resolve,
        reject,
        CompressionQuality.LowQuality,
        EncodingType.Jpg
      );
    });
  }
}
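
For reference, a minimal sketch of the response shape the script reads above; the actual chat completions response carries additional fields (id, usage, and so on) that this code ignores:

interface ChatCompletionResponse {
  choices: {
    message: {
      role: string;
      content: string; // Assigned to textOutput and forwarded to TTS above
    };
  }[];
}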
TTS OPEN AI
@component
export class TextToSpeechOpenAI extends BaseScriptComponent {
  @input audioComponent: AudioComponent;
  @input audioOutputAsset: Asset;

  @input
  @widget(
    new ComboBoxWidget()
      .addItem("Alloy", "alloy")
      .addItem("Echo", "echo")
      .addItem("Fable", "fable")
      .addItem("Onyx", "onyx")
      .addItem("Nova", "nova")
      .addItem("Shimmer", "shimmer")
  )
  voice: string = "alloy"; // Default voice selection

  apiKey: string = "Insert your Open AI Key";

  // Internet module for fetching data
  private internetModule: InternetModule = require("LensStudio:InternetModule");

  onAwake() {
    if (!this.internetModule || !this.audioComponent || !this.apiKey) {
      print("Internet Module, Audio Component, or API key is missing.");
      return;
    }

    if (!this.audioOutputAsset) {
      print(
        "Audio Output asset is not assigned. Please assign an Audio Output asset in the Inspector."
      );
      return;
    }

    this.generateAndPlaySpeech("TextToSpeechOpenAI Ready!");
  }
  public async generateAndPlaySpeech(inputText: string) {
    if (!inputText) {
      print("No text provided for speech synthesis.");
      return;
    }

    try {
      const requestPayload = {
        model: "tts-1",
        voice: this.voice,
        input: inputText,
        response_format: "pcm",
      };

      const request = new Request("https://api.openai.com/v1/audio/speech", {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          Authorization: `Bearer ${this.apiKey}`,
        },
        body: JSON.stringify(requestPayload),
      });

      print("Sending request to OpenAI...");
      let response = await this.internetModule.fetch(request);
      print("Response status: " + response.status);

      if (response.status === 200) {
        try {
          const audioData = await response.bytes();
          print("Received audio data, length: " + audioData.length);

          if (!this.audioOutputAsset) {
            throw new Error("Audio Output asset is not assigned");
          }

          const track = this.getAudioTrackFromData(audioData);
          this.audioComponent.audioTrack = track;
          this.audioComponent.play(1);
          print("Playing speech: " + inputText);
        } catch (processError) {
          print("Error processing audio data: " + processError);
        }
      } else {
        const errorText = await response.text();
        print("API Error: " + response.status + " - " + errorText);
      }
    } catch (error) {
      print("Error generating speech: " + error);
    }
  }
  getAudioTrackFromData = (audioData: Uint8Array): AudioTrackAsset => {
    let outputAudioTrack = this.audioOutputAsset as AudioTrackAsset; // Use the assigned asset
    if (!outputAudioTrack) {
      throw new Error("Failed to get Audio Output asset");
    }
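    // OpenAI's "pcm" response format is raw 16-bit signed little-endian mono
    // samples at 24 kHz, which is why the sample rate below is fixed at 24000
    // and every sample spans two bytes of the response body.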
    const sampleRate = 24000;
    const BUFFER_SIZE = audioData.length / 2;
    print("Processing buffer size: " + BUFFER_SIZE);

    var audioOutput = outputAudioTrack.control as AudioOutputProvider;
    if (!audioOutput) {
      throw new Error("Failed to get audio output control");
    }
    audioOutput.sampleRate = sampleRate;

    var data = new Float32Array(BUFFER_SIZE);

    // Convert PCM16 to Float32
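    // Each sample is reassembled from its low byte and high byte, then the
    // `<< 16 >> 16` shift pair sign-extends the 16-bit value within JavaScript's
    // 32-bit integer arithmetic. Worked example: bytes 0x00, 0x80 combine to
    // 0x8000, sign-extend to -32768, and normalize to -32768 / 32768 = -1.0.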
    for (let i = 0, j = 0; i < audioData.length; i += 2, j++) {
      const sample = ((audioData[i] | (audioData[i + 1] << 8)) << 16) >> 16;
      data[j] = sample / 32768;
    }

    const shape = new vec3(BUFFER_SIZE, 1, 1);
    shape.x = audioOutput.getPreferredFrameSize();

    // Enqueue audio frames in chunks
    let i = 0;
    while (i < BUFFER_SIZE) {
      try {
        const chunkSize = Math.min(shape.x, BUFFER_SIZE - i);
        shape.x = chunkSize;
        audioOutput.enqueueAudioFrame(data.subarray(i, i + chunkSize), shape);
        i += chunkSize;
      } catch (e) {
        throw new Error("Failed to enqueue audio frame - " + e);
      }
    }

    return outputAudioTrack;
  };
}
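
For quick testing outside the vision flow, any script with a reference to the component can trigger speech. A minimal sketch (the SpeechTest component and its tts input are illustrative, not part of this gist):

import { TextToSpeechOpenAI } from "./TextToSpeechOpenAI";

@component
export class SpeechTest extends BaseScriptComponent {
  // Hypothetical input: assign the TextToSpeechOpenAI component in the Inspector.
  @input tts: TextToSpeechOpenAI;

  onAwake() {
    this.createEvent("OnStartEvent").bind(() => {
      // Fire and forget; generateAndPlaySpeech reports its own errors via print.
      this.tts.generateAndPlaySpeech("Hello from Spectacles!");
    });
  }
}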