viking2917 · August 7, 2025 16:40
diff --git a/gemini.ts b/gemini.ts
 // Gemini API key, taken from the GOOGLE_API_KEY environment variable when the module loads.
 const { GOOGLE_API_KEY } = process.env;
 // A missing or empty key is fatal: report it and terminate with exit status 1.
 if (GOOGLE_API_KEY === undefined || GOOGLE_API_KEY === "") {
   console.error("Error: GOOGLE_API_KEY environment variable is not set.");
   process.exit(1);
 }


 // --- Prompt that governs the extraction ---
 // Default natural-language instructions; used as the default value of the
 // `promptText` parameter of geminiExtractStructuredDataFromText.
 // It asks for: the music mentioned in the text (artists ordered by importance,
 // recordings by order of mention), a summary of at most three sentences, three
 // tags, usefulness and writing-quality ratings, and a sentiment score.
 // This is a template literal, so its line breaks and surrounding whitespace are
 // part of the string that is sent.
 // NOTE(review): "aficianado" below is a misspelling of "aficionado"; it is left
 // untouched here because the text is runtime data sent verbatim to the model.
 const defaultPrompt = `Tell me the music mentioned in the following text. 
 Order the artists by their importance in the article. 
 Order the recordings by their order of mention in the article. 
 For each recording list the artists and/or composers if they are known.
 Be sure to include all artists mentioned on recordings into the list of artists. 
 Give me a short summary of the article, no longer than three sentences. 
 Also give me three tags that describe the article: one for the type of article (interview, review, etc), 
 one for the sub-genre of jazz (bebop, classic jazz, fusion, etc), and 
 one for the atmosphere and flavor of the review ("contemplative, outstanding, vibrant," etc)
 Pretend you are a jazz aficianado. rate the text for usefulness and writing quality, compared to similar blog posts. assign a score between 0 and 1. 
 Also assign a sentiment score between 0 and 1 depending on how favorable the article is.
 `;
 // Note: the model returns bogus ids if the prompt also asks for external identifiers, e.g.:
 // "For each musician, composer or artist, give me the musicbrainz id, spotify id, bandcampid, and appleid if it is available. "


 // --- Response schema ---
 // Shape of the JSON object Gemini is asked to produce: the recordings and
 // artists found in the text (the only required keys), plus a summary, tags
 // and three scores. External catalogue ids (MusicBrainz, Spotify, Bandcamp,
 // Apple) are not part of the schema.
 const musicSchema = (() => {
   // Small builders for the property shapes that repeat throughout the schema.
   const text = (description: string) => ({ type: "string", description });
   const textList = (description: string, itemDescription: string) => ({
     type: "array",
     description,
     items: text(itemDescription),
   });
   const unitScore = (description: string) => ({ type: "number", description, minimum: 0, maximum: 1 });

   // One entry of the `recordings` array.
   const recording = {
     type: "object",
     properties: {
       name: text("Name of the recording"),
       type: text("Type of recording, e.g. a track or an album"),
       artists: textList("List of artists associated with the recording", "Name of the artist"),
       composers: textList("List of composers associated with the recording", "Name of the composer"),
     },
     required: ["name"],
   };

   // One entry of the `artists` array.
   const artist = {
     type: "object",
     properties: {
       name: text("Name of the musical artist"),
       role: text("Description of artist role."),
     },
     required: ["name"],
   };

   return {
     type: "object",
     properties: {
       recordings: {
         type: "array",
         description: "A list of all recordings mentioned in the text",
         items: recording,
       },
       artists: {
         type: "array",
         description: "A list of all musical artists mentioned in the text",
         items: artist,
       },
       summary: text("Gemini's summary of the article"),
       // Intended to read like: review • classic jazz • outstanding
       tags: textList(
         "Three tags that describe the article",
         "A tag describing the type of article, sub-genre of jazz, and atmosphere/flavor of the review",
       ),
       usefulnessScore: unitScore("A score between 0 and 1 indicating the usefulness of the article compared to similar blog posts"),
       writingQualityScore: unitScore("A score between 0 and 1 indicating the quality of writing in the article"),
       sentimentScore: unitScore("A score between 0 and 1 indicating the sentiment of the article"),
     },
     required: ["recordings", "artists"],
   };
 })();

 /**
  * Asks Gemini to extract structured data from a block of text.
  *
  * `documentText` and `promptText` are sent, in that order, as two text parts of
  * a single `generateContent` request that asks for JSON output conforming to
  * `schema`. The text of the first part of the first candidate is parsed as
  * JSON and returned.
  *
  * Every failure (network error, non-2xx status, a response without a text
  * part, or text that is not valid JSON) is logged with `console.error` and
  * reported as `null` rather than as a rejected promise.
  *
  * @param documentText - The text to analyse.
  * @param promptText - Natural-language instructions; defaults to `defaultPrompt`.
  * @param schema - Value sent as `generationConfig.responseSchema`; defaults to `musicSchema`.
  * @returns The parsed JSON value, or `null` if anything went wrong.
  */
 export async function geminiExtractStructuredDataFromText(documentText: string, promptText = defaultPrompt, schema = musicSchema) {
   const modelName = 'gemini-2.5-flash'; // Or 'gemini-2.0-flash', or 'gemini-2.5-pro' for more capability
   const endpoint = `https://generativelanguage.googleapis.com/v1beta/models/${modelName}:generateContent`;
   try {
     const response = await fetch(endpoint, {
       method: 'POST',
       headers: {
         'Content-Type': 'application/json',
         // The key travels in a header, not the query string, so it stays out
         // of URLs that get logged or proxied. The `?? ''` fallback is only
         // there to give the header a plain string type.
         'x-goog-api-key': GOOGLE_API_KEY ?? '',
       },
       body: JSON.stringify({
         contents: [{
           parts: [
             { text: documentText }, // The in-memory text block
             { text: promptText }    // The natural language prompt
           ]
         }],
         generationConfig: {
           responseMimeType: "application/json",   // Important: request JSON output
           responseSchema: schema,                 // The JSON schema the output must follow
           temperature: 0.0                        // For deterministic output
         },
         // Adjust safety settings as needed for your application; every
         // category listed here is set to BLOCK_NONE.
         safetySettings: [
           "HARM_CATEGORY_HARASSMENT",
           "HARM_CATEGORY_HATE_SPEECH",
           "HARM_CATEGORY_SEXUALLY_EXPLICIT",
           "HARM_CATEGORY_DANGEROUS_CONTENT",
         ].map((category) => ({ category, threshold: "BLOCK_NONE" })),
       })
     });

     if (!response.ok) {
       // Read the body as text: an error response is not guaranteed to be
       // JSON, and a parse failure here would hide the HTTP status.
       const errorBody = await response.text();
       throw new Error(`API call failed: ${response.status} - ${errorBody}`);
     }

     const data = await response.json();
     // The structured data is expected in the `text` field of the first part of
     // the first candidate. Guard every step so a response without it is
     // reported clearly instead of surfacing as an opaque TypeError.
     const extractedJsonString = data?.candidates?.[0]?.content?.parts?.[0]?.text;
     if (typeof extractedJsonString !== 'string') {
       throw new Error(`Gemini response contained no text part: ${JSON.stringify(data?.promptFeedback ?? data)}`);
     }

     // Parse the JSON string into a JavaScript object
     return JSON.parse(extractedJsonString);

   } catch (error) {
     console.error('Error extracting data:', error);
     return null;
   }
 }
	// Gemini API key, taken from the GOOGLE_API_KEY environment variable when the module loads.
	const { GOOGLE_API_KEY } = process.env;
	// A missing or empty key is fatal: report it and terminate with exit status 1.
	if (GOOGLE_API_KEY === undefined || GOOGLE_API_KEY === "") {
	  console.error("Error: GOOGLE_API_KEY environment variable is not set.");
	  process.exit(1);
	}


	// --- Prompt that governs the extraction ---
	// Default natural-language instructions; used as the default value of the
	// `promptText` parameter of geminiExtractStructuredDataFromText.
	// It asks for: the music mentioned in the text (artists ordered by importance,
	// recordings by order of mention), a summary of at most three sentences, three
	// tags, usefulness and writing-quality ratings, and a sentiment score.
	// This is a template literal, so its line breaks and leading whitespace are
	// part of the string that is sent.
	// NOTE(review): "aficianado" below is a misspelling of "aficionado"; it is left
	// untouched here because the text is runtime data sent verbatim to the model.
	const defaultPrompt = `Tell me the music mentioned in the following text.
	Order the artists by their importance in the article.
	Order the recordings by their order of mention in the article.
	For each recording list the artists and/or composers if they are known.
	Be sure to include all artists mentioned on recordings into the list of artists.
	Give me a short summary of the article, no longer than three sentences.
	Also give me three tags that describe the article: one for the type of article (interview, review, etc),
	one for the sub-genre of jazz (bebop, classic jazz, fusion, etc), and
	one for the atmosphere and flavor of the review ("contemplative, outstanding, vibrant," etc)
	Pretend you are a jazz aficianado. rate the text for usefulness and writing quality, compared to similar blog posts. assign a score between 0 and 1.
	Also assign a sentiment score between 0 and 1 depending on how favorable the article is.
	`;
	// Note: the model returns bogus ids if the prompt also asks for external identifiers, e.g.:
	// "For each musician, composer or artist, give me the musicbrainz id, spotify id, bandcampid, and appleid if it is available. "


	// --- Response schema ---
	// Shape of the JSON object Gemini is asked to produce: the recordings and
	// artists found in the text (the only required keys), plus a summary, tags
	// and three scores. External catalogue ids (MusicBrainz, Spotify, Bandcamp,
	// Apple) are not part of the schema.
	const musicSchema = (() => {
	  // Small builders for the property shapes that repeat throughout the schema.
	  const text = (description: string) => ({ type: "string", description });
	  const textList = (description: string, itemDescription: string) => ({
	    type: "array",
	    description,
	    items: text(itemDescription),
	  });
	  const unitScore = (description: string) => ({ type: "number", description, minimum: 0, maximum: 1 });

	  // One entry of the `recordings` array.
	  const recording = {
	    type: "object",
	    properties: {
	      name: text("Name of the recording"),
	      type: text("Type of recording, e.g. a track or an album"),
	      artists: textList("List of artists associated with the recording", "Name of the artist"),
	      composers: textList("List of composers associated with the recording", "Name of the composer"),
	    },
	    required: ["name"],
	  };

	  // One entry of the `artists` array.
	  const artist = {
	    type: "object",
	    properties: {
	      name: text("Name of the musical artist"),
	      role: text("Description of artist role."),
	    },
	    required: ["name"],
	  };

	  return {
	    type: "object",
	    properties: {
	      recordings: {
	        type: "array",
	        description: "A list of all recordings mentioned in the text",
	        items: recording,
	      },
	      artists: {
	        type: "array",
	        description: "A list of all musical artists mentioned in the text",
	        items: artist,
	      },
	      summary: text("Gemini's summary of the article"),
	      // Intended to read like: review • classic jazz • outstanding
	      tags: textList(
	        "Three tags that describe the article",
	        "A tag describing the type of article, sub-genre of jazz, and atmosphere/flavor of the review",
	      ),
	      usefulnessScore: unitScore("A score between 0 and 1 indicating the usefulness of the article compared to similar blog posts"),
	      writingQualityScore: unitScore("A score between 0 and 1 indicating the quality of writing in the article"),
	      sentimentScore: unitScore("A score between 0 and 1 indicating the sentiment of the article"),
	    },
	    required: ["recordings", "artists"],
	  };
	})();

	/**
	 * Asks Gemini to extract structured data from a block of text.
	 *
	 * `documentText` and `promptText` are sent, in that order, as two text parts of
	 * a single `generateContent` request that asks for JSON output conforming to
	 * `schema`. The text of the first part of the first candidate is parsed as
	 * JSON and returned.
	 *
	 * Every failure (network error, non-2xx status, a response without a text
	 * part, or text that is not valid JSON) is logged with `console.error` and
	 * reported as `null` rather than as a rejected promise.
	 *
	 * @param documentText - The text to analyse.
	 * @param promptText - Natural-language instructions; defaults to `defaultPrompt`.
	 * @param schema - Value sent as `generationConfig.responseSchema`; defaults to `musicSchema`.
	 * @returns The parsed JSON value, or `null` if anything went wrong.
	 */
	export async function geminiExtractStructuredDataFromText(documentText: string, promptText = defaultPrompt, schema = musicSchema) {
	  const modelName = 'gemini-2.5-flash'; // Or 'gemini-2.0-flash', or 'gemini-2.5-pro' for more capability
	  const endpoint = `https://generativelanguage.googleapis.com/v1beta/models/${modelName}:generateContent`;
	  try {
	    const response = await fetch(endpoint, {
	      method: 'POST',
	      headers: {
	        'Content-Type': 'application/json',
	        // The key travels in a header, not the query string, so it stays out
	        // of URLs that get logged or proxied. The `?? ''` fallback is only
	        // there to give the header a plain string type.
	        'x-goog-api-key': GOOGLE_API_KEY ?? '',
	      },
	      body: JSON.stringify({
	        contents: [{
	          parts: [
	            { text: documentText }, // The in-memory text block
	            { text: promptText } // The natural language prompt
	          ]
	        }],
	        generationConfig: {
	          responseMimeType: "application/json", // Important: request JSON output
	          responseSchema: schema, // The JSON schema the output must follow
	          temperature: 0.0 // For deterministic output
	        },
	        // Adjust safety settings as needed for your application; every
	        // category listed here is set to BLOCK_NONE.
	        safetySettings: [
	          "HARM_CATEGORY_HARASSMENT",
	          "HARM_CATEGORY_HATE_SPEECH",
	          "HARM_CATEGORY_SEXUALLY_EXPLICIT",
	          "HARM_CATEGORY_DANGEROUS_CONTENT",
	        ].map((category) => ({ category, threshold: "BLOCK_NONE" })),
	      })
	    });

	    if (!response.ok) {
	      // Read the body as text: an error response is not guaranteed to be
	      // JSON, and a parse failure here would hide the HTTP status.
	      const errorBody = await response.text();
	      throw new Error(`API call failed: ${response.status} - ${errorBody}`);
	    }

	    const data = await response.json();
	    // The structured data is expected in the `text` field of the first part of
	    // the first candidate. Guard every step so a response without it is
	    // reported clearly instead of surfacing as an opaque TypeError.
	    const extractedJsonString = data?.candidates?.[0]?.content?.parts?.[0]?.text;
	    if (typeof extractedJsonString !== 'string') {
	      throw new Error(`Gemini response contained no text part: ${JSON.stringify(data?.promptFeedback ?? data)}`);
	    }

	    // Parse the JSON string into a JavaScript object
	    return JSON.parse(extractedJsonString);

	  } catch (error) {
	    console.error('Error extracting data:', error);
	    return null;
	  }
	}