henryamster · March 20, 2023 21:06
diff --git a/interfacifyTS b/interfacifyTS
 import http.client
 import json
 import os
 import sys
 import urllib.parse

 import requests
 from bs4 import BeautifulSoup
 import openai
 import nltk

 # Download the NLTK Punkt tokenizer model. This runs every time the script
 # starts; truncate_text() below tokenizes with nltk.word_tokenize()
 # (presumably the consumer of this data -- confirm against the NLTK docs).
 nltk.download('punkt')

 # Read both API keys from the environment. Indexing os.environ raises
 # KeyError for a missing variable, so the script stops here, before any
 # network request is made, when a key is not configured.
 BING_API_KEY = os.environ['BING_SEARCH_V7_SUBSCRIPTION_KEY']
 OPENAI_API_KEY = os.environ['OPENAI_API_KEY']

 # Hand the OpenAI key to the openai library (module-level setting).
 openai.api_key = OPENAI_API_KEY

 def extract_full_content(url, timeout=10):
    """Fetch a web page and return its visible text as one string.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` can block indefinitely on an
            unresponsive host.

    Returns:
        The page's text fragments, each stripped and joined with single
        spaces, with ``<script>`` and ``<style>`` elements removed first.
        The HTTP status is not checked, so an error page is parsed like
        any other page.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    page = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(page.content, 'html.parser')

    # Script and style bodies are code, not readable page content.
    for tag in soup(["script", "style"]):
        tag.decompose()

    return ' '.join(soup.stripped_strings)

 def truncate_text(text, max_tokens=4096):
    """Return ``text`` cut down to its first ``max_tokens`` NLTK word tokens.

    The text is split with ``nltk.word_tokenize`` and the kept tokens are
    re-joined with single spaces, so the spacing of the result can differ
    from the spacing of the input.
    """
    kept = nltk.word_tokenize(text)[:max_tokens]
    return ' '.join(kept)

 def extract_pertinent_information(prompt):
    """Run ``prompt`` through the ``text-davinci-003`` completion engine.

    Requests a single completion of at most 1024 tokens at temperature 0.5
    and returns the text of the first choice with surrounding whitespace
    removed.
    """
    request = {
        "engine": "text-davinci-003",
        "prompt": prompt,
        "max_tokens": 1024,
        "n": 1,
        "stop": None,
        "temperature": 0.5,
    }
    completion = openai.Completion.create(**request)
    first_choice = completion.choices[0]
    return first_choice.text.strip()

 def generate_entries(prompt):
    """Complete ``prompt`` with ``text-davinci-003`` and split the reply into lines.

    Args:
        prompt: Full prompt text sent to the completion engine.

    Returns:
        A list with one string per non-blank line of the first completion
        choice. Blank lines are dropped, so an empty completion yields
        ``[]`` rather than ``['']``.
    """
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        max_tokens=2048,
        n=1,
        stop=None,
        temperature=0.5,
    )
    completion_text = response.choices[0].text.strip()
    # Blank separator lines in the completion would otherwise be reported
    # as empty entries.
    return [line for line in completion_text.split("\n") if line.strip()]

 # Search Bing for `term`, summarise each result page with the completion
 # model, then ask the model for sample entries and print everything as JSON.
 term = 'dress shirt'
 subscriptionKey = BING_API_KEY
 host = 'api.bing.microsoft.com'
 path = '/v7.0/search'

 # Number of search results to request and process. One constant feeds both
 # the query string and the result slice so the two cannot drift apart.
 result_count = 5

 # Budget (in NLTK word tokens) for the page text embedded in each prompt.
 # The model's context window (about 4K tokens for text-davinci-003) is shared
 # between the prompt and the 1024-token completion requested by
 # extract_pertinent_information(), and NLTK word tokens are not the model's
 # own tokens, so 4096 word tokens of page text would not fit.
 # NOTE(review): 2000 is a conservative estimate -- tune it, or count tokens
 # with the model's real tokenizer.
 max_content_tokens = 2000

 params = '?q=' + urllib.parse.quote(term) + '&count=' + str(result_count)

 headers = {'Ocp-Apim-Subscription-Key': subscriptionKey}

 conn = http.client.HTTPSConnection(host)
 try:
    conn.request("GET", path + params, headers=headers)
    response = conn.getresponse()
    status = response.status
    results = response.read()
 finally:
    # Release the socket even when the request or the read fails.
    conn.close()

 if status != 200:
    # Stop with a clear message instead of a KeyError on the error payload.
    sys.exit(f"Bing search request failed with HTTP status {status}")

 results = json.loads(results)

 # The response may carry no 'webPages' section (for example when nothing
 # matched); treat that as an empty result list.
 web_results = results.get('webPages', {}).get('value', [])[:result_count]

 pertinent_information_list = []

 for result in web_results:
    url = result['url']

    try:
        full_text_content = extract_full_content(url)
    except requests.RequestException as err:
        # One unreachable page should not abort the whole run. Report on
        # stderr so that stdout remains a single valid JSON document.
        print(f"Skipping {url}: {err}", file=sys.stderr)
        continue

    truncated_text_content = truncate_text(full_text_content, max_tokens=max_content_tokens)

    extraction_prompt = f"Extract only pertinent information related to crafting a perfect typescript interface for {term} from the following content: {truncated_text_content}"

    pertinent_info_gpt3 = extract_pertinent_information(extraction_prompt)

    pertinent_information_list.append({
        "source_url": url,
        "pertinent_information": pertinent_info_gpt3
    })

 # Build a numbered list ("\n1. ...\n2. ...") of the per-page summaries.
 # NOTE(review): the combined summaries plus the 2048-token completion asked
 # for by generate_entries() can also exceed the model window -- confirm.
 pertinent_info_combined_str = ''.join(
    f"\n{index}. {entry['pertinent_information']}"
    for index, entry in enumerate(pertinent_information_list, start=1)
 )
 generate_entries_prompt = f"Based on the following pertinent information related to crafting a perfect typescript interface for {term}:{pertinent_info_combined_str}\nGenerate 10 entries matching the perfect JSON schema:"

 generated_entries_gpt3 = generate_entries(generate_entries_prompt)

 # Display the results in JSON format
 output_json = {
    "input": term,
    "pertinent_information_list": pertinent_information_list,
    "generated_entries": generated_entries_gpt3,
 }

 print(json.dumps(output_json, indent=2))
	import http.client, urllib.parse, json
	import requests
	from bs4 import BeautifulSoup
	import openai
	import nltk

	# Download the NLTK Punkt tokenizer model. This runs every time the script
	# starts; truncate_text() below tokenizes with nltk.word_tokenize()
	# (presumably the consumer of this data -- confirm against the NLTK docs).
	nltk.download('punkt')

	# Read both API keys from the environment. Indexing os.environ raises
	# KeyError for a missing variable, so the script stops here, before any
	# network request is made, when a key is not configured.
	BING_API_KEY = os.environ['BING_SEARCH_V7_SUBSCRIPTION_KEY']
	OPENAI_API_KEY = os.environ['OPENAI_API_KEY']

	# Hand the OpenAI key to the openai library (module-level setting).
	openai.api_key = OPENAI_API_KEY

	def extract_full_content(url, timeout=10):
	    """Fetch a web page and return its visible text as one string.

	    Args:
	        url: Address of the page to download.
	        timeout: Seconds to wait for the server before giving up. Without
	            a timeout ``requests.get`` can block indefinitely on an
	            unresponsive host.

	    Returns:
	        The page's text fragments, each stripped and joined with single
	        spaces, with ``<script>`` and ``<style>`` elements removed first.
	        The HTTP status is not checked, so an error page is parsed like
	        any other page.

	    Raises:
	        requests.RequestException: If the request fails or times out.
	    """
	    page = requests.get(url, timeout=timeout)
	    soup = BeautifulSoup(page.content, 'html.parser')

	    # Script and style bodies are code, not readable page content.
	    for tag in soup(["script", "style"]):
	        tag.decompose()

	    return ' '.join(soup.stripped_strings)

	def truncate_text(text, max_tokens=4096):
	    """Return ``text`` cut down to its first ``max_tokens`` NLTK word tokens.

	    The text is split with ``nltk.word_tokenize`` and the kept tokens are
	    re-joined with single spaces, so the spacing of the result can differ
	    from the spacing of the input.
	    """
	    tokens = nltk.word_tokenize(text)
	    truncated_tokens = tokens[:max_tokens]
	    return ' '.join(truncated_tokens)

	def extract_pertinent_information(prompt):
	    """Run ``prompt`` through the ``text-davinci-003`` completion engine.

	    Requests a single completion of at most 1024 tokens at temperature 0.5
	    and returns the text of the first choice with surrounding whitespace
	    removed.
	    """
	    response = openai.Completion.create(
	        engine="text-davinci-003",
	        prompt=prompt,
	        max_tokens=1024,
	        n=1,
	        stop=None,
	        temperature=0.5,
	    )
	    return response.choices[0].text.strip()

	def generate_entries(prompt):
	    """Complete ``prompt`` with ``text-davinci-003`` and split the reply into lines.

	    Args:
	        prompt: Full prompt text sent to the completion engine.

	    Returns:
	        A list with one string per non-blank line of the first completion
	        choice. Blank lines are dropped, so an empty completion yields
	        ``[]`` rather than ``['']``.
	    """
	    response = openai.Completion.create(
	        engine="text-davinci-003",
	        prompt=prompt,
	        max_tokens=2048,
	        n=1,
	        stop=None,
	        temperature=0.5,
	    )
	    completion_text = response.choices[0].text.strip()
	    # Blank separator lines in the completion would otherwise be reported
	    # as empty entries.
	    return [line for line in completion_text.split("\n") if line.strip()]

	# Search Bing for `term`, summarise each result page with the completion
	# model, then ask the model for sample entries and print everything as JSON.
	term = 'dress shirt'
	subscriptionKey = BING_API_KEY
	host = 'api.bing.microsoft.com'
	path = '/v7.0/search'

	# Number of search results to request and process. One constant feeds both
	# the query string and the result slice so the two cannot drift apart.
	result_count = 5

	# Budget (in NLTK word tokens) for the page text embedded in each prompt.
	# The model's context window (about 4K tokens for text-davinci-003) is shared
	# between the prompt and the 1024-token completion requested by
	# extract_pertinent_information(), and NLTK word tokens are not the model's
	# own tokens, so 4096 word tokens of page text would not fit.
	# NOTE(review): 2000 is a conservative estimate -- tune it, or count tokens
	# with the model's real tokenizer.
	max_content_tokens = 2000

	params = '?q=' + urllib.parse.quote(term) + '&count=' + str(result_count)

	headers = {'Ocp-Apim-Subscription-Key': subscriptionKey}

	conn = http.client.HTTPSConnection(host)
	try:
	    conn.request("GET", path + params, headers=headers)
	    response = conn.getresponse()
	    status = response.status
	    results = response.read()
	finally:
	    # Release the socket even when the request or the read fails.
	    conn.close()

	if status != 200:
	    # Stop with a clear message instead of a KeyError on the error payload.
	    sys.exit(f"Bing search request failed with HTTP status {status}")

	results = json.loads(results)

	# The response may carry no 'webPages' section (for example when nothing
	# matched); treat that as an empty result list.
	web_results = results.get('webPages', {}).get('value', [])[:result_count]

	pertinent_information_list = []

	for result in web_results:
	    url = result['url']

	    try:
	        full_text_content = extract_full_content(url)
	    except requests.RequestException as err:
	        # One unreachable page should not abort the whole run. Report on
	        # stderr so that stdout remains a single valid JSON document.
	        print(f"Skipping {url}: {err}", file=sys.stderr)
	        continue

	    truncated_text_content = truncate_text(full_text_content, max_tokens=max_content_tokens)

	    extraction_prompt = f"Extract only pertinent information related to crafting a perfect typescript interface for {term} from the following content: {truncated_text_content}"

	    pertinent_info_gpt3 = extract_pertinent_information(extraction_prompt)

	    pertinent_information_list.append({
	        "source_url": url,
	        "pertinent_information": pertinent_info_gpt3
	    })

	# Build a numbered list ("\n1. ...\n2. ...") of the per-page summaries.
	# NOTE(review): the combined summaries plus the 2048-token completion asked
	# for by generate_entries() can also exceed the model window -- confirm.
	pertinent_info_combined_str = ''.join(
	    f"\n{index}. {entry['pertinent_information']}"
	    for index, entry in enumerate(pertinent_information_list, start=1)
	)
	generate_entries_prompt = f"Based on the following pertinent information related to crafting a perfect typescript interface for {term}:{pertinent_info_combined_str}\nGenerate 10 entries matching the perfect JSON schema:"

	generated_entries_gpt3 = generate_entries(generate_entries_prompt)

	# Display the results in JSON format
	output_json = {
	    "input": term,
	    "pertinent_information_list": pertinent_information_list,
	    "generated_entries": generated_entries_gpt3,
	}

	print(json.dumps(output_json, indent=2))