Skip to content

Instantly share code, notes, and snippets.

@anj1
Created December 29, 2024 23:57
Show Gist options
  • Save anj1/d322f0918499157aeab0f51947ae73f5 to your computer and use it in GitHub Desktop.
import ollama
import feedparser
import time
from datetime import datetime
# System prompt steering the local LLM: for each title/abstract pair it must
# reply with exactly one word -- 'yes', 'no', or 'unsure' -- so the answer can
# be matched verbatim by determine_interest(). (The "...." line appears to be
# an elided interest list from the original gist -- TODO restore if available.)
sys_prompt = """
Your task is to judge whether an article title and abstract appeals to a specific researcher. This researcher specializes in computational modeling and analysis of complex biological systems, with particular focus on:
....
You will be given a series of article titles and abstracts. For each abstract, you should decide whether the article is likely to be of interest to the researcher. You can respond with 'yes' or 'no' for each abstract.
If you are unsure, you can respond with 'unsure', but only if you are truly uncertain.
But ONLY respond with 'yes', 'no', or 'unsure'. Do not say anything else. Do not walk through your reasoning. Your answers should be single-word responses. Any answers longer than one word will be ignored and terminated prematurely. Any answers other than 'yes', 'no' and 'unsure' will be IGNORED.
Ignore html tags and links in the abstracts. Only consider the text content.
"""
def determine_interest(article_title, article_abstract):
    """Ask the local LLM whether an article matches the researcher's interests.

    Parameters
    ----------
    article_title : str
        The article's title, sent as one user message.
    article_abstract : str
        The article's abstract, sent as a second user message.

    Returns
    -------
    str
        'yes', 'no', or 'unsure' when a verdict can be extracted, otherwise
        the literal string "Invalid response" (kept for backward
        compatibility with existing callers).
    """
    response = ollama.chat(model='qwq', messages=[
        {
            'role': 'system',
            'content': sys_prompt,
        },
        {
            'role': 'user',
            'content': article_title,
        },
        {
            'role': 'user',
            'content': article_abstract,
        },
    ])
    return _parse_verdict(response['message']['content'])


def _parse_verdict(raw):
    """Extract a 'yes'/'no'/'unsure' verdict from a raw model reply.

    Reasoning models such as qwq often prepend chain-of-thought text despite
    the single-word instruction in the system prompt, so after the exact
    match fails we fall back to the last whitespace-separated token,
    stripped of trailing punctuation and quotes.
    """
    text = raw.strip().lower()
    if text in ('yes', 'no', 'unsure'):
        return text
    words = text.split()
    if words:
        last = words[-1].strip(".,!?:;'\"*")
        if last in ('yes', 'no', 'unsure'):
            return last
    return "Invalid response"
def fetch_scientific_feeds():
    """Fetch current articles from major journal RSS feeds and score each one.

    Returns
    -------
    list[dict]
        One dict per article with keys 'source', 'title', 'date', 'link',
        'abstract', and 'interest' (the LLM verdict, or 'unknown' when
        scoring failed). The 'interest' key is new; previously the verdict
        was computed but discarded.
    """
    # Dictionary of RSS feed URLs for major scientific journals
    scientific_feeds = {
        'nature': 'http://feeds.nature.com/nature/rss/current',
        'science': 'http://science.sciencemag.org/rss/express.xml',
        'cell': 'http://www.cell.com/cell/current.rss',
        'plos_one': 'http://feeds.plos.org/plosone/PLoSONE',
    }
    articles = []
    for source, feed_url in scientific_feeds.items():
        try:
            # Parse the feed. feedparser rarely raises: malformed or
            # unreachable feeds are reported through the 'bozo' flag.
            feed = feedparser.parse(feed_url)
            if getattr(feed, 'bozo', False) and not feed.entries:
                print(f"Error fetching {source} feed: "
                      f"{getattr(feed, 'bozo_exception', 'malformed feed')}")
                continue
            # Process each entry in the feed
            for entry in feed.entries:
                article = {
                    'source': source,
                    'title': entry.title,
                    'date': entry.get('published', 'No date available'),
                    'link': entry.link,
                    # Some feeds use 'summary' while others use 'description'
                    'abstract': entry.get('summary', entry.get('description', 'No abstract available')),
                }
                # Score inside its own try so one failing LLM call does not
                # abort the remaining entries of this feed.
                try:
                    article['interest'] = determine_interest(article['title'], article['abstract'])
                    print(f"Interest in article '{article['title']}': {article['interest']}")
                except Exception as e:
                    article['interest'] = 'unknown'
                    print(f"Error scoring article '{article['title']}': {e}")
                articles.append(article)
                # Be nice to the servers - add a small delay between requests
                time.sleep(1)
        except Exception as e:
            print(f"Error fetching {source} feed: {str(e)}")
    return articles
def save_articles(articles, filename=None):
    """Write the fetched articles to a plain-text report file.

    Parameters
    ----------
    articles : list[dict]
        Article dicts carrying 'source', 'title', 'date', 'link',
        and 'abstract' keys.
    filename : str | None
        Output path; defaults to a date-stamped name like
        ``scientific_articles_YYYYMMDD.txt``.
    """
    if filename is None:
        filename = f"scientific_articles_{datetime.now().strftime('%Y%m%d')}.txt"
    # Visual divider printed after every article record.
    divider = "\n" + "=" * 80 + "\n\n"
    with open(filename, 'w', encoding='utf-8') as out:
        for item in articles:
            record = [
                f"Source: {item['source']}\n",
                f"Title: {item['title']}\n",
                f"Date: {item['date']}\n",
                f"Link: {item['link']}\n",
                f"Abstract:\n{item['abstract']}\n",
                divider,
            ]
            out.writelines(record)
def main():
    """Entry point: fetch the feeds, score the articles, and save a report."""
    print("Fetching scientific articles...")
    fetched = fetch_scientific_feeds()
    # Guard clause: nothing to persist when every feed came back empty.
    if not fetched:
        print("No articles were fetched")
        return
    save_articles(fetched)
    print(f"Successfully fetched {len(fetched)} articles")
    print("Articles have been saved to file")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment