Created
December 29, 2024 23:57
-
-
Save anj1/d322f0918499157aeab0f51947ae73f5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ollama | |
import feedparser | |
import time | |
from datetime import datetime | |
# System prompt for the LLM judge: describes the researcher's interests and
# constrains the model to a single-word 'yes' / 'no' / 'unsure' verdict.
# determine_interest() relies on this single-word contract when it parses the
# model's reply, so keep the prompt and the parser in sync.
sys_prompt = """
Your task is to judge whether an article title and abstract appeals to a specific researcher. This researcher specializes in computational modeling and analysis of complex biological systems, with particular focus on:
....
You will be given a series of article titles and abstracts. For each abstract, you should decide whether the article is likely to be of interest to the researcher. You can respond with 'yes' or 'no' for each abstract.
If you are unsure, you can respond with 'unsure', but only if you are truly uncertain.
But ONLY respond with 'yes', 'no', or 'unsure'. Do not say anything else. Do not walk through your reasoning. Your answers should be single-word responses. Any answers longer than one word will be ignored and terminated prematurely. Any answers other than 'yes', 'no' and 'unsure' will be IGNORED. Ignore html tags and links in the abstracts. Only consider the text content.
"""
def determine_interest(article_title, article_abstract):
    """Ask the local LLM whether the researcher would care about this article.

    Sends the system prompt plus the title and abstract to the `qwq` model
    via Ollama and interprets the reply.

    Parameters
    ----------
    article_title : str
        Title of the article.
    article_abstract : str
        Abstract text (may contain HTML; the prompt tells the model to
        ignore markup).

    Returns
    -------
    str
        'yes', 'no', or 'unsure' when the model's verdict can be parsed,
        otherwise the literal string "Invalid response".
    """
    response = ollama.chat(model='qwq', messages=[
        {
            'role': 'system',
            'content': sys_prompt,
        },
        {
            'role': 'user',
            'content': article_title,
        },
        {
            'role': 'user',
            'content': article_abstract,
        },
    ])
    reply = response['message']['content'].strip().lower()
    # qwq is a reasoning model and, despite the prompt, may wrap the verdict
    # in extra text or punctuation (e.g. "Yes." or "... answer: no").
    # Take the final whitespace-separated token and strip punctuation so
    # those replies still parse instead of degrading to "Invalid response".
    verdict = reply.split()[-1].strip('.,!?:;\'"') if reply else ''
    if verdict in ('yes', 'no', 'unsure'):
        return verdict
    return "Invalid response"
def fetch_scientific_feeds():
    """Fetch articles from a fixed set of journal RSS feeds and score them.

    For every feed entry, builds an article dict, asks determine_interest()
    for a verdict (stored under the 'interest' key and printed), and sleeps
    one second between entries to be polite to the LLM/servers.

    Returns
    -------
    list[dict]
        Article dicts with keys: 'source', 'title', 'date', 'link',
        'abstract', and 'interest'. Feeds or entries that fail are logged
        and skipped rather than aborting the whole run.
    """
    # Dictionary of RSS feed URLs for major scientific journals
    scientific_feeds = {
        'nature': 'http://feeds.nature.com/nature/rss/current',
        'science': 'http://science.sciencemag.org/rss/express.xml',
        'cell': 'http://www.cell.com/cell/current.rss',
        'plos_one': 'http://feeds.plos.org/plosone/PLoSONE',
    }
    articles = []
    for source, feed_url in scientific_feeds.items():
        try:
            # Parse the feed (feedparser swallows most network errors itself,
            # but guard anyway so one broken feed doesn't kill the run).
            feed = feedparser.parse(feed_url)
        except Exception as e:
            print(f"Error fetching {source} feed: {str(e)}")
            continue
        for entry in feed.entries:
            # Per-entry try: originally one bad entry aborted the whole
            # feed's remaining entries; now it only skips that entry.
            try:
                article = {
                    'source': source,
                    # Use .get consistently: title/link can be missing on
                    # malformed entries and attribute access would raise.
                    'title': entry.get('title', 'No title available'),
                    'date': entry.get('published', 'No date available'),
                    'link': entry.get('link', 'No link available'),
                    # Some feeds use 'summary' while others use 'description'
                    'abstract': entry.get('summary', entry.get('description', 'No abstract available')),
                }
                # Keep the verdict on the article instead of discarding it.
                article['interest'] = determine_interest(article['title'], article['abstract'])
                articles.append(article)
                print(f"Interest in article '{article['title']}': {article['interest']}")
            except Exception as e:
                print(f"Error fetching {source} feed: {str(e)}")
            # Be nice to the servers - add a small delay between requests
            time.sleep(1)
    return articles
def save_articles(articles, filename=None):
    """Write the fetched articles to a plain-text report, one record each.

    Parameters
    ----------
    articles : list[dict]
        Article dicts carrying 'source', 'title', 'date', 'link', 'abstract'.
    filename : str | None
        Output path; defaults to a date-stamped name like
        ``scientific_articles_YYYYMMDD.txt``.
    """
    if filename is None:
        filename = f"scientific_articles_{datetime.now().strftime('%Y%m%d')}.txt"
    with open(filename, 'w', encoding='utf-8') as out:
        for item in articles:
            # Assemble the whole record first, then write it in one call.
            record = (
                f"Source: {item['source']}\n"
                f"Title: {item['title']}\n"
                f"Date: {item['date']}\n"
                f"Link: {item['link']}\n"
                f"Abstract:\n{item['abstract']}\n"
                "\n" + "=" * 80 + "\n\n"
            )
            out.write(record)
def main():
    """Entry point: fetch and score feed articles, then persist them."""
    print("Fetching scientific articles...")
    fetched = fetch_scientific_feeds()
    # Guard clause: nothing fetched means nothing to save.
    if not fetched:
        print("No articles were fetched")
        return
    save_articles(fetched)
    print(f"Successfully fetched {len(fetched)} articles")
    print("Articles have been saved to file")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment