Created
December 29, 2024 23:57
-
-
Save anj1/d322f0918499157aeab0f51947ae73f5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ollama | |
import feedparser | |
import time | |
from datetime import datetime | |
# System prompt for the LLM judge: describes the researcher's interests and
# constrains the model to a single-word 'yes' / 'no' / 'unsure' verdict.
# determine_interest() relies on this single-word contract when it parses the
# model's reply, so keep the prompt and the parser in sync.
sys_prompt = """
Your task is to judge whether an article title and abstract appeals to a specific researcher. This researcher specializes in computational modeling and analysis of complex biological systems, with particular focus on:
....
You will be given a series of article titles and abstracts. For each abstract, you should decide whether the article is likely to be of interest to the researcher. You can respond with 'yes' or 'no' for each abstract.
If you are unsure, you can respond with 'unsure', but only if you are truly uncertain.
But ONLY respond with 'yes', 'no', or 'unsure'. Do not say anything else. Do not walk through your reasoning. Your answers should be single-word responses. Any answers longer than one word will be ignored and terminated prematurely. Any answers other than 'yes', 'no' and 'unsure' will be IGNORED. Ignore html tags and links in the abstracts. Only consider the text content.
"""
def determine_interest(article_title, article_abstract):
    """Ask the local LLM whether the researcher would care about this article.

    Sends the system prompt plus the title and abstract to the `qwq` model
    via Ollama and interprets the reply.

    Parameters
    ----------
    article_title : str
        Title of the article.
    article_abstract : str
        Abstract text (may contain HTML; the prompt tells the model to
        ignore markup).

    Returns
    -------
    str
        'yes', 'no', or 'unsure' when the model's verdict can be parsed,
        otherwise the literal string "Invalid response".
    """
    response = ollama.chat(model='qwq', messages=[
        {
            'role': 'system',
            'content': sys_prompt,
        },
        {
            'role': 'user',
            'content': article_title,
        },
        {
            'role': 'user',
            'content': article_abstract,
        },
    ])
    reply = response['message']['content'].strip().lower()
    # qwq is a reasoning model and, despite the prompt, may wrap the verdict
    # in extra text or punctuation (e.g. "Yes." or "... answer: no").
    # Take the final whitespace-separated token and strip punctuation so
    # those replies still parse instead of degrading to "Invalid response".
    verdict = reply.split()[-1].strip('.,!?:;\'"') if reply else ''
    if verdict in ('yes', 'no', 'unsure'):
        return verdict
    return "Invalid response"
def fetch_scientific_feeds():
    """Fetch articles from a fixed set of journal RSS feeds and score them.

    For every feed entry, builds an article dict, asks determine_interest()
    for a verdict (stored under the 'interest' key and printed), and sleeps
    one second between entries to be polite to the LLM/servers.

    Returns
    -------
    list[dict]
        Article dicts with keys: 'source', 'title', 'date', 'link',
        'abstract', and 'interest'. Feeds or entries that fail are logged
        and skipped rather than aborting the whole run.
    """
    # Dictionary of RSS feed URLs for major scientific journals
    scientific_feeds = {
        'nature': 'http://feeds.nature.com/nature/rss/current',
        'science': 'http://science.sciencemag.org/rss/express.xml',
        'cell': 'http://www.cell.com/cell/current.rss',
        'plos_one': 'http://feeds.plos.org/plosone/PLoSONE',
    }
    articles = []
    for source, feed_url in scientific_feeds.items():
        try:
            # Parse the feed (feedparser swallows most network errors itself,
            # but guard anyway so one broken feed doesn't kill the run).
            feed = feedparser.parse(feed_url)
        except Exception as e:
            print(f"Error fetching {source} feed: {str(e)}")
            continue
        for entry in feed.entries:
            # Per-entry try: originally one bad entry aborted the whole
            # feed's remaining entries; now it only skips that entry.
            try:
                article = {
                    'source': source,
                    # Use .get consistently: title/link can be missing on
                    # malformed entries and attribute access would raise.
                    'title': entry.get('title', 'No title available'),
                    'date': entry.get('published', 'No date available'),
                    'link': entry.get('link', 'No link available'),
                    # Some feeds use 'summary' while others use 'description'
                    'abstract': entry.get('summary', entry.get('description', 'No abstract available')),
                }
                # Keep the verdict on the article instead of discarding it.
                article['interest'] = determine_interest(article['title'], article['abstract'])
                articles.append(article)
                print(f"Interest in article '{article['title']}': {article['interest']}")
            except Exception as e:
                print(f"Error fetching {source} feed: {str(e)}")
            # Be nice to the servers - add a small delay between requests
            time.sleep(1)
    return articles
def save_articles(articles, filename=None):
    """Write the fetched articles to a plain-text report, one record each.

    Parameters
    ----------
    articles : list[dict]
        Article dicts carrying 'source', 'title', 'date', 'link', 'abstract'.
    filename : str | None
        Output path; defaults to a date-stamped name like
        ``scientific_articles_YYYYMMDD.txt``.
    """
    if filename is None:
        filename = f"scientific_articles_{datetime.now().strftime('%Y%m%d')}.txt"
    with open(filename, 'w', encoding='utf-8') as out:
        for item in articles:
            # Assemble the whole record first, then write it in one call.
            record = (
                f"Source: {item['source']}\n"
                f"Title: {item['title']}\n"
                f"Date: {item['date']}\n"
                f"Link: {item['link']}\n"
                f"Abstract:\n{item['abstract']}\n"
                "\n" + "=" * 80 + "\n\n"
            )
            out.write(record)
def main():
    """Entry point: fetch and score feed articles, then persist them."""
    print("Fetching scientific articles...")
    fetched = fetch_scientific_feeds()
    # Guard clause: nothing fetched means nothing to save.
    if not fetched:
        print("No articles were fetched")
        return
    save_articles(fetched)
    print(f"Successfully fetched {len(fetched)} articles")
    print("Articles have been saved to file")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment