Created September 29, 2022 21:23
-
-
Save giannisdaras/f95f8edce951c065ef225f09175908df to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import openreview | |
from ml_collections import config_flags | |
from absl import app | |
from absl import flags | |
import requests | |
import os | |
import PyPDF2 | |
import re | |
from tqdm import tqdm | |
# Register the --config_file command-line flag; when the flag is not given,
# the configuration is loaded from "client_config.py".
config_flags.DEFINE_config_file(
    name="config_file",
    default="client_config.py",
    help_string="Config file for client",
)

# Handle to absl's global flag registry; flag values are read through it
# after app.run() has parsed the command line.
FLAGS = flags.FLAGS
# TOKEN | |
def _pdf_mentions(pdf_path, needle):
    """Return True if any page of the PDF at *pdf_path* contains *needle*.

    The match is a plain, case-sensitive substring test on the text
    extracted from each page; scanning stops at the first matching page.
    """
    # NOTE(review): PdfFileReader / getNumPages / getPage / extractText are
    # the legacy PyPDF2 names this script was written against; newer PyPDF2
    # releases rename them -- confirm the pinned version before upgrading.
    reader = PyPDF2.PdfFileReader(pdf_path)
    return any(
        needle in reader.getPage(page_number).extractText()
        for page_number in range(reader.getNumPages())
    )


def find_citations(_):
    """Find ICLR 2023 submissions matching the keywords that mention "Daras".

    Steps:
      1. Fetch every blind submission of ICLR 2023 from OpenReview.
      2. Keep the submissions whose title contains at least one of
         ``config.keywords`` (case-sensitive substring match).
      3. If ``config.download`` is set, save each kept paper's PDF as
         ``outputs/<k>.pdf``, where ``k`` is the paper's 1-based position in
         the filtered list.
      4. Scan ``outputs/<k>.pdf`` for every kept paper and print the titles
         of the papers whose text contains the cited name.

    Args:
        _: Positional argument supplied by ``absl.app.run``; unused.

    Returns:
        None. Results are printed only; ``app.run`` hands the return value
        of its main function to ``sys.exit``, so nothing is returned.

    Raises:
        requests.HTTPError: If a PDF download answers with an error status.
    """
    config = FLAGS.config_file.unlock()

    # Only an already existing directory is expected; any other OSError
    # (permissions, bad path, ...) is a real failure and must propagate.
    try:
        os.mkdir("outputs")
    except FileExistsError:
        print("Output directory already exists...")

    client = openreview.Client(
        baseurl='https://api.openreview.net',
        username=config.username,
        password=config.password,
    )
    notes = list(
        openreview.tools.iterget_notes(
            client,
            invitation='ICLR.cc/2023/Conference/-/Blind_Submission',
        )
    )
    print(f"Number of found papers: {len(notes)}")

    filtered_papers = []
    for note in notes:
        title = note.content['title']
        if not any(keyword in title for keyword in config.keywords):
            continue
        filtered_papers.append(note)
        if config.download:
            url = f"https://openreview.net/pdf?id={note.id}"
            # A timeout keeps one stalled connection from hanging the run.
            response = requests.get(url, timeout=60)
            # Fail loudly rather than saving an HTTP error page as a ".pdf".
            response.raise_for_status()
            # The file name is the paper's 1-based position in
            # filtered_papers; the scan below relies on the same numbering.
            with open(f'outputs/{len(filtered_papers)}.pdf', 'wb') as f:
                f.write(response.content)
    print(f"Number of papers with keyword: {len(filtered_papers)}")

    # Name searched for in the text of each paper.
    cited_name = "Daras"
    papers_that_cite_us = []
    # enumerate(start=1) reproduces the numbering used when the PDFs were
    # written, without a linear list.index() lookup per paper.
    for position, paper in enumerate(tqdm(filtered_papers), start=1):
        if _pdf_mentions(f"outputs/{position}.pdf", cited_name):
            title = paper.content['title']
            papers_that_cite_us.append(title)
            print(f"{title}: cites {cited_name}")
    print(f"Number of papers that cite {cited_name}: {len(papers_that_cite_us)}")
# Script entry point: absl parses the command-line flags (including
# --config_file) and then calls find_citations.
if __name__ == "__main__":
    app.run(main=find_citations)
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment