Last active
December 19, 2020 12:30
-
-
Save leondz/98073549a2feb23aaf455029fd010efd to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""Create publication folders and index.md files from a Google Scholar CSV.

Script to create directories and index.md files for publications
for Academic Kickstart or Research Group Kickstart, taking reference
info from a Google Scholar CSV export.
Templates found here, https://wowchemy.com/templates/

input:  one csv file from command line e.g. from scholar export
output: a dir structure of the publications for import to kickstart
"""
import csv
import os
import re
import sys

# Directory (relative to the current working directory) that receives one
# sub-directory per publication.
out_prefix = "kickstart_pubs"

# lazier than writing code to parse the authors dir:
# this dict maps a search string for an author to their corresponding
# username on the site, for example:
#   username_search = {'erczynsk':'admin', 'iosici':'maci', 'rregaar':'jeno', 'einert':'phze', 'ristina':'crme'}
# It has to be filled in before the script will generate anything.
username_search = {}

# Messages printed for each publication type code this script can guess.
PUBLICATION_TYPE_LABELS = {
    1: 'Conference',
    2: 'Journal article',
    3: 'Preprint',
    5: 'Book',
    7: 'Thesis',
}

# Static remainder of every generated index.md; it also closes the front
# matter. The leading newline leaves a blank line after the "summary:" key.
FRONT_MATTER_TAIL = """
tags:
#- Source Themes
#featured: true
links:
#- name: Custom Link
#  url: http://example.org
#url_pdf: '#'
#url_code: '#'
#url_dataset: '#'
#url_poster: '#'
#url_project: ''
#url_slides: ''
#url_source: '#'
#url_video: '#'
# Featured image
# To use, add an image named `featured.jpg/png` to your page's folder.
image:
#  caption: 'Image credit: [**Unsplash**](https://unsplash.com/photos/pLCdAaMFLTE)'
  focal_point: ""
  preview_only: false
# Associated Projects (optional).
#   Associate this publication with one or more of your projects.
#   Simply enter your project's folder or file name without extension.
#   E.g. `internal-project` references `content/project/internal-project/index.md`.
#   Otherwise, set `projects: []`.
projects: []
#- internal-project
---
"""


def cell(row, key):
    """Return the CSV cell *key* of *row* as a string, '' when absent.

    csv.DictReader fills the missing trailing fields of a short row with
    None, so the value is normalised here instead of at every use.
    """
    value = row.get(key)
    return '' if value is None else value


def yaml_quote(text):
    """Return *text* as a double-quoted YAML scalar.

    Backslashes are escaped before quotes: inside a double-quoted YAML
    scalar the backslash starts an escape sequence.
    """
    return '"' + text.replace('\\', '\\\\').replace('"', '\\"') + '"'


def article_dir_name(title):
    """Return the folder nickname for *title*: its ASCII letters only."""
    return re.sub(r'[^A-Za-z]', '', title)


def format_author(author, usernames):
    """Return the front-matter entry for one author.

    If a search string from *usernames* occurs in *author*, the mapped site
    username is returned (when several match, the last one in the dict
    wins). Otherwise the name is returned with the last comma-separated
    part rotated to the front, e.g. "Assent, Ira" becomes "Ira Assent".
    """
    matched = ''
    for search, username in usernames.items():
        if search in author:
            matched = username
    if matched:
        return matched
    parts = author.split(', ')
    # Joining the rotated list avoids a trailing space for comma-less names.
    return ' '.join([parts[-1]] + parts[:-1]).strip()


def guess_publication_type(row):
    """Guess the publication type code for a CSV *row*.

    Would be easier from the bibtex; here only the "Number", "Publication"
    and "Publisher" columns are inspected. The checks run in a fixed order
    and the first hit wins. Returns an int; 0 means no guess was made.
    """
    number = cell(row, 'Number').lower()
    publication = cell(row, 'Publication')
    publisher = cell(row, 'Publisher').lower()
    if 'arxiv' in number or 'arxiv' in publication.lower():
        return 3
    if 'proceedings' in publication.lower():
        return 1
    if publication == '':
        return 5
    if 'university' in publisher:
        return 7
    if publication:
        return 2
    return 0


def build_front_matter(row, usernames, publication_type=None):
    """Return the complete index.md text for one CSV *row*.

    *usernames* is the search-string -> site-username mapping passed on to
    format_author(). *publication_type* may be supplied by a caller that
    already guessed it; otherwise it is guessed here.
    """
    if publication_type is None:
        publication_type = guess_publication_type(row)
    # The header is plain "Authors" when the file was opened with BOM
    # handling; the BOM-prefixed spelling is kept as a fallback.
    authors_cell = cell(row, 'Authors') or cell(row, '\ufeffAuthors')

    lines = ['---', 'title: ' + yaml_quote(cell(row, 'Title')), 'authors:']
    for author in authors_cell.split(';'):
        author = author.strip()
        if author:
            lines.append('- ' + format_author(author, usernames))
    # scholar csv doesn't give finer granularity than the year
    lines.append('date: "' + cell(row, 'Year') + '-01-01T00:00:00Z"')
    lines.append('doi: ""')
    lines.append('publication_types: ["' + str(publication_type) + '"]')
    lines.append('publication: ' + yaml_quote(cell(row, 'Publication')))
    lines.append('abstract: ')
    lines.append('summary: ')
    return '\n'.join(lines) + '\n' + FRONT_MATTER_TAIL


def write_publication(row, usernames, out_dir):
    """Create <out_dir>/<nickname>/index.md for one CSV *row*.

    Returns True when a file was written and False when the row was skipped
    (no usable letters in the title, or index.md already exists).
    """
    title = cell(row, 'Title')
    article_name = article_dir_name(title)
    if not article_name:
        print('No usable letters found for', title, ' nickname - skipping')
        return False
    print('- Creating', article_name, 'for', title)

    target_dir = os.path.join(out_dir, article_name)
    # An existing directory is fine; any other failure should surface.
    os.makedirs(target_dir, exist_ok=True)
    index_path = os.path.join(target_dir, 'index.md')
    if os.path.exists(index_path):
        print('-- Markdown already exists, skipping (no clobber)')
        return False

    publication_type = guess_publication_type(row)
    if publication_type in PUBLICATION_TYPE_LABELS:
        print('-- guessing', PUBLICATION_TYPE_LABELS[publication_type])
    # Build everything before opening the file so a bad row cannot leave a
    # half-written index.md behind for the no-clobber check to protect.
    content = build_front_matter(row, usernames, publication_type)
    with open(index_path, 'w', encoding='utf-8') as markdown:
        markdown.write(content)
    return True


def main(argv=None):
    """Run the conversion; *argv* defaults to sys.argv.

    Exits with a message when no usernames are configured or when no CSV
    path is given as the first argument.
    """
    argv = sys.argv if argv is None else argv
    if not username_search:
        sys.exit('* Please, link some authors and usernames in the script source')
    if len(argv) < 2:
        sys.exit('* Please, give the Google Scholar CSV export as the first argument')

    try:
        os.mkdir(out_prefix)
    except FileExistsError:
        print('-- main output dir already exists, proceeding')

    # utf-8-sig strips a leading byte-order mark, so the first header is
    # read as "Authors"; newline='' is what the csv module asks for.
    with open(argv[1], newline='', encoding='utf-8-sig') as csv_file:
        for row in csv.DictReader(csv_file, delimiter=','):
            write_publication(row, username_search, out_prefix)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment