leondz · December 19, 2020 12:30
diff --git a/scholar2kickstart.py b/scholar2kickstart.py
 #!/usr/bin/env python3

 # Script to create directories and index.md files for publications
 # for Academic Kickstart or Research Group Kickstart, taking reference
 # info from a Google Scholar CSV export.

 # Templates found here, https://wowchemy.com/templates/

 # input: one csv file from command line e.g. from scholar export
 # output: a dir structure of the publications for import to kickstart

 import csv
 import os
 import re
 import sys

 # Directory that receives one sub-directory per publication.
 out_prefix = "kickstart_pubs"

 # Create the output directory, reusing it when a previous run left it behind.
 # Only "already exists" is tolerated; any other failure (e.g. a permission
 # problem) is raised instead of being reported as an existing directory.
 try:
     os.mkdir(out_prefix)
 except FileExistsError:
     print('-- main output dir already exists, proceeding')

 # lazier than writing code to parse the authors dir:
 # this dict contains a search string for an author and their corresponding username on the site.
 # Fill it in before running, following the commented example below; while it
 # is empty the script stops with a reminder instead of exporting anything.

 #username_search = {'erczynsk':'admin', 'iosici':'maci', 'rregaar':'jeno', 'einert':'phze', 'ristina':'crme'}
 username_search = {}

 # The name has to exist for this check to run at all; with both assignments
 # commented out the script died with a NameError rather than this message.
 if not username_search:
     sys.exit('* Please, link some authors and usernames in the script source')

 # Boilerplate that closes the front matter of every generated index.md.
 # It is assembled from explicit lines so the emitted YAML keeps its keys at
 # column 0 regardless of how this source file itself is indented.
 _FRONT_MATTER_TAIL = '\n'.join((
     '',
     '',
     'tags:',
     '#- Source Themes',
     '#featured: true',
     '',
     'links:',
     '#- name: Custom Link',
     '#  url: http://example.org',
     "#url_pdf: '#'",
     "#url_code: '#'",
     "#url_dataset: '#'",
     "#url_poster: '#'",
     "#url_project: ''",
     "#url_slides: ''",
     "#url_source: '#'",
     "#url_video: '#'",
     '',
     '# Featured image',
     "# To use, add an image named `featured.jpg/png` to your page's folder. ",
     'image:',
     "#  caption: 'Image credit: [**Unsplash**](https://unsplash.com/photos/pLCdAaMFLTE)'",
     ' focal_point: ""',
     ' preview_only: false',
     '',
     '# Associated Projects (optional).',
     '#   Associate this publication with one or more of your projects.',
     "#   Simply enter your project's folder or file name without extension.",
     '#   E.g. `internal-project` references `content/project/internal-project/index.md`.',
     '#   Otherwise, set `projects: []`.',
     'projects: []',
     '#- internal-project',
     '',
     '---',
     '',
 ))


 def _yaml_quote(text):
     """Return text as a YAML double-quoted scalar.

     Backslashes are escaped before double quotes so that a backslash already
     present in the text is not read as the start of a YAML escape sequence.
     """
     return '"' + text.replace('\\', '\\\\').replace('"', '\\"') + '"'


 def _display_name(author):
     """Rotate "Assent, Ira" to "Ira Assent"; comma-free names pass through."""
     parts = author.split(', ')
     return ' '.join(parts[-1:] + parts[:-1])


 def _guess_publication_type(row):
     """Guess the publication type of one CSV row.

     Returns a (code, label) pair: the code is written to publication_types,
     the label is only used for the progress message.  Empty or missing
     Number / Publication / Publisher values are treated as empty strings.
     Detection would be easier from the bibtex.
     """
     number = (row.get('Number') or '').lower()
     publication = row.get('Publication') or ''
     publisher = (row.get('Publisher') or '').lower()
     venue = publication.lower()
     if 'arxiv' in number or 'arxiv' in venue:
         return 3, 'Preprint'
     if 'proceedings' in venue:
         return 1, 'Conference'
     if publication == '':
         return 5, 'Book'
     if 'university' in publisher:
         return 7, 'Thesis'
     return 2, 'Journal article'


 if len(sys.argv) < 2:
     sys.exit('* Please, give the Google Scholar CSV export as the first argument')

 # utf-8-sig drops a leading byte-order mark, so the first column is plain
 # 'Authors' whether or not the export starts with one; newline='' is the
 # mode the csv module asks for so quoted line breaks survive.
 with open(sys.argv[1], encoding='utf-8-sig', newline='') as csv_file:
     for r in csv.DictReader(csv_file, delimiter=','):

         # Directory nickname: the title reduced to its ASCII letters.
         article_name = re.sub(r'[^A-Za-z]', '', r['Title'])
         if not article_name:
             print('No usable letters found for', r['Title'], ' nickname - skipping')
             continue

         print('- Creating', article_name, 'for', r['Title'])
         target_dir = out_prefix + '/' + article_name
         os.makedirs(target_dir, exist_ok=True)

         index_path = target_dir + '/index.md'
         if os.path.exists(index_path):
             print('-- Markdown already exists, skipping (no clobber)')
             continue

         with open(index_path, 'w', encoding='utf-8') as markdown:
             markdown.write('---\n')
             markdown.write('title: ' + _yaml_quote(r['Title']) + '\n')
             markdown.write('authors:\n')
             for author in r['Authors'].split('; '):
                 if not author:
                     continue
                 # Site username if any search string occurs in the name;
                 # when several match, the last one in the dict wins.
                 username = ''
                 for search in username_search:
                     if search in author:
                         username = username_search[search]
                 markdown.write('- ' + (username or _display_name(author)) + '\n')

             # scholar csv doesn't give finer granularity than the year
             markdown.write('date: "' + r['Year'] + '-01-01T00:00:00Z"\n')
             markdown.write('doi: ""\n')

             type_code, type_label = _guess_publication_type(r)
             print('-- guessing', type_label)
             markdown.write('publication_types: ["' + str(type_code) + '"]\n')

             markdown.write('publication: ' + _yaml_quote(r.get('Publication') or '') + '\n')

             markdown.write('abstract: \n')
             markdown.write('summary: \n')

             markdown.write(_FRONT_MATTER_TAIL)
	#!/usr/bin/env python3

	# Script to create directories and index.md files for publications
	# for Academic Kickstart or Research Group Kickstart, taking reference
	# info from a Google Scholar CSV export.

	# Templates found here, https://wowchemy.com/templates/

	# input: one csv file from command line e.g. from scholar export
	# output: a dir structure of the publications for import to kickstart

	import csv
	import os
	import re
	import sys

	# Directory that receives one sub-directory per publication.
	out_prefix = "kickstart_pubs"

	# Create the output directory, reusing it when a previous run left it behind.
	# Only "already exists" is tolerated; any other failure (e.g. a permission
	# problem) is raised instead of being reported as an existing directory.
	try:
	    os.mkdir(out_prefix)
	except FileExistsError:
	    print('-- main output dir already exists, proceeding')

	# lazier than writing code to parse the authors dir:
	# this dict contains a search string for an author and their corresponding username on the site.
	# Fill it in before running, following the commented example below; while it
	# is empty the script stops with a reminder instead of exporting anything.

	#username_search = {'erczynsk':'admin', 'iosici':'maci', 'rregaar':'jeno', 'einert':'phze', 'ristina':'crme'}
	username_search = {}

	# The name has to exist for this check to run at all; with both assignments
	# commented out the script died with a NameError rather than this message.
	if not username_search:
	    sys.exit('* Please, link some authors and usernames in the script source')

	# Boilerplate that closes the front matter of every generated index.md.
	# It is assembled from explicit lines so the emitted YAML keeps its keys at
	# column 0 regardless of how this source file itself is indented.
	_FRONT_MATTER_TAIL = '\n'.join((
	    '',
	    '',
	    'tags:',
	    '#- Source Themes',
	    '#featured: true',
	    '',
	    'links:',
	    '#- name: Custom Link',
	    '#  url: http://example.org',
	    "#url_pdf: '#'",
	    "#url_code: '#'",
	    "#url_dataset: '#'",
	    "#url_poster: '#'",
	    "#url_project: ''",
	    "#url_slides: ''",
	    "#url_source: '#'",
	    "#url_video: '#'",
	    '',
	    '# Featured image',
	    "# To use, add an image named `featured.jpg/png` to your page's folder. ",
	    'image:',
	    "#  caption: 'Image credit: [**Unsplash**](https://unsplash.com/photos/pLCdAaMFLTE)'",
	    ' focal_point: ""',
	    ' preview_only: false',
	    '',
	    '# Associated Projects (optional).',
	    '#   Associate this publication with one or more of your projects.',
	    "#   Simply enter your project's folder or file name without extension.",
	    '#   E.g. `internal-project` references `content/project/internal-project/index.md`.',
	    '#   Otherwise, set `projects: []`.',
	    'projects: []',
	    '#- internal-project',
	    '',
	    '---',
	    '',
	))


	def _yaml_quote(text):
	    """Return text as a YAML double-quoted scalar.

	    Backslashes are escaped before double quotes so that a backslash already
	    present in the text is not read as the start of a YAML escape sequence.
	    """
	    return '"' + text.replace('\\', '\\\\').replace('"', '\\"') + '"'


	def _display_name(author):
	    """Rotate "Assent, Ira" to "Ira Assent"; comma-free names pass through."""
	    parts = author.split(', ')
	    return ' '.join(parts[-1:] + parts[:-1])


	def _guess_publication_type(row):
	    """Guess the publication type of one CSV row.

	    Returns a (code, label) pair: the code is written to publication_types,
	    the label is only used for the progress message.  Empty or missing
	    Number / Publication / Publisher values are treated as empty strings.
	    Detection would be easier from the bibtex.
	    """
	    number = (row.get('Number') or '').lower()
	    publication = row.get('Publication') or ''
	    publisher = (row.get('Publisher') or '').lower()
	    venue = publication.lower()
	    if 'arxiv' in number or 'arxiv' in venue:
	        return 3, 'Preprint'
	    if 'proceedings' in venue:
	        return 1, 'Conference'
	    if publication == '':
	        return 5, 'Book'
	    if 'university' in publisher:
	        return 7, 'Thesis'
	    return 2, 'Journal article'


	if len(sys.argv) < 2:
	    sys.exit('* Please, give the Google Scholar CSV export as the first argument')

	# utf-8-sig drops a leading byte-order mark, so the first column is plain
	# 'Authors' whether or not the export starts with one; newline='' is the
	# mode the csv module asks for so quoted line breaks survive.
	with open(sys.argv[1], encoding='utf-8-sig', newline='') as csv_file:
	    for r in csv.DictReader(csv_file, delimiter=','):

	        # Directory nickname: the title reduced to its ASCII letters.
	        article_name = re.sub(r'[^A-Za-z]', '', r['Title'])
	        if not article_name:
	            print('No usable letters found for', r['Title'], ' nickname - skipping')
	            continue

	        print('- Creating', article_name, 'for', r['Title'])
	        target_dir = out_prefix + '/' + article_name
	        os.makedirs(target_dir, exist_ok=True)

	        index_path = target_dir + '/index.md'
	        if os.path.exists(index_path):
	            print('-- Markdown already exists, skipping (no clobber)')
	            continue

	        with open(index_path, 'w', encoding='utf-8') as markdown:
	            markdown.write('---\n')
	            markdown.write('title: ' + _yaml_quote(r['Title']) + '\n')
	            markdown.write('authors:\n')
	            for author in r['Authors'].split('; '):
	                if not author:
	                    continue
	                # Site username if any search string occurs in the name;
	                # when several match, the last one in the dict wins.
	                username = ''
	                for search in username_search:
	                    if search in author:
	                        username = username_search[search]
	                markdown.write('- ' + (username or _display_name(author)) + '\n')

	            # scholar csv doesn't give finer granularity than the year
	            markdown.write('date: "' + r['Year'] + '-01-01T00:00:00Z"\n')
	            markdown.write('doi: ""\n')

	            type_code, type_label = _guess_publication_type(r)
	            print('-- guessing', type_label)
	            markdown.write('publication_types: ["' + str(type_code) + '"]\n')

	            markdown.write('publication: ' + _yaml_quote(r.get('Publication') or '') + '\n')

	            markdown.write('abstract: \n')
	            markdown.write('summary: \n')

	            markdown.write(_FRONT_MATTER_TAIL)