This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import csv | |
from pymarc import MARCReader | |
from os import listdir | |
from re import search | |
# change this line to match your folder structure | |
SRC_DIR = '/path/to/mrc/records' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
# | |
# Match authors against VIAF | |
# | |
# License: http://dev.perl.org/licenses/artistic.html | |
# | |
# Author: Patrick Hochstenbach <[email protected]> | |
# | |
# Apr 2015 | |
$|++; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import os | |
import xml.parsers.expat | |
from xml.sax.saxutils import escape | |
from optparse import OptionParser | |
from math import log10 | |
# How much data we process at a time |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<qualifieddc xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dc="http://purl.org/dc/elements/1.1" xmlns:dcterms="http://purl.org/dc/terms" xmlns:marcrel="http://www.loc.gov/marc.relators" xsi:schemaLocation="http://www.loc.gov/marc.relators http://imlsdcc2.grainger.illinois.edu/registry/marcrel.xsd" xsi:noNamespaceSchemaLocation="http://dublincore.org/schemas/xmls/qdc/2008/02/11/qualifieddc.xsd"> | |
{{forNonBlank(cells["id"], v, "<dc:identifier>"+v.value+"</dc:identifier>", "")}} | |
{{forNonBlank(cells["Title"], v, "<dc:title>"+v.value+"</dc:title>", "")}} | |
{{forNonBlank(cells["Creator"], v, "<dc:creator>"+v.value+"</dc:creator>", "")}} | |
{{forNonBlank(cells["Date"], v, "<dc:date>"+v.value+"</dc:date>", "")}} | |
{{forNonBlank(cells["Description"], v, "<dc:description>"+v.value+"</dc:description>", "")}} | |
{{forNonBlank(cells["Description2"], v, "<dc:description>"+v.value+"</dc:description>", "")}} | |
{{forNonBlank(cells["Rights"], v, "<dc:rights>"+v.value+"</dc:rights>", "")}} | |
{{forNonBlank(cells["Type"], v, "<dc: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(stringr) | |
library(purrr) | |
library(rvest) | |
#------------------------------------------------------------------------------# | |
# Author: Andrew Do | |
# Purpose: A bunch of utility functions for the main ScrapeCityToPage function. The goal | |
# is to be able to scrape up to a specified page number for a given city and | |
# then to store that information as a data frame. The resulting data frame will | |
# be raw and will require additional cleaning, but the structure is more or less |