Last active
April 1, 2021 06:42
-
-
Save cboulanger/e3719a774af761048100aa2271521fb2 to your computer and use it in GitHub Desktop.
One-way (read-only) synchronization from zotero.org to a local key-value datastore (couchbase)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Naive prototype function that does a remote-to-local synchronization of user
 * and group libraries that are accessible to the owner of the API token. The
 * external `sandbox` object provides preconfigured api objects which access the
 * Zotero server and the local store (a couchbase server in the case of this
 * prototype), and a gauge widget for some nice visual feedback.
 *
 * This is only a partial implementation of
 * https://www.zotero.org/support/dev/web_api/v3/syncing . In particular, the
 * code is unaware of mid-sync version changes, since the sync is needed only
 * for data analysis.
 *
 * For each library the function
 *  1. reads the locally stored library version (0 if none is stored yet),
 *  2. asks the server for the keys deleted and modified since that version,
 *  3. downloads the modified records in batches and upserts them locally,
 *  4. removes the remotely deleted records locally,
 *  5. stores the remote library version as the new local version.
 *
 * @returns {Promise<void>}
 * @throws Rethrows any error raised by the sandbox objects after hiding the gauge
 */
async function zoteroSync() {
  // The Zotero API accepts at most 50 keys in a single key-based request
  const batchSize = 50;
  try {
    // the sync adapter implementing the methods used here
    const syncAdapter = sandbox.getSyncAdapter();
    const zotApi = sandbox.getZoteroApi();
    // the id of the API token's owner
    const userId = await zotApi.getUserId();
    // the ids of the groups to which this user has access
    const groupIds = await zotApi.getGroupIds();
    // library ids as understood by the underlying zotero API client
    const libraryIds = ["u" + userId].concat(groupIds.map(groupId => "g" + groupId));
    const types = ["collections", "items"];
    const libNumber = libraryIds.length;
    let libCounter = 0;
    // loop over accessible libraries
    for (const libraryId of libraryIds) {
      libCounter++;
      // library to sync
      const library = sandbox.getZoteroLibrary(libraryId);
      // inform user
      const {name} = await library.getInfo();
      sandbox.gauge.show(`Synchronizing '${name}'`, libCounter / libNumber);
      // store for sync metadata
      const metaStore = await syncAdapter.getStore("meta", libraryId);
      // if a local library version is stored, use it to retrieve records changed on the server
      const versionKey = "version";
      let localVersion;
      if (await metaStore.exists(versionKey)) {
        localVersion = await metaStore.get(versionKey);
      } else {
        localVersion = 0;
        await metaStore.upsert(versionKey, localVersion);
      }
      // get deleted keys for later; the response also carries the current library version
      const deletedInfo = await library.getDeletedSince(localVersion);
      const remoteVersion = deletedInfo.version;
      if (remoteVersion === localVersion) {
        // up-to-date
        continue;
      }
      // loop over types of record
      for (const type of types) {
        // store for the records of the specific type of the given library
        const typeStore = await syncAdapter.getStore(type, libraryId);
        // i. Get updated data, see https://www.zotero.org/support/dev/web_api/v3/syncing#i_get_updated_data
        sandbox.gauge.show(`Checking for changed ${type} in '${name}'...`);
        // get keys of remotely changed records
        const {keys: changedKeys} = await library.getModifiedSince(type, localVersion);
        if (changedKeys.length) {
          // if we have local records, retrieve them to compare their versions
          const keyVersionMap = await typeStore.getRecordVersions(changedKeys);
          // Skip a remote record in case of an incomplete first sync (to avoid
          // re-download) or when the local record has an equal or newer version.
          // A set is used so that keys unknown to the remote list are simply
          // ignored instead of accidentally removing an unrelated key.
          const skippedKeys = new Set();
          for (const [key, localRecordVersion] of Object.entries(keyVersionMap)) {
            if (localVersion === 0 || localRecordVersion >= remoteVersion) {
              skippedKeys.add(key);
            }
          }
          const keys = changedKeys.filter(key => !skippedKeys.has(key));
          const total = keys.length;
          let counter = 0;
          // Advance by the number of keys requested, not by the number of records
          // returned: the server may return fewer records than requested, which
          // must not stall or repeat the download loop.
          for (let offset = 0; offset < total; offset += batchSize) {
            const range = keys.slice(offset, offset + batchSize);
            // get record data for the given range of keys
            const records = await library.getRecordData(type, range);
            for (const record of records) {
              counter++;
              sandbox.gauge.show(`Updating ${counter} of ${total} changed ${type} in '${name}'`, counter / total);
              await typeStore.upsert(record.key, record);
            }
          }
        }
        // ii. Get deleted data, see https://www.zotero.org/support/dev/web_api/v3/syncing#ii_get_deleted_data
        const deletedKeys = deletedInfo[type] || [];
        if (deletedKeys.length) {
          // Todo: if the number of deletions is huge, ask user first
          sandbox.gauge.show(`Removing remotely deleted ${type} in ${name}...`);
          for (const key of deletedKeys) {
            try {
              await typeStore.remove(key);
            } catch (e) {
              // Deliberate best effort: a record deleted remotely may never have
              // been stored locally, so a failed removal is ignored.
            }
          }
        }
      }
      // sync done, store remote version as local version
      await metaStore.upsert(versionKey, remoteVersion);
    }
  } catch (e) {
    sandbox.gauge.hide();
    // guard the status lookup so that a missing status cannot mask the original error
    const status = sandbox.gauge._status;
    console.error("error at :" + (status ? status.section : undefined));
    throw e;
  }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const process = require('process'); | |
const couchbase = require("couchbase"); | |
/**
 * Sync adapter that stores zotero data in a couchbase bucket. Each zotero library
 * maps to a couchbase scope and each record type to a collection in that scope.
 */
class SyncAdapter {
  /**
   * Set up the adapter
   * @param url
   * @param username
   * @param password
   * @param options? Optional options: {bucketName?:"zotero"}
   */
  constructor(url, username, password, options = {}) {
    this.url = url;
    this.username = username;
    this.password = password;
    this.bucketName = options.bucketName || "zotero";
  }

  /**
   * Returns the store instance for the given combination of storeName and libraryId.
   * Interface method.
   * @param {String} storeName
   * @param {String} libraryId
   * @returns {Promise<Store>}
   */
  async getStore(storeName, libraryId) {
    return await (new Store(this, storeName, libraryId)).init();
  }

  //////// Non-interface methods /////////

  /**
   * Returns the active cluster instance. The connection is created on first use
   * and cached on the adapter.
   * @returns {Promise<Cluster>}
   */
  async getCluster() {
    if (!this.cluster) {
      this.cluster = await couchbase.connect(this.url, {
        username: this.username,
        password: this.password,
      });
    }
    return this.cluster;
  }

  /**
   * Returns the couchbase bucket in which the zotero data is stored
   * @returns {Promise<Bucket>}
   */
  async getZoteroBucket() {
    return (await this.getCluster()).bucket(this.bucketName);
  }

  /**
   * Creates a primary index on the given collection and waits until the index
   * is reported as "online". If the index already exists, returns immediately.
   * @param {String} scopeName
   * @param {String} collectionName
   * @param {{attempts?: Number, timeout?: Number}} options? Optional polling
   *   options: the maximum number of status checks (default 3) and the pause
   *   between two checks in seconds (default 3)
   * @returns {Promise<void>}
   * @throws Error if the index is not online after the last status check
   */
  async createPrimaryIndex(scopeName, collectionName, options = {}) {
    const attempts = options.attempts === undefined ? 3 : options.attempts;
    const timeout = options.timeout === undefined ? 3 : options.timeout;
    const keyspace = `default:${this.bucketName}.${scopeName}.${collectionName}`;
    try {
      await (await this.getCluster()).query(`create primary index on ${keyspace}`);
      console.log(`Created primary index on ${keyspace}`);
    } catch (e) {
      // an existing index needs neither creation nor waiting
      if (String(e && e.message).includes("index exists")) {
        return;
      }
      throw e;
    }
    const statusQuery = `SELECT RAW state FROM system:indexes WHERE name = "#primary"
AND bucket_id = "${this.bucketName}"
AND scope_id ="${scopeName}"
AND keyspace_id ="${collectionName}"`;
    for (let tries = 1; tries <= attempts; tries++) {
      const rows = (await (await this.getCluster()).query(statusQuery)).rows;
      if (rows.length && rows[0] === "online") {
        // success is decided here, never by comparing the loop counter afterwards
        return;
      }
      // no point in pausing after the final check
      if (tries < attempts) {
        await new Promise(resolve => setTimeout(resolve, timeout * 1000));
      }
    }
    throw new Error(`Aborted waiting for primary index on ${this.bucketName}.${scopeName}.${collectionName} after ${attempts} attempts`);
  }
}
/**
 * A model for a local store. Ideally, the class methods can be generalized into an
 * interface for any key-value store into which zotero data can be saved.
 *
 * In this couchbase implementation a store is a collection named after the store,
 * located in a scope named after the library id.
 */
class Store {
  /**
   * Sets up the instance
   * @param {SyncAdapter} adapter
   * @param {String} storeName
   * @param {String} libraryId
   */
  constructor(adapter, storeName, libraryId) {
    this.adapter = adapter;
    this.name = storeName;
    this.libraryId = libraryId;
    // set by init()
    this.collection = null;
  }

  /**
   * Initialize the store: creates the scope, the collection and the primary index
   * if they do not exist yet. Returns the instance.
   * @returns {Promise<Store>}
   */
  async init() {
    const libraryId = this.libraryId;
    const storeName = this.name;
    const zoteroBucket = await this.adapter.getZoteroBucket();
    // whether something was newly created, so that couchbase needs time to settle
    let wait = false;
    // create scope
    try {
      await zoteroBucket.collections().createScope(libraryId);
      wait = true;
    } catch (e) {
      // an already existing scope is fine, anything else is not
      if (!e.message.includes("exists")) {
        throw e;
      }
    }
    // create collection
    const collSpec = {
      name: storeName,
      scopeName: libraryId,
      maxExpiry: 0
    };
    try {
      await zoteroBucket.collections().createCollection(collSpec);
      wait = true;
    } catch (e) {
      // an already existing collection is fine, anything else is not
      if (!e.message.includes("exists")) {
        throw e;
      }
    }
    // small timeout for couchbase to create the needed objects
    if (wait) {
      await new Promise(resolve => setTimeout(resolve, 1000));
    }
    await this.adapter.createPrimaryIndex(libraryId, storeName);
    this.collection = zoteroBucket.scope(libraryId).collection(storeName);
    return this;
  }

  /**
   * Checks if a key exists in the store
   * @param key
   * @returns {Promise<Boolean>}
   */
  async exists(key) {
    return (await this.collection.exists(key)).exists;
  }

  /**
   * Returns the value for a key
   * @param key
   * @returns {Promise<*>}
   */
  async get(key) {
    return (await this.collection.get(key)).content;
  }

  /**
   * inserts a document or replaces it if it exists
   * @param key
   * @param value
   * @returns {Promise<void>}
   */
  async upsert(key, value) {
    await this.collection.upsert(key, value);
  }

  /**
   * Deletes a key and its value from the store
   * @param key
   * @returns {Promise<void>}
   */
  async remove(key) {
    await this.collection.remove(key);
  }

  /**
   * Returns a map of keys (keys) and versions (values) of the records with the given
   * zotero keys. Keys without a local record are absent from the map.
   * @param {String[]} keys
   * @returns {Promise<{}>}
   */
  async getRecordVersions(keys) {
    // nothing to look up, so spare the round trip to the query service
    if (!keys.length) {
      return {};
    }
    // The key list is passed as a positional query parameter instead of being
    // quoted into the statement, so that key content can never alter the query.
    const query = `select \`key\`, version from ${this.adapter.bucketName}.${this.libraryId}.${this.name} where \`key\` in $1`;
    const cluster = await this.adapter.getCluster();
    const result = (await cluster.query(query, {parameters: [keys]})).rows;
    const map = {};
    for (const {key, version} of result) {
      map[key] = version;
    }
    return map;
  }
}
module.exports = { | |
SyncAdapter, | |
Store | |
}; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const zotero = require('zotero-api-client'); | |
const fetch = require('node-fetch'); | |
/**
 * High-level access to the zotero.org web API for the owner of an API key.
 * Key, group version and group data responses are cached on the instance
 * until resetState() is called.
 */
class Api {
  /**
   * Configures the instance with the API key
   * @param {String} key
   */
  constructor(key) {
    this.__api_key = key;
    this.resetState();
  }

  /**
   * Reset cached library state so that it will be re-fetched from zotero.org
   */
  resetState() {
    this.__state = {
      keyAccess: null,
      groupVersions: null,
      groupData: null
    };
  }

  /**
   * Returns a (low-level) zotero-api-client api instance
   * @returns {Object}
   */
  getApi() {
    return zotero(this.__api_key);
  }

  /**
   * Returns a (low-level) zotero-api-client api instance preconfigured for user or group library with
   * the given id
   * @param {String} libraryId Id of the library prefixed by "u" for user libraries and "g" for
   * group libraries
   * @returns {Object}
   */
  getLibraryApi(libraryId) {
    return this.getApi().library(libraryId);
  }

  /**
   * Sends an authenticated GET request and returns the parsed JSON body.
   * Internal helper shared by the caching getters.
   * @param {String} url
   * @returns {Promise<*>}
   * @throws Error if the server answers with a non-success status
   */
  async __fetchJson(url) {
    const response = await fetch(url, {
      headers: {"Zotero-API-Key": this.__api_key}
    });
    // fail with the status instead of trying to parse an error body as data
    if (!response.ok) {
      throw new Error(`Request to ${url} failed with status ${response.status}`);
    }
    return response.json();
  }

  /**
   * Returns the response to /keys/current request
   * @returns {Promise<Object>}
   */
  async getKeyAccess() {
    if (!this.__state.keyAccess) {
      this.__state.keyAccess = await this.__fetchJson(`https://api.zotero.org/keys/current`);
    }
    return this.__state.keyAccess;
  }

  /**
   * Returns the user id of the owner of the API key
   * @returns {Promise<*>}
   */
  async getUserId() {
    return (await this.getKeyAccess()).userID;
  }

  /**
   * Returns a map of user-accessible groups (keys) and their latest-modified-version (values)
   * @returns {Promise<Object>}
   */
  async getGroupVersions() {
    if (!this.__state.groupVersions) {
      const userId = await this.getUserId();
      this.__state.groupVersions = await this.__fetchJson(`https://api.zotero.org/users/${userId}/groups?format=versions`);
    }
    return this.__state.groupVersions;
  }

  /**
   * Returns a list of the ids of the groups to which the owner of the current API key has access.
   * @returns {Promise<String[]>}
   */
  async getGroupIds() {
    return Object.keys(await this.getGroupVersions());
  }

  /**
   * Returns a map of group ids (keys) and group data (values) of the groups to which the owner of the current API key has access.
   * @returns {Promise<Object>}
   */
  async getGroupsData() {
    if (!this.__state.groupData) {
      // Collect into a local object and cache it only once it is complete, so
      // that a failed request cannot leave a partial result in the cache.
      const groupData = {};
      for (const groupId of Object.keys(await this.getGroupVersions())) {
        groupData[String(groupId)] = await this.__fetchJson(`https://api.zotero.org/groups/${groupId}`);
      }
      this.__state.groupData = groupData;
    }
    return this.__state.groupData;
  }
}
/**
 * A single zotero user or group library, identified by a library id that is
 * prefixed by "u" for user libraries and "g" for group libraries.
 */
class Library {
  /**
   * The library instance constructor
   * @param {Api} api
   * @param {String} libraryId
   */
  constructor(api, libraryId) {
    this.id = libraryId;
    this.highLevelApi = api;
    this.lowLevelApi = api.getLibraryApi(libraryId);
  }

  /**
   * Returns information on the library. Group libraries carry the name of the
   * group, user libraries the fixed name "User Library".
   * @returns {Promise<{name: string, id: string, type: string}>}
   */
  async getInfo() {
    const id = this.id;
    let name;
    let type;
    if (id[0] === "g") {
      // group data is keyed by the bare group id, without the "g" prefix
      const data = (await this.highLevelApi.getGroupsData())[id.slice(1)].data;
      name = data.name;
      type = "group";
    } else {
      name = "User Library";
      type = "user";
    }
    return {
      name,
      type,
      id
    };
  }

  /**
   * Returns an array of keys of records of the given type (items, collections, ...)
   * which have changed since the given version number
   * @see https://www.zotero.org/support/dev/web_api/v3/syncing#i_get_updated_data
   * @param {String} type
   * @param version
   * @returns {Promise<{version:Number, keys:string[]}>}
   * @throws Error if type is not supported
   */
  async getModifiedSince(type, version) {
    this.assertValidType(type);
    const options = {since: version, format: "keys"};
    const response = await this.lowLevelApi[type]().get(options);
    if (version === response.getVersion()) {
      // library version unchanged, so there is nothing to report
      return {
        version,
        keys: []
      };
    }
    version = response.getVersion();
    // the body is plain text with one key per line; drop empty lines
    const keys = (await response.getData().text()).trim().split("\n").filter(key => Boolean(key));
    return { version, keys };
  }

  /**
   * Returns a map containing as keys the type names (items, collections, ...) and
   * as values an array of the keys of type records which have been deleted since the
   * given version. The map also contains the key `version` with the current version
   * of the library.
   * @see https://www.zotero.org/support/dev/web_api/v3/syncing#ii_get_deleted_data
   * @param version
   * @returns {Promise<{ version: Number, collections: String[], items: String[], searches: String[], tags: String[], settings: String[] }>}
   */
  async getDeletedSince(version) {
    const response = await this.lowLevelApi.deleted(String(version)).get();
    const data = response.getData();
    data.version = response.getVersion();
    return data;
  }

  /**
   * Retrieves the json data of the records with the given type (collections, items, ...), having the given
   * keys. The keys are requested in batches, so any number of keys can be passed.
   * @param {String} type
   * @param {String[]} keys
   * @returns {Promise<Object[]>}
   * @throws Error if type is not supported
   */
  async getRecordData(type, keys) {
    this.assertValidType(type);
    // each object type is selected by its own query parameter
    const keyParameter = type === "collections" ? "collectionKey" : "itemKey";
    // the Zotero API accepts at most 50 keys per request
    const maxKeysPerRequest = 50;
    const records = [];
    // an empty key list issues no request at all, instead of an unfiltered one
    for (let offset = 0; offset < keys.length; offset += maxKeysPerRequest) {
      const batch = keys.slice(offset, offset + maxKeysPerRequest);
      const response = await this.lowLevelApi[type]().get({[keyParameter]: batch.join(",")});
      records.push(...response.getData());
    }
    return records;
  }

  /**
   * Checks that the type is supported by the library
   * @param type
   * @throws Error if type is not supported
   */
  assertValidType(type) {
    if (!["items", "collections"].includes(type)) {
      throw new Error(`Type '${type}' is not supported.`);
    }
  }
}
module.exports = { | |
Api, | |
Library | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment