Skip to content

Instantly share code, notes, and snippets.

@cboulanger
Last active April 1, 2021 06:42
Show Gist options
  • Save cboulanger/e3719a774af761048100aa2271521fb2 to your computer and use it in GitHub Desktop.
Save cboulanger/e3719a774af761048100aa2271521fb2 to your computer and use it in GitHub Desktop.
One-way (read-only) synchronization from zotero.org to a local key-value datastore (couchbase)
/**
 * Naive prototype function that does a remote-to-local synchronization of user
 * and group libraries that are accessible to the owner of the API token. The
 * external `sandbox` object provides preconfigured api objects which access the
 * Zotero server and the local store (a couchbase server in the case of this
 * prototype), and a gauge widget for some nice visual feedback.
 *
 * This is only a partial implementation of
 * https://www.zotero.org/support/dev/web_api/v3/syncing . In particular, the
 * code is unaware of mid-sync version changes, since the sync is needed only
 * for data analysis.
 *
 * For each library, the locally stored library version is written only after
 * all record types have been processed, so a failed run is retried from the
 * previous version on the next call.
 *
 * @returns {Promise<void>}
 * @throws {Error} Rethrows any error raised while syncing (after hiding the
 * gauge). Also throws if the server returns no records at all for a batch of
 * requested keys, because the download loop could otherwise never finish.
 */
async function zoteroSync() {
  // maximum number of keys requested from the server in one call
  const batchSize = 100;
  try {
    // the sync adapter implementing the methods used here
    const syncAdapter = sandbox.getSyncAdapter();
    const zotApi = sandbox.getZoteroApi();
    // the id of the API token's owner
    const userId = await zotApi.getUserId();
    // the ids of the groups to which this user has access
    const groupIds = await zotApi.getGroupIds();
    // library ids as understood by the underlying zotero API client
    const libraryIds = ["u" + userId].concat(groupIds.map(groupId => "g" + groupId));
    const types = ["collections", "items"];
    // loop over accessible libraries
    let libCounter = 0;
    const libNumber = libraryIds.length;
    for (const libraryId of libraryIds) {
      libCounter++;
      // library to sync
      const library = sandbox.getZoteroLibrary(libraryId);
      // inform user
      const {name} = await library.getInfo();
      sandbox.gauge.show(`Synchronizing '${name}'`, libCounter/libNumber);
      // store for sync metadata
      const metaStore = await syncAdapter.getStore("meta", libraryId);
      // if a local library version is stored, use it to retrieve records changed on the server
      const versionKey = "version";
      let localVersion;
      if (await metaStore.exists(versionKey)) {
        localVersion = await metaStore.get(versionKey);
      } else {
        localVersion = 0;
        await metaStore.upsert(versionKey, localVersion);
      }
      // get deleted keys for later; the response also carries the current library version
      const deletedInfo = await library.getDeletedSince(localVersion);
      const remoteVersion = deletedInfo.version;
      if (remoteVersion === localVersion) {
        // up-to-date
        continue;
      }
      // loop over types of record
      for (const type of types) {
        // store for the records of the specific type of the given library
        const typeStore = await syncAdapter.getStore(type, libraryId);
        // i. Get updated data, see https://www.zotero.org/support/dev/web_api/v3/syncing#i_get_updated_data
        sandbox.gauge.show(`Checking for changed ${type} in '${name}'...`);
        // get keys of remotely changed records
        let { keys } = await library.getModifiedSince(type, localVersion);
        if (keys.length) {
          // if we have local records, retrieve them to compare their versions
          const keyVersionMap = await typeStore.getRecordVersions(keys);
          // skip remote record in case of incomplete first sync (to avoid re-download)
          // or when the local record has an equal or newer version
          const skippedKeys = new Set();
          for (const [key, localRecordVersion] of Object.entries(keyVersionMap)) {
            if (localVersion === 0 || localRecordVersion >= remoteVersion) {
              skippedKeys.add(key);
            }
          }
          // filtering through a set cannot accidentally drop an unrelated key
          keys = keys.filter(key => !skippedKeys.has(key));
          const total = keys.length;
          let counter = 0;
          while (counter < total) {
            const range = keys.slice(counter, counter + batchSize);
            // get record data for the given range of keys
            const records = await library.getRecordData(type, range);
            if (!records || !records.length) {
              // the loop only advances by the number of records received, so an
              // empty response would otherwise repeat the same request forever
              throw new Error(`Server returned no ${type} for ${range.length} requested key(s) in '${name}'`);
            }
            for (const record of records) {
              counter++;
              sandbox.gauge.show(`Updating ${counter} of ${total} changed ${type} in '${name}'`, counter/total);
              await typeStore.upsert(record.key, record);
            }
          }
        }
        // ii. Get deleted data, see https://www.zotero.org/support/dev/web_api/v3/syncing#ii_get_deleted_data
        const deletedKeys = deletedInfo[type] || [];
        if (deletedKeys.length) {
          // Todo: if the number of deletions is huge, ask user first
          sandbox.gauge.show(`Removing remotely deleted ${type} in ${name}...`);
          for (const key of deletedKeys) {
            try {
              await typeStore.remove(key);
            } catch (e) {
              // deliberately best-effort: ignore non-existent local records
            }
          }
        }
      }
      // sync done, store remote version as local version
      await metaStore.upsert(versionKey, remoteVersion);
    }
  } catch (e) {
    sandbox.gauge.hide();
    // guard the lookup so that a missing status cannot mask the original error
    const status = sandbox.gauge._status;
    console.error("error at :" + (status ? status.section : undefined));
    throw e;
  }
}
const process = require('process');
const couchbase = require("couchbase");
/**
 * Sync adapter that hands out {@link Store} instances backed by a couchbase
 * bucket and owns the (lazily created) cluster connection.
 */
class SyncAdapter {
  /**
   * Set up the adapter
   * @param {String} url Connection string passed to `couchbase.connect`
   * @param {String} username
   * @param {String} password
   * @param {Object} [options] Optional options: {bucketName?:"zotero"}
   */
  constructor(url, username, password, options={}) {
    this.url = url;
    this.username = username;
    this.password = password;
    this.bucketName = options.bucketName || "zotero";
  }

  /**
   * Returns the store instance for the given combination of storeName and libraryId.
   * Interface method.
   * @param {String} storeName
   * @param {String} libraryId
   * @returns {Promise<Store>}
   */
  async getStore(storeName, libraryId) {
    return await (new Store(this, storeName, libraryId)).init();
  }

  //////// Non-interface methods /////////

  /**
   * Returns the active cluster instance, connecting on first use
   * @returns {Promise<Cluster>}
   */
  async getCluster() {
    if (!this.cluster) {
      this.cluster = await couchbase.connect(this.url, {
        username: this.username,
        password: this.password,
      });
    }
    return this.cluster;
  }

  /**
   * Returns the couchbase bucket in which the zotero data is stored
   * @returns {Promise<Bucket>}
   */
  async getZoteroBucket() {
    return (await this.getCluster()).bucket(this.bucketName);
  }

  /**
   * Creates a primary index on the given collection and waits until the index
   * is reported as "online". Returns immediately if the index already exists.
   * @param {String} scopeName
   * @param {String} collectionName
   * @returns {Promise<void>}
   * @throws {Error} If the index cannot be created, or if it is still not
   * online after the last polling attempt
   */
  async createPrimaryIndex(scopeName, collectionName) {
    const keyspace = `${this.bucketName}.${scopeName}.${collectionName}`;
    try {
      await (await this.getCluster()).query(`create primary index on default:${keyspace}`);
      console.log(`Created primary index on default:${keyspace}`);
    } catch (e) {
      if (String(e && e.message).includes("index exists")) {
        return;
      }
      throw e;
    }
    // number of times the index state is polled
    const attempts = 3;
    // seconds to wait between two polls
    const timeout = 3;
    const stateQuery = [
      `SELECT RAW state FROM system:indexes WHERE name = "#primary"`,
      `AND bucket_id = "${this.bucketName}"`,
      `AND scope_id ="${scopeName}"`,
      `AND keyspace_id ="${collectionName}"`
    ].join("\n");
    let waited = 0;
    for (let tries = 1; tries <= attempts; tries++) {
      const rows = (await (await this.getCluster()).query(stateQuery)).rows;
      if (rows.length && rows[0] === "online") {
        // success is reported by returning, so it can never be confused with
        // having used up all attempts
        return;
      }
      if (tries < attempts) {
        await new Promise(resolve => setTimeout(resolve, timeout*1000));
        waited += timeout;
      }
    }
    throw new Error(`Aborted waiting for primary index on ${keyspace} after ${waited} seconds`);
  }
}
/**
 * A model for a local store. Ideally, the class methods can be generalized into an
 * interface for any key-value store into which zotero data can be saved.
 *
 * In this implementation a store is a couchbase collection named after the
 * store, inside a scope named after the library id.
 */
class Store {
  /**
   * Sets up the instance
   * @param {SyncAdapter} adapter
   * @param {String} storeName
   * @param {String} libraryId
   */
  constructor(adapter, storeName, libraryId) {
    this.adapter = adapter;
    this.name = storeName;
    this.libraryId = libraryId;
    // set by init()
    this.collection = null;
  }

  /**
   * Initialize the store: creates the scope, the collection and the primary
   * index if they do not exist yet. Returns the instance.
   * @returns {Promise<Store>}
   * @throws {Error} Any error from creating the scope or collection whose
   * message does not contain "exists", and any error from creating the index
   */
  async init() {
    const libraryId = this.libraryId;
    const storeName = this.name;
    const zoteroBucket = await this.adapter.getZoteroBucket();
    let wait = false;
    // create scope
    try {
      await zoteroBucket.collections().createScope(libraryId);
      wait = true;
    } catch (e) {
      if (!e.message.includes("exists")) {
        throw e;
      }
    }
    // create collection
    const collSpec = {
      name: storeName,
      scopeName: libraryId,
      maxExpiry: 0
    };
    try {
      await zoteroBucket.collections().createCollection(collSpec);
      wait = true;
    } catch (e) {
      if (!e.message.includes("exists")) {
        throw e;
      }
    }
    // small timeout for couchbase to create the needed objects
    if (wait) {
      await new Promise(resolve => setTimeout(resolve, 1000));
    }
    await this.adapter.createPrimaryIndex(libraryId, storeName);
    this.collection = zoteroBucket.scope(libraryId).collection(storeName);
    return this;
  }

  /**
   * Checks if a key exists in the store
   * @param {String} key
   * @returns {Promise<Boolean>}
   */
  async exists(key) {
    return (await this.collection.exists(key)).exists;
  }

  /**
   * Returns the value for a key
   * @param {String} key
   * @returns {Promise<*>}
   */
  async get(key) {
    return (await this.collection.get(key)).content;
  }

  /**
   * inserts a document or replaces it if it exists
   * @param {String} key
   * @param {*} value
   * @returns {Promise<void>}
   */
  async upsert(key, value) {
    await this.collection.upsert(key, value);
  }

  /**
   * Deletes a key and its value from the store
   * @param {String} key
   * @returns {Promise<void>}
   */
  async remove(key) {
    await this.collection.remove(key);
  }

  /**
   * Returns a map of keys (keys) and versions (values) of the records with the given
   * zotero keys. Keys without a matching row in the query result are absent from the map.
   * @param {String[]} keys
   * @returns {Promise<{}>}
   */
  async getRecordVersions(keys) {
    if (!keys.length) {
      // nothing to look up, so no need to run a query
      return {};
    }
    // the keys originate from a remote server and are therefore passed as a
    // positional query parameter instead of being pasted into the statement
    const query = `select \`key\`, version from ${this.adapter.bucketName}.${this.libraryId}.${this.name} where \`key\` in $1`;
    const cluster = await this.adapter.getCluster();
    const result = (await cluster.query(query, { parameters: [keys] })).rows;
    const map = {};
    for (const {key, version} of result) {
      map[key] = version;
    }
    return map;
  }
}
module.exports = {
SyncAdapter,
Store
};
const zotero = require('zotero-api-client');
const fetch = require('node-fetch');
/**
 * High-level access to the zotero.org web API for the owner of an API key.
 * Responses describing the key and its groups are cached on the instance until
 * {@link Api#resetState} is called.
 */
class Api {
  /**
   * Configures the instance with the API key
   * @param {String} key
   */
  constructor(key) {
    this.__api_key = key;
    this.resetState();
  }

  /**
   * Reset cached library state so that it will be re-fetched from zotero.org
   */
  resetState() {
    this.__state = {
      keyAccess: null,
      groupVersions: null,
      groupData: null
    };
  }

  /**
   * Returns a (low-level) zotero-api-client api instance
   * @returns {Object}
   */
  getApi() {
    return zotero(this.__api_key);
  }

  /**
   * Returns a (low-level) zotero-api-client api instance preconfigured for user or group library with
   * the given id
   * @param {String} libraryId Id of the library prefixed by "u" for user libraries and "g" for
   * group libraries
   * @returns {Object}
   */
  getLibraryApi(libraryId) {
    return this.getApi().library(libraryId);
  }

  /**
   * Sends an authenticated GET request and returns the parsed JSON body.
   * Internal helper shared by the cached getters.
   * @param {String} url
   * @returns {Promise<*>}
   * @throws {Error} If the server answers with a non-success HTTP status, so
   * that an error response is never parsed and cached as if it were data
   */
  async __fetchJson(url) {
    const response = await fetch(url, {
      headers: {"Zotero-API-Key": this.__api_key}
    });
    if (!response.ok) {
      throw new Error(`Request to ${url} failed with HTTP status ${response.status} ${response.statusText}`);
    }
    return await response.json();
  }

  /**
   * Returns the response to /keys/current request
   * @returns {Promise<Object>}
   */
  async getKeyAccess() {
    if (!this.__state.keyAccess) {
      this.__state.keyAccess = await this.__fetchJson(`https://api.zotero.org/keys/current`);
    }
    return this.__state.keyAccess;
  }

  /**
   * Returns the user id of the owner of the API key
   * @returns {Promise<*>}
   */
  async getUserId() {
    return (await this.getKeyAccess()).userID;
  }

  /**
   * Returns a map of user-accessible groups (keys) and their latest-modified-version (values)
   * @returns {Promise<Object>}
   */
  async getGroupVersions() {
    if (!this.__state.groupVersions) {
      const userId = await this.getUserId();
      this.__state.groupVersions = await this.__fetchJson(`https://api.zotero.org/users/${userId}/groups?format=versions`);
    }
    return this.__state.groupVersions;
  }

  /**
   * Returns a list of the ids of the groups to which the owner of the current API key has access.
   * @returns {Promise<String[]>}
   */
  async getGroupIds() {
    return Object.keys(await this.getGroupVersions());
  }

  /**
   * Returns a map of group ids (keys) and group data (values) of the groups to which the owner of the current API key has access.
   * @returns {Promise<Object>}
   */
  async getGroupsData() {
    if (!this.__state.groupData) {
      // collect into a local map first, so that a failed request does not
      // leave a partially filled cache behind
      const groupData = {};
      for (const groupId of Object.keys(await this.getGroupVersions())) {
        groupData[String(groupId)] = await this.__fetchJson(`https://api.zotero.org/groups/${groupId}`);
      }
      this.__state.groupData = groupData;
    }
    return this.__state.groupData;
  }
}
/**
 * Read access to a single zotero.org user or group library.
 */
class Library {
  /**
   * The library instance constructor
   * @param {Api} api
   * @param {String} libraryId Id of the library prefixed by "u" for user libraries and "g" for
   * group libraries
   */
  constructor(api, libraryId) {
    this.id = libraryId;
    this.highLevelApi = api;
    this.lowLevelApi = api.getLibraryApi(libraryId);
  }

  /**
   * Returns information on the library. Group names are looked up in the
   * group data of the api; user libraries get a fixed name.
   * @returns {Promise<{name: string, type: string, id: string}>}
   */
  async getInfo() {
    const id = this.id;
    let name;
    let type;
    if (id[0] === "g") {
      const data = (await this.highLevelApi.getGroupsData())[id.slice(1)].data;
      name = data.name;
      type = "group";
    } else {
      name = "User Library";
      type = "user";
    }
    return {
      name,
      type,
      id
    };
  }

  /**
   * Returns an array of keys of records of the given type (items, collections, ...)
   * which have changed since the given version number, together with the
   * version reported by the server.
   * @see https://www.zotero.org/support/dev/web_api/v3/syncing#i_get_updated_data
   * @param {String} type
   * @param {Number} version
   * @returns {Promise<{version:Number, keys:string[]}>}
   * @throws {Error} if type is not supported
   */
  async getModifiedSince(type, version) {
    this.assertValidType(type);
    const options = {since:version, format:"keys"};
    const response = await this.lowLevelApi[type]().get(options);
    const remoteVersion = response.getVersion();
    if (version === remoteVersion) {
      return {
        version,
        keys: []
      };
    }
    // the body is a newline-separated list of keys; drop empty lines
    const keys = (await response.getData().text()).trim().split("\n").filter(key => Boolean(key));
    return { version: remoteVersion, keys };
  }

  /**
   * Returns a map containing as keys the type names (items, collections, ...) and
   * as values an array of the keys of type records which have been deleted since the
   * given version. The map also contains the key `version` with the current version
   * of the library.
   * @see https://www.zotero.org/support/dev/web_api/v3/syncing#ii_get_deleted_data
   * @param {Number} version
   * @returns {Promise<{ version: Number, collections: String[], items: String[], searches: String[], tags: String[], settings: String[] }>}
   */
  async getDeletedSince(version) {
    const response = await this.lowLevelApi.deleted(String(version)).get();
    const data = response.getData();
    data.version = response.getVersion();
    return data;
  }

  /**
   * Retrieves the json data of the records with the given type (collection, items, ...), having the given
   * keys. Returns an empty array without contacting the server if no keys are given.
   * @param {String} type
   * @param {String[]} keys
   * @returns {Promise<Object[]>}
   * @throws {Error} if type is not supported
   */
  async getRecordData(type, keys) {
    this.assertValidType(type);
    if (!keys.length) {
      // an empty key filter would not restrict the request to the wanted records
      return [];
    }
    // `itemKey` is only valid for item requests; collections are selected by `collectionKey`
    const keyParam = type === "collections" ? "collectionKey" : "itemKey";
    return (await this.lowLevelApi[type]().get({[keyParam]: keys.join(",")})).getData();
  }

  /**
   * Checks that the type is supported by the library
   * @param {String} type
   * @throws Error if type is not supported
   */
  assertValidType(type) {
    if (!["items","collections"].includes(type)) {
      throw new Error(`Type '${type}' is not supported.`);
    }
  }
}
module.exports = {
Api,
Library
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment