Last active
April 5, 2022 22:15
-
-
Save lognaturel/b9420c4211682bdeb0c27a0eae25187d to your computer and use it in GitHub Desktop.
A basic example of getting ODK data as a pandas dataframe
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
Copyright 2022 ODK
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
# !/usr/bin/env python3 | |
from typing import Optional | |
import requests | |
import json | |
import pandas as pd | |
from pandas import DataFrame | |
def get_data(url: str, username: str, password: str, project: int, formid: str, table: Optional[str] = "Submissions",
             cache_file: Optional[str] = None) -> Optional[DataFrame]:
    """Get non-rejected data for a specific form as a pandas dataframe

    Parameters:
        url: the base URL of the Central server to connect to
        username: the username of the Web User to auth with
        password: the Web User's password
        project: the numeric id of the project to get data from
        formid: the formid to get data from
        table (optional): if there are repeats, specify the repeat name to get the table for that repeat
        cache_file (optional): a file for caching the session token. This is recommended to minimize the login events
            logged on the server.

    Returns:
        Optional[DataFrame]: the dataframe, or None if the query returned no rows or the response was not a JSON
            object with a "value" property (the response is printed in that case)

    Raises:
        SystemExit: if no session token could be obtained (raised by get_token)
    """
    token = get_token(url, username, password, cache_file)

    response = requests.get(
        f"{url}/v1/projects/{project}/forms/{formid}.svc/{table}?$filter=__system/reviewState ne 'rejected'",
        headers={"Content-Type": "application/json", "Authorization": f"Bearer {token}"}
    )

    # Parse the body exactly once. A non-JSON body (e.g. an HTML error page from a proxy) makes
    # response.json() raise a ValueError subclass, which is reported instead of crashing.
    try:
        payload = response.json()
    except ValueError:
        print(response.text)  # Something went wrong with the query
        return None

    try:
        rows = payload['value']
    except (KeyError, TypeError):
        print(payload)  # Something went wrong with the query
        return None

    if not rows:
        return None

    return pd.json_normalize(rows, sep='/')
def get_token(url: str, username: str, password: str, cache_file: Optional[str] = None) -> str:
    """Get a verified session token with the provided credential. First tries from cache if a cache file is provided,
    then falls back to requesting a new session

    Parameters:
        url: the base URL of the Central server to connect to
        username: the username of the Web User to auth with
        password: the Web User's password
        cache_file (optional): a file for caching the session token. This is recommended to minimize the login events
            logged on the server.

    Returns:
        str: the session token

    Raises:
        SystemExit: if neither the cache nor a new login produced a token
    """
    token = get_verified_cached_token(url, cache_file)
    if token:
        # The token came from the cache file and was just verified, so there is nothing to write back.
        return token

    token = get_new_token(url, username, password)
    if not token:
        raise SystemExit("Unable to get session token")

    if cache_file is not None:
        write_to_cache(cache_file, "token", token)

    return token
def get_verified_cached_token(url: str, cache_file: Optional[str] = None) -> Optional[str]:
    """Try to read a Central session token from the "token" property of a JSON cache file with the given filename

    The cached token is verified by requesting the current user's details from the server with it.

    Parameters:
        url: the base URL of the Central server to connect to
        cache_file (optional): the name of the JSON cache file to read the token from

    Returns:
        Optional[str]: the cached token if the server accepted it. None if no cache file was given, the file is
            missing, it is not valid JSON, it has no "token" property, or the server rejected the token.
    """
    if cache_file is None:
        return None

    try:
        with open(cache_file, encoding="utf-8") as file:
            token = json.load(file)["token"]
    except (FileNotFoundError, KeyError, TypeError, ValueError):
        # A missing, corrupt (JSONDecodeError is a ValueError) or non-object cache is a cache miss,
        # not an error: the caller falls back to logging in again.
        return None

    user_details_response = requests.get(
        f"{url}/v1/users/current",
        headers={"Content-Type": "application/json", "Authorization": f"Bearer {token}"}
    )
    return token if user_details_response.ok else None
def get_new_token(url: str, username: str, password: str) -> Optional[str]:
    """Log in to Central to create a new session and return its token
    (https://odkcentral.docs.apiary.io/#reference/authentication/session-authentication/logging-in)

    Parameters:
        url: the base URL of the Central server to connect to
        username: the username of the Web User to auth with
        password: the Web User's password

    Returns:
        Optional[str]: the session token, or None if the server did not answer with status 200
    """
    credentials = json.dumps({"email": username, "password": password})
    session_response = requests.post(
        f"{url}/v1/sessions",
        data=credentials,
        headers={"Content-Type": "application/json"},
    )

    # Anything other than a plain 200 means no session was created.
    if session_response.status_code != 200:
        return None

    return session_response.json()["token"]
def write_to_cache(cache_file: str, key: str, value: str) -> None:
    """Add the given key/value pair to the provided cache file, preserving any other properties it may have

    If the file does not exist, is not valid JSON, or does not hold a JSON object, a fresh cache containing only
    the given pair is written.

    Parameters:
        cache_file: the name of the JSON cache file to update
        key: the property name to set
        value: the value to store under that property
    """
    try:
        with open(cache_file, encoding="utf-8") as file:
            cache = json.load(file)
    except (FileNotFoundError, ValueError):
        # Missing or corrupt (JSONDecodeError is a ValueError) cache: start over rather than crash.
        cache = {}

    if not isinstance(cache, dict):
        # Valid JSON that is not an object cannot take a key; replace it.
        cache = {}

    cache[key] = value

    with open(cache_file, 'w', encoding="utf-8") as outfile:
        json.dump(cache, outfile)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment