Skip to content

Instantly share code, notes, and snippets.

@dblanchette
Last active October 19, 2025 08:43
Show Gist options
  • Select an option

  • Save dblanchette/b8ed8cf42431f56024c1c70ed5137e0f to your computer and use it in GitHub Desktop.

Select an option

Save dblanchette/b8ed8cf42431f56024c1c70ed5137e0f to your computer and use it in GitHub Desktop.
import os
from datetime import datetime, timedelta
from typing import Any, Dict, Generator, List, Optional, Union

import requests

# Optional - to connect using OAuth credentials
from oauthlib.oauth1 import SIGNATURE_RSA
class JiraClient:
    """Small Jira Cloud REST API v3 client.

    Reads the server base URL from the ``JIRA_SERVER`` environment variable
    and authenticates either with basic auth (Atlassian account e-mail + API
    token) or with OAuth1 credentials.
    """

    def __init__(
        self,
        username: Optional[str] = None,
        api_token: Optional[str] = None,
        access_token: Optional[str] = None,
        access_token_secret: Optional[str] = None,
        consumer_key: Optional[str] = None,
        key_cert: Optional[str] = None,
    ):
        """Build an authenticated HTTP session.

        :param username: Atlassian account e-mail (basic auth)
        :param api_token: Atlassian API token (basic auth)
        :param access_token: OAuth1 resource owner key
        :param access_token_secret: OAuth1 resource owner secret
        :param consumer_key: OAuth1 consumer key
        :param key_cert: RSA private key used to sign OAuth1 requests
        :raises ValueError: when neither complete credential set is given
        """
        self._user_url = os.getenv("JIRA_SERVER", "").rstrip("/")
        self._base_url = f"{self._user_url}/rest/api/3"
        if username and api_token:
            self._session = requests.Session()
            self._session.auth = (username, api_token)
        elif access_token and access_token_secret and consumer_key and key_cert:
            # Imported lazily so API-token users do not need requests_oauthlib
            # installed. (The original referenced OAuth1Session without ever
            # importing it, which raised NameError as soon as this branch ran.)
            from requests_oauthlib import OAuth1Session

            self._session = OAuth1Session(
                consumer_key,
                rsa_key=key_cert,
                resource_owner_key=access_token,
                resource_owner_secret=access_token_secret,
                signature_method=SIGNATURE_RSA,
            )
        else:
            raise ValueError("Must use API token or OAuth credentials")

    def _get_paginated_results(
        self, url: str, results_key: str, parameters: Optional[Dict[str, Union[str, int]]] = None, use_post: bool = False,
    ) -> Generator[Dict[str, Any], None, None]:
        """Yield results of a paginated call that uses 'maxResults', 'startAt', and 'total' attributes.

        :param url: URL without any pagination parameters
        :param results_key: The key of the response dict that contains the actual elements to return
            (varies from call to call). Ex.: "items"
        :param parameters: If use_post is False, URL parameters. If use_post is True, json encoded body parameters
        :param use_post: Use POST instead of GET. Needed if parameters are too long to fit in a URL
        """
        # Copy so the pagination keys are not leaked into the caller's dict.
        parameters = dict(parameters) if parameters else {}
        results_per_page = 1000
        parameters["maxResults"] = results_per_page
        start_at = 0  # renamed from "next" to avoid shadowing the builtin
        while True:
            parameters["startAt"] = start_at
            if use_post:
                response = self._session.post(url, json=parameters)
            else:
                response = self._session.get(url, params=parameters)
            response.raise_for_status()
            response_json = response.json()
            if response_json["maxResults"] < results_per_page:
                # Some calls limit the maximum value of maxResults
                results_per_page = response_json["maxResults"]
                parameters["maxResults"] = results_per_page
            yield from response_json[results_key]
            start_at += results_per_page
            if start_at >= response_json["total"]:
                return

    def _get_paginated_results_with_next_page_link(self, url: str) -> Generator[Dict[str, Any], None, None]:
        """Yield results of a call whose payload carries 'lastPage' and 'nextPage' attributes."""
        is_last_page = False
        while not is_last_page:
            response = self._session.get(url)
            response.raise_for_status()
            response_json = response.json()
            yield from response_json["values"]
            # A missing "lastPage" attribute is treated as the final page.
            is_last_page = response_json.get("lastPage", True)
            if not is_last_page:
                url = response_json["nextPage"]

    def retrieve_worklogs_updated_since(self, start: datetime) -> List[Dict[str, Any]]:
        """Retrieve worklog objects for all worklogs created or updated since *start*.

        Faster than getting worklogs through issues.
        """
        worklog_ids: List[str] = [
            worklog_entry["worklogId"]
            for worklog_entry in self._get_paginated_results_with_next_page_link(
                f"{self._base_url}/worklog/updated?since={int(start.timestamp() * 1000)}"
            )
        ]
        worklogs_per_page = 1000
        ids_in_groups_per_page = [
            worklog_ids[i : i + worklogs_per_page] for i in range(0, len(worklog_ids), worklogs_per_page)
        ]
        worklogs_by_id: Dict[str, Dict[str, Any]] = {}  # Using a dict to remove duplicates returned by the Jira API
        # This is kind of a manual pagination. The documentation only states
        # "The returned list of worklogs is limited to 1000 items."
        # Doc: https://developer.atlassian.com/cloud/jira/platform/rest/v3/#api-rest-api-3-worklog-list-post
        for ids_to_get in ids_in_groups_per_page:
            response = self._session.post(f"{self._base_url}/worklog/list", json={"ids": ids_to_get})
            response.raise_for_status()  # the original silently processed HTTP error payloads here
            for worklog in response.json():
                # Optionally remove the worklogs you don't want (not in the right time period)
                worklogs_by_id[worklog["id"]] = worklog
        return list(worklogs_by_id.values())

    def search_issues(self, jql: str, fields: Optional[List[str]] = None) -> List[Dict[str, Any]]:
        """Return issues that match a specified JQL query.

        :param jql: JQL query string
        :param fields: optional list of field names to include in each returned issue
        """
        parameters: Dict[str, Union[str, List[str]]] = {"jql": jql}
        if fields:
            parameters["fields"] = fields
        # POST is used because JQL queries can exceed URL length limits.
        return list(
            self._get_paginated_results(
                f"{self._base_url}/search", parameters=parameters, results_key="issues", use_post=True
            )
        )

    # Point 3 - get issues for the retrieved worklogs
    def retrieve_issues_for_worklogs(self, worklogs: List[Dict[str, Any]], fields: Optional[List[str]] = None) -> List[Dict[str, Any]]:
        """Get Issue objects referenced in a list of worklogs."""
        issue_ids = set(worklog["issueId"] for worklog in worklogs)
        return self.search_issues(f"id in ({','.join(str(issue_id) for issue_id in issue_ids)})", fields=fields)
# Example usage
# Point the client at your Jira Cloud instance (the trailing slash is stripped by JiraClient).
os.environ["JIRA_SERVER"] = "https://mycompany.atlassian.net/"
# Basic auth: Atlassian account e-mail + API token.
client = JiraClient("me@companyname.com", "my_api_token_12345")
# Fetch every worklog created or updated in the last 14 days (performs live HTTP calls).
recent_worklogs = client.retrieve_worklogs_updated_since(datetime.now() - timedelta(days=14))
@apollovy
Copy link
Copy Markdown

apollovy commented Jun 21, 2020

Line 3:

from typing import Any, Dict, Generator, List, Union

Line 49:

        results_per_page = 1000

Line 125:

        return self.search_issues(f"id in ({','.join(str(issue_id) for issue_id in set(worklog['issueId'] for worklog in worklogs))})", fields=fields)

@dblanchette
Copy link
Copy Markdown
Author

Thank you for the corrections @apollovy, edited the gist.

@kolomicenko
Copy link
Copy Markdown

kolomicenko commented Oct 23, 2020

Line 100:

ids_in_groups_per_page = [worklog_ids[i : i + worklogs_per_page - 1] for i in range(0, len(worklog_ids), worklogs_per_page)]

@dblanchette
Copy link
Copy Markdown
Author

Line 100:

ids_in_groups_per_page = [worklog_ids[i : i + worklogs_per_page - 1] for i in range(0, len(worklog_ids), worklogs_per_page)]

Are you sure about this?

Let's say we have 32 worklogs IDs (here identified by the numbers 0 to 31)

>>> worklog_ids
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]

If we list 10 per_page, we get 3 lists of 10 and one list of 2:

>>> [worklog_ids[i : i + worklogs_per_page] for i in range(0, len(worklog_ids), worklogs_per_page)]
[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [10, 11, 12, 13, 14, 15, 16, 17, 18, 19], [20, 21, 22, 23, 24, 25, 26, 27, 28, 29], [30, 31]]

With your correction, we miss number 9, 19, and 29:

[[0, 1, 2, 3, 4, 5, 6, 7, 8], [10, 11, 12, 13, 14, 15, 16, 17, 18], [20, 21, 22, 23, 24, 25, 26, 27, 28], [30, 31]]

@kolomicenko
Copy link
Copy Markdown

Of course you're right. That's not a good correction.

However, there still must be something wrong with the code, somewhere above the place that I pointed out. The list worklog_ids returned by the generator has some specific items duplicated. Like this:

>>> worklog_ids
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 29, 30, 31]

@dblanchette
Copy link
Copy Markdown
Author

I think what can happen is that new entries were added in between paginated calls so the list shifts. Let's say we request the 100 more recent entries, then a user creates a new worklog entry and we request entries 101 to 200, then entry 101 is the previous entry 100.

This can be solved by removing duplicates:
Line 109

return list(set(worklogs))

Makes sense?

@kolomicenko
Copy link
Copy Markdown

It is reproducible and it is actually the Jira API. It builds the nextPage URL using the timestamp of the last item on current page. As a consequence, the next page always starts with that last item from the previous page (plus other items with the same timestamp). I agree removing the duplicates as you suggested makes sense.

@dblanchette
Copy link
Copy Markdown
Author

Interesting, I modified the gist. Thanks and have a nice day!

@abhii-singh
Copy link
Copy Markdown

thanks for the quick snippet . I tried using it as is ( with my personal details ) throws some error at :
An error was encountered: unhashable type: 'dict' Traceback (most recent call last): File "<stdin>", line 106, in retrieve_worklogs_updated_since TypeError: unhashable type: 'dict'

Any leads or I can debug it

@jjoshm
Copy link
Copy Markdown

jjoshm commented Jan 14, 2021

@singh-ab
you can try this version

def retrieve_worklogs_updated_since(self, start: datetime) -> List[Dict[str, Any]]:
        """Retrieve worklog objects for all worklogs that have been created or updated since the provided datetime
        Faster than getting worklogs through issues
        """
        worklog_ids: List[str] = []
        for worklog_entry in self._get_paginated_results_with_next_page_link(
            f"{self._base_url}/worklog/updated?since={int(start.timestamp() * 1000)}"
        ):
            worklog_ids.append(worklog_entry["worklogId"])

        worklogs_per_page = 1000
        ids_in_groups_per_page = [worklog_ids[i : i + worklogs_per_page] for i in range(0, len(worklog_ids), worklogs_per_page)]
        worklogs: List[Dict[str, Any]] = []
        # This is kind of a manual pagination. The documentation only states "The returned list of worklogs is limited to 1000 items."
        # Doc: https://developer.atlassian.com/cloud/jira/platform/rest/v3/#api-rest-api-3-worklog-list-post
        for ids_to_get in ids_in_groups_per_page:
            for worklog in self._session.post(f"{self._base_url}/worklog/list", json={"ids": ids_to_get}).json():
                # Optionnaly remove the worklogs you don't want (not in the right time period)
                worklogs.append(worklog)
        
        # Remove duplicates returned by the Jira API
        worklogs_clean = []
        [worklogs_clean.append(obj) for obj in worklogs if obj not in worklogs_clean]

        return worklogs_clean

@dblanchette
Copy link
Copy Markdown
Author

thanks for the quick snippet . I tried using it as is ( with my personal details ) throws some error at :
An error was encountered: unhashable type: 'dict' Traceback (most recent call last): File "<stdin>", line 106, in retrieve_worklogs_updated_since TypeError: unhashable type: 'dict'

Any leads or I can debug it

@singh-ab The issue should be fixed now

Thanks for the help @joshmuente !

@aquac
Copy link
Copy Markdown

aquac commented Dec 6, 2021

@dblanchette Thanks a lot for this gist!
Is there any reason why you are not using
https://pypi.org/project/jira/
?

@dblanchette
Copy link
Copy Markdown
Author

@aquac I don't know if this has been fixed since, but it did not support pagination at time of creating this gist.

@svgincan
Copy link
Copy Markdown

@dblanchette Running this script returns blank. I'm just updating the script with my URL, login email address and API token.

//
os.environ["JIRA_SERVER"] = "https://mycompany.atlassian.net/"
client = JiraClient("me@companyname.com", "my_api_token_12345")
//

Am I missing something ?

@dblanchette
Copy link
Copy Markdown
Author

@svgincan The script does not return anything; it is meant to be included in another project.

Depending on your needs, you may want to print recent_worklogs or process it more, for example.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment