Last active
February 6, 2024 19:49
-
-
Save kylebgorman/0cc8f42f870b8b1a07147ddf0fb44022 to your computer and use it in GitHub Desktop.
Log-odds calculations
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Log-odds computations.""" | |
from libc.math cimport log, sqrt | |
from libc.stdint cimport int64_t | |
ctypedef int64_t int64 | |
cpdef double log_odds(int64 c, int64 n):
    """
    log_odds(c, n)

    Computes the log odds of an event from count data.

    The result is log(c) - log(n - c), i.e., the natural log of the ratio of
    occurrences of the event to non-occurrences.

    Args:
        c: count of the event.
        n: count of all events in the sample.

    Returns:
        The log odds.
    """
    cdef double log_hits = log(c)
    cdef double log_misses = log(n - c)
    return log_hits - log_misses
cpdef double log_odds_ratio(int64 c1, int64 n1, int64 c2, int64 n2):
    """
    log_odds_ratio(c1, n1, c2, n2)

    Computes the log odds ratio of an event across two samples.

    This is the log odds of the event in the first sample minus the log odds
    of the event in the second sample.

    Args:
        c1: count of the event in the first sample.
        n1: count of all events for the first sample.
        c2: count of the event in the second sample.
        n2: count of all events for the second sample.

    Returns:
        The log odds ratio.
    """
    cdef double first = log_odds(c1, n1)
    cdef double second = log_odds(c2, n2)
    return first - second
cpdef double log_odds_ratio_idp(int64 c1, int64 n1, int64 c2, int64 n2,
                                int64 c3, int64 n3):
    """
    log_odds_ratio_idp(c1, n1, c2, n2, c3, n3)

    Computes a weighted log odds ratio with an informative Dirichlet prior,
    after:

        Monroe, B. L., Colaresi, M. P. and Quinn, K. M. 2009. Fightin' words:
        Lexical feature selection and evaluation for identifying the content
        of political conflict. Political Analysis 16: 372-403.

    The prior sample's counts are added to each of the two samples; the
    difference of the logs of the resulting smoothed proportions is then
    divided by an estimate of its standard deviation.

    NOTE(review): the numerator uses the smoothed proportion
    (c + c3) / (n + n3) rather than the smoothed odds
    (c + c3) / (n + n3 - c - c3); this looks like a departure from the
    formula in the cited paper. Confirm whether it is intended.

    Args:
        c1: count of the event in the first sample.
        n1: count of all events for the first sample.
        c2: count of the event in the second sample.
        n2: count of all events for the second sample.
        c3: count of the event in the third (i.e., the prior) sample.
        n3: count of all events for the third (i.e., the prior) sample.

    Returns:
        Weighted log odds.
    """
    # Event counts smoothed by the prior sample's event count.
    cdef double smoothed1 = c1 + c3
    cdef double smoothed2 = c2 + c3
    # Log of each smoothed proportion.
    cdef double log_share1 = log(smoothed1 / (n1 + n3))
    cdef double log_share2 = log(smoothed2 / (n2 + n3))
    # Standard deviation estimate used to scale the difference.
    cdef double spread = sqrt(1. / smoothed1 + 1. / smoothed2)
    return (log_share1 - log_share2) / spread
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Unit tests for the log_odds module.""" | |
import unittest | |
import log_odds | |
class CountTest(unittest.TestCase):
    """Regression tests for the log_odds module.

    The fixture holds counts for the word "I", extracted from:

        http://languagelog.ldc.upenn.edu/nll/?p=21068
    """

    @classmethod
    def setUpClass(cls):
        # DJT speeches.
        cls.c1 = 666
        cls.n1 = 14746
        # GWB's speeches.
        cls.c2 = 356
        cls.n2 = 14746
        # Some unknown background corpus.
        cls.c3 = 1022
        # This last number has to be inferred from the counts per million.
        cls.n3 = 29175

    def testLogOdds(self):
        self.assertAlmostEqual(-3.051221, log_odds.log_odds(self.c1, self.n1))

    def testLogOddsRatio(self):
        self.assertAlmostEqual(
            0.6481371,
            log_odds.log_odds_ratio(self.c1, self.n1, self.c2, self.n2))

    def testLogOddsRatioIDP(self):
        # The final argument is the prior sample's total (n3), not its event
        # count (c3).
        self.assertAlmostEqual(
            5.5889577,
            log_odds.log_odds_ratio_idp(self.c1, self.n1, self.c2, self.n2,
                                        self.c3, self.n3))
# Run the test suite when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from setuptools import setup | |
from Cython.Build import cythonize | |
# Cythonize the .pyx source first so the setup() call below stays declarative.
extensions = cythonize(["log_odds.pyx"])

# NOTE(review): the author_email value looks like a placeholder left by
# e-mail obfuscation on the page this was copied from; confirm the real
# address.
setup(
    name="log_odds",
    version="0.2",
    author="Kyle Gorman",
    author_email="[email protected]",
    install_requires=["Cython>=0.28.5"],
    ext_modules=extensions,
    test_suite="log_odds_test",
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment