- 
      
 - 
        
Save datavudeja/d5855fb42ad844516d23bfa08b221941 to your computer and use it in GitHub Desktop.  
    Null-safe comparison accessor for Pandas
  
        
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | import pandas as pd | |
@pd.api.extensions.register_series_accessor("nullsafe")
class NullSafeSeriesAccessor:
    """
    Null-safe comparison accessor for Pandas Series.

    This is equivalent to the null-safe equal operator in SQL (``<=>``):

    +-------------+---------------+-----+
    |Left Operand |Right Operand  |<=>  |
    +-------------+---------------+-----+
    |NULL         |Any value      |False|
    |Any value    |NULL           |False|
    |NULL         |NULL           |True |
    +-------------+---------------+-----+

    So ``equal`` is the same as ``<=>``, while ``not_equal`` is its negation.
    Nulls are detected with ``pd.isna``, so ``None``, ``NaN``, ``NaT`` and
    ``pd.NA`` (nullable extension dtypes) are all treated as NULL, and the
    returned mask never contains missing values.

    Examples
    --------
    >>> data = {"left": [1, 2, None, 4],
    ...         "right": [1, None, None, 4]}
    >>> df = pd.DataFrame(data)
    >>> print(df)
       left  right
    0   1.0    1.0
    1   2.0    NaN
    2   NaN    NaN
    3   4.0    4.0

    Comparing using the equals operator (notice index 2):

    >>> df["left"] == df["right"]
    0     True
    1    False
    2    False
    3     True
    dtype: bool

    Using the nullsafe accessor (importing the module registers it):

    >>> from nullsafe import NullSafeSeriesAccessor
    >>> print(df["left"].nullsafe.equal(df["right"]))
    0     True
    1    False
    2     True
    3     True
    dtype: bool
    """

    def __init__(self, pandas_obj):
        # The Series this accessor instance is attached to.
        self._obj = pandas_obj

    @staticmethod
    def _null_safe_comparison(left, right):
        """
        Perform a null-safe equality comparison between ``left`` and ``right``.

        Parameters
        ----------
        left : pandas.Series
            Left operand.
        right : pandas.Series or scalar
            Right operand; compared element-wise against ``left``.

        Returns
        -------
        pandas.Series
            Boolean mask that is True where both operands are equal or both
            are null, and False everywhere else (never missing).

        Notes
        -----
        Based on https://stackoverflow.com/a/19322739, but nulls are detected
        with ``pd.isna`` instead of the ``x != x`` trick: for nullable
        extension dtypes ``x != x`` evaluates to ``pd.NA`` rather than True,
        which would leak ``<NA>`` into the mask.
        """
        # Run the plain comparison first so any error `==` would raise
        # (e.g. differently-labeled Series) surfaces unchanged.
        matches = left == right
        left_null = pd.isna(left)
        right_null = pd.isna(right)
        # `matches` may hold pd.NA where an operand is null (nullable dtypes).
        # AND-ing with "neither side is null" turns those into False because
        # NA & False is False under Kleene logic.
        both_present = ~(left_null | right_null)
        return (matches & both_present) | (left_null & right_null)

    def equal(self, other):
        """
        Compare null-safe equality with another Series.

        Parameters
        ----------
        other : pandas.Series or scalar
            Values to compare against the wrapped Series.

        Returns
        -------
        pandas.Series
            Boolean mask, True where the values are equal or both null.
        """
        return self._null_safe_comparison(self._obj, other)

    def not_equal(self, other):
        """
        Compare null-safe non-equality with another Series.

        Parameters
        ----------
        other : pandas.Series or scalar
            Values to compare against the wrapped Series.

        Returns
        -------
        pandas.Series
            Boolean mask, the element-wise negation of ``equal(other)``.
        """
        return ~self._null_safe_comparison(self._obj, other)
  
    Sign up for free
    to join this conversation on GitHub.
    Already have an account?
    Sign in to comment