meta-ks · November 30, 2023 09:47
diff --git a/ts_interp.py b/ts_interp.py
 def interpolate_timestamps(df, col):
     """Fill missing timestamps in ``df[col]`` by linear interpolation.

     Each missing entry (NaT/None) is replaced by a value linearly
     interpolated, by row position, between the surrounding known
     timestamps. Known timestamps are never rewritten. With the default
     ``Series.interpolate()`` settings, missing values that precede the
     first known timestamp have nothing to interpolate from and stay
     missing.

     Args:
         df: DataFrame holding the column. It is modified in place.
         col: Label of the timestamp column to fill.

     Returns:
         The same ``df`` object, for call chaining.
     """
     # No-op for datetime64 columns; lets object columns holding
     # Timestamps/None be handled the same way.
     timestamps = pd.to_datetime(df[col])

     missing = timestamps.isna()
     if not missing.any():
         return df

     # Express every timestamp as float microseconds since the Unix epoch.
     # Subtracting an epoch Timestamp (instead of pd.to_numeric) makes the
     # unit independent of the column's storage resolution, turns NaT into
     # NaN directly, and keeps pre-1970 (negative) timestamps as valid
     # anchors. Microseconds since epoch stay well inside float64's exact
     # integer range for realistic dates, which nanoseconds would not.
     epoch = pd.Timestamp(0, tz=timestamps.dt.tz)
     micros = (timestamps - epoch) / pd.Timedelta(microseconds=1)

     # Interpolate on the numeric axis, then rebuild timestamps from the
     # same epoch so the column's time zone (if any) is preserved.
     filled = epoch + pd.to_timedelta(micros.interpolate(), unit="us")

     # Only the originally-missing rows are written back.
     df.loc[missing, col] = filled[missing]

     return df
	def interpolate_timestamps(df, col):
	    """Fill missing timestamps in ``df[col]`` by linear interpolation.

	    Each missing entry (NaT/None) is replaced by a value linearly
	    interpolated, by row position, between the surrounding known
	    timestamps. Known timestamps are never rewritten. With the default
	    ``Series.interpolate()`` settings, missing values that precede the
	    first known timestamp have nothing to interpolate from and stay
	    missing.

	    Args:
	        df: DataFrame holding the column. It is modified in place.
	        col: Label of the timestamp column to fill.

	    Returns:
	        The same ``df`` object, for call chaining.
	    """
	    # No-op for datetime64 columns; lets object columns holding
	    # Timestamps/None be handled the same way.
	    timestamps = pd.to_datetime(df[col])

	    missing = timestamps.isna()
	    if not missing.any():
	        return df

	    # Express every timestamp as float microseconds since the Unix epoch.
	    # Subtracting an epoch Timestamp (instead of pd.to_numeric) makes the
	    # unit independent of the column's storage resolution, turns NaT into
	    # NaN directly, and keeps pre-1970 (negative) timestamps as valid
	    # anchors. Microseconds since epoch stay well inside float64's exact
	    # integer range for realistic dates, which nanoseconds would not.
	    epoch = pd.Timestamp(0, tz=timestamps.dt.tz)
	    micros = (timestamps - epoch) / pd.Timedelta(microseconds=1)

	    # Interpolate on the numeric axis, then rebuild timestamps from the
	    # same epoch so the column's time zone (if any) is preserved.
	    filled = epoch + pd.to_timedelta(micros.interpolate(), unit="us")

	    # Only the originally-missing rows are written back.
	    df.loc[missing, col] = filled[missing]

	    return df