Skip to content

Instantly share code, notes, and snippets.

@abramsymons
Created July 3, 2022 19:25
Show Gist options
  • Save abramsymons/fc73387fa3fcf213952dcc8d1a41f541 to your computer and use it in GitHub Desktop.
Save abramsymons/fc73387fa3fcf213952dcc8d1a41f541 to your computer and use it in GitHub Desktop.
import numpy as np
import math
# Alternative sample set (swap in to experiment with zeros and large spikes):
# prices = [1, 1.1, 0, 1.1, 1.0, 1.2, 100, 100, 100, 1.1, 1.3, 5, 1.2, 1.1, .9, .95, 1.1]

# Sample price observations fed to the outlier filter.
prices = [
    200, 200, 300, 3,
    400, 400, 400, 200,
    300, 300, 500, 600,
    90,
]

# Z-score cutoff: a value is kept only while |z| stays strictly below this.
threshold = 2
# Outlier detection: compute each value's Z-score and drop the outliers.
def removeOutlier(prices, zThreshold=None):
    """Return the values whose absolute Z-score is strictly below the cutoff.

    Args:
        prices: Sequence of numbers to filter.
        zThreshold: Optional Z-score cutoff. When ``None`` (the default) the
            module-level ``threshold`` is used, which keeps existing
            one-argument callers working unchanged.

    Returns:
        ``prices`` itself when it is empty; otherwise a new list with the
        values that pass the Z-score test, in their original order.
    """
    if len(prices) == 0:
        return prices
    mean = np.mean(prices)
    std = np.std(prices)
    if std == 0:
        # Every value equals the mean, so nothing can be an outlier.
        # Dividing by a zero std would give 0/0 = nan, and a nan comparison
        # is always False, which would wrongly discard every value.
        return list(prices)
    limit = threshold if zThreshold is None else zThreshold
    return [m for m in prices if abs((m - mean) / std) < limit]
def main():
    """Filter the module-level ``prices`` for outliers and print the result.

    The prices are mapped to ``log(price + 1)`` rounded to three decimals,
    passed through ``removeOutlier`` twice, and then split into the prices
    whose log value was dropped and those whose log value survived. Both
    lists are printed in the original order.
    """
    # Work on the log of the prices to get a better view of them.
    # Prices are assumed to be >= 0; price + 1 keeps the log values non-negative.
    logPrices = [round(math.log(m + 1), 3) for m in prices]

    # The second pass tries to catch smaller outliers once the bigger ones,
    # which inflate the mean and standard deviation, are gone.
    logOutlierRemoved = removeOutlier(removeOutlier(logPrices))

    # A set gives O(1) membership tests instead of rescanning a list for
    # every price; a single pass then splits the prices into both groups.
    keptLogs = set(logOutlierRemoved)
    removed = []
    outlierRemoved = []
    for price, logPrice in zip(prices, logPrices):
        if logPrice in keptLogs:
            outlierRemoved.append(price)
        else:
            removed.append(price)

    print('removed:', removed)
    print('outlier removed:', outlierRemoved)
# Run the demo only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment