Last active
August 30, 2019 02:48
-
-
Save gghatano/786c07c488e141aa9bccfc10331903c9 to your computer and use it in GitHub Desktop.
PWSCUP2019_publicfile
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
Created by Takao Murakami Jun 18, 2019 (last updated: Aug 22, 2019). | |
Description: | |
Evaluate security (ID-disclosure). | |
Usage: | |
EvalSecR.py [Pseudo-ID Table (in)] [Estimated Table (in)] | |
""" | |
import csv | |
import sys | |
################################# Parameters ##################################
# Example arguments (uncomment to run without a command line):
#sys.argv = ["EvalSecI.py", "../Data_Anonymize_Shuffle/ptable_team001_data01_IDP.csv", "../Data_IDDisclose/etable_team020-001_data01_IDP.csv"]

# Number of users (subtracted from the user IDs found in the pseudo-ID table)
UserNum = 2000


################################## Functions ##################################
def ReadTables(ptable_file, etable_file):
    """Read the pseudo-ID table and the estimated table in lockstep.

    The pseudo-ID table is a CSV file with a header line; column 0 holds the
    user ID (stored minus UserNum) and column 1 the pseudo-ID.  The estimated
    table has a header line followed by one integer per line; its k-th data
    line is paired with the k-th data row of the pseudo-ID table.  Lines of
    the estimated table beyond the last pseudo-ID row are ignored.

    Args:
        ptable_file: Path of the pseudo-ID table (input).
        etable_file: Path of the estimated table (input).

    Returns:
        (ptable, etable): two dicts keyed by the shifted user ID.

    Raises:
        ValueError: If the estimated table has fewer data lines than the
            pseudo-ID table, or a field is not an integer.
    """
    ptable = {}
    etable = {}
    # "with" guarantees both files are closed even if parsing fails midway
    with open(ptable_file, "r") as f, open(etable_file, "r") as g:
        reader = csv.reader(f)
        # Skip the header line of both files (tolerate a completely empty file)
        next(reader, None)
        g.readline()
        for lst in reader:
            # Read a pseudo-ID table
            user_id = int(lst[0]) - UserNum
            ptable[user_id] = int(lst[1])
            # Read an estimated table
            line = g.readline()
            if line == "":
                # readline() returns "" only at end of file
                raise ValueError(
                    "Estimated table has fewer rows than the pseudo-ID table")
            etable[user_id] = int(line)
    return ptable, etable


def CalReidRate(ptable, etable):
    """Return the fraction of users whose estimated pseudo-ID is correct.

    Args:
        ptable: dict mapping user ID to the true pseudo-ID.
        etable: dict mapping user ID to the estimated pseudo-ID.

    Raises:
        ValueError: If ptable is empty (the rate would be undefined).
    """
    if not ptable:
        raise ValueError("Pseudo-ID table contains no users")
    hit_num = sum(1 for user_id in ptable if ptable[user_id] == etable[user_id])
    return hit_num / len(ptable)


def CalAvgRScore(ptable_file, etable_file):
    """Return 1 - (re-identification rate) for the two given table files."""
    ptable, etable = ReadTables(ptable_file, etable_file)
    # Reverse reid_rate so that 1 (resp. 0) is the best (resp. worst) score
    return 1 - CalReidRate(ptable, etable)


#################################### Main #####################################
def main(argv):
    """Command-line entry point: print the score for argv[1] and argv[2]."""
    if len(argv) < 3:
        print("Usage:",argv[0],"[Pseudo-ID Table (in)] [Estimated Table (in)]" )
        # Exit status 0 is kept so existing callers see unchanged behavior
        sys.exit(0)
    # argv[1]: pseudo-ID table file (input), argv[2]: estimated table file (input)
    print(CalAvgRScore(argv[1], argv[2]))


if __name__ == "__main__":
    main(sys.argv)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
Created by Takao Murakami Jun 18, 2019 (last updated: Aug 22, 2019). | |
Description: | |
Evaluate security (trace inference). | |
Usage: | |
EvalSecT.py [Original Trace (in)] [Estimated Trace (in)] | |
""" | |
import numpy as np | |
import math | |
import csv | |
import sys | |
################################# Parameters ##################################
# Number of users
UserNum = 2000
# Minimum of y (latitude)
MIN_Y = 35.65
# Maximum of y (latitude)
MAX_Y = 35.75
# Minimum of x (longitude)
MIN_X = 139.68
# Maximum of x (longitude)
MAX_X = 139.8
# Number of regions in the x-term
NumRegX = 32
# Number of regions in the y-term
NumRegY = 32
# Minimum distance (km) for a score value of zero
MinDisZeroScore = 2
# Hospital regions (region IDs start at 1)
HosRegLst = [2, 43, 50, 147, 150, 152, 174, 183, 186, 205,
             237, 296, 303, 326, 331, 344, 358, 420, 434, 449,
             471, 491, 497, 507, 522, 535, 550, 561, 628, 631,
             708, 771, 782, 821, 871, 883, 995]
# Indicator vector over all NumRegX*NumRegY regions: HosReg[reg_id - 1] is 1
# for every region ID listed in HosRegLst and 0 elsewhere
HosReg = np.zeros(NumRegX * NumRegY)
HosReg[np.asarray(HosRegLst) - 1] = 1
# Example arguments (uncomment to run without a command line):
#sys.argv = ["EvalSecT.py", "../Data/PWSCup2019_Osaka/orgtraces_team001_data01_IDP.csv", "../Data_TraceInfer/etraces_team020-001_data01_IDP.csv"]
# Both input files are required; otherwise print the usage line and stop
# (exit status 0 is kept so existing callers see unchanged behavior)
if len(sys.argv) < 3:
    print("Usage:",sys.argv[0],"[Original Trace (in)] [Estimated Trace (in)]" )
    sys.exit(0)
# Original trace file (input)
OrgTraceFile = sys.argv[1]
# Estimated trace file (input)
EstTraceFile = sys.argv[2]
########## Calculate a security loss (tracking) between two regions ###########
# [input1]: reg_id1
# [input2]: reg_id2
# [input3]: xc
# [input4]: yc
# [output1]: tloss
# [output2]: weight
def CalTLoss(reg_id1, reg_id2, xc, yc):
    """Return the tracking loss and its weight for a pair of regions.

    Args:
        reg_id1: Original region ID (starting at 1).
        reg_id2: Estimated region ID (starting at 1).
        xc: Center coordinate per column, indexed by (reg_id - 1) % NumRegX.
        yc: Center coordinate per row, indexed by (reg_id - 1) // NumRegX.

    Returns:
        (tloss, weight): tloss is 0 when the two region centers are more than
        MinDisZeroScore km apart and 1 - dist_km / MinDisZeroScore otherwise;
        weight is 10 when HosReg marks reg_id1 as a hospital region, else 1.

    Raises:
        ValueError: If a region ID is outside [1, NumRegX * NumRegY].
    """
    # Reject out-of-range IDs explicitly: an ID of 0 (or a negative one) would
    # otherwise wrap around through negative indexing and yield a bogus score
    reg_num = NumRegX * NumRegY
    for reg_id in (reg_id1, reg_id2):
        if not 1 <= reg_id <= reg_num:
            raise ValueError(
                "Region ID {} is out of range [1, {}]".format(reg_id, reg_num))
    # Region IDs (with zero start) --> (row, column) of each region
    y_id1, x_id1 = divmod(reg_id1 - 1, NumRegX)
    y_id2, x_id2 = divmod(reg_id2 - 1, NumRegX)
    # Euclidean distance (km) between reg_id1 & reg_id2 --> dist_km
    # 1 degree of latitude (resp. longitude in TK) = 111 km (resp. 91 km)
    dist_y_km = (yc[y_id1] - yc[y_id2]) * 111
    dist_x_km = (xc[x_id1] - xc[x_id2]) * 91
    dist_km = math.sqrt(dist_y_km**2 + dist_x_km**2)
    if dist_km > MinDisZeroScore:
        tloss = 0
    else:
        tloss = 1 - dist_km / MinDisZeroScore
    # If the original region is a hospital region, set weight = 10
    # Otherwise, set weight = 1
    weight = 10 if HosReg[reg_id1 - 1] == 1 else 1
    return tloss, weight
#################################### Main #####################################
def ReadTraces(org_trace_file, est_trace_file):
    """Read the original trace and the estimated trace in lockstep.

    The original trace is a CSV file with a header line and the columns
    user ID, time ID, region ID.  The estimated trace has a header line
    followed by one integer region ID per line; its k-th data line is paired
    with the k-th data row of the original trace.  Lines of the estimated
    trace beyond the last original row are ignored.

    Args:
        org_trace_file: Path of the original trace file (input).
        est_trace_file: Path of the estimated trace file (input).

    Returns:
        (test_trace, est_trace): two dicts keyed by (user_id, time_id).

    Raises:
        ValueError: If the estimated trace has fewer data lines than the
            original trace, or a field is not an integer.
    """
    test_trace = {}
    est_trace = {}
    # "with" guarantees both files are closed even if parsing fails midway
    with open(org_trace_file, "r") as f, open(est_trace_file, "r") as g:
        reader = csv.reader(f)
        # Skip the header line of both files (tolerate a completely empty file)
        next(reader, None)
        g.readline()
        for lst in reader:
            # Read a testing trace file --> test_trace
            key = (int(lst[0]), int(lst[1]))
            test_trace[key] = int(lst[2])
            # Read an estimated trace file --> est_trace
            line = g.readline()
            if line == "":
                # readline() returns "" only at end of file
                raise ValueError(
                    "Estimated trace has fewer rows than the original trace")
            est_trace[key] = int(line)
    return test_trace, est_trace


def CalRegCenters(min_val, max_val, reg_num):
    """Return the centers of reg_num equal-width cells covering [min_val, max_val].

    The expression keeps the original operation order
    (min + width * i + width / 2) so the values are unchanged.
    """
    width = (max_val - min_val) / reg_num
    return min_val + width * np.arange(reg_num) + width / 2


def CalWAvgTLoss(test_trace, est_trace, xc, yc):
    """Return the weighted average security loss (trace inference).

    Args:
        test_trace: dict mapping (user_id, time_id) to the original region ID.
        est_trace: dict mapping (user_id, time_id) to the estimated region ID.
        xc, yc: Region centers passed through to CalTLoss.

    Raises:
        ValueError: If test_trace is empty (the average would be undefined).
    """
    if not test_trace:
        raise ValueError("Original trace contains no entries")
    wavg_tloss = 0
    wsum = 0
    # Accumulate one term at a time (not with sum()) so that the floating-point
    # result does not depend on the summation algorithm
    for key, reg_id1 in test_trace.items():
        tloss, weight = CalTLoss(reg_id1, est_trace[key], xc, yc)
        wavg_tloss += tloss * weight
        wsum += weight
    return wavg_tloss / wsum


def main():
    """Score EstTraceFile against OrgTraceFile and print the result."""
    # Read a testing trace file & estimated trace file --> test_trace, est_trace
    test_trace, est_trace = ReadTraces(OrgTraceFile, EstTraceFile)
    # Calculate the center of each region (NumRegX x NumRegY) --> xc, yc
    xc = CalRegCenters(MIN_X, MAX_X, NumRegX)
    yc = CalRegCenters(MIN_Y, MAX_Y, NumRegY)
    # Calculate the weighted average security loss (trace inference) --> wavg_tloss
    wavg_tloss = CalWAvgTLoss(test_trace, est_trace, xc, yc)
    # Reverse avg_tloss so that 1 (resp. 0) is the best (resp. worst) score --> wavg_tscore
    wavg_tscore = 1 - wavg_tloss
    print(wavg_tscore)


if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
Created by Takao Murakami Jun 18, 2019 (last updated: Aug 22, 2019). | |
Description: | |
Evaluate utility. | |
Usage: | |
EvalUtil.py [Original Trace (in)] [Anonymized Trace (in)] | |
""" | |
import numpy as np | |
import math | |
import csv | |
import sys | |
################################# Parameters ##################################
# Minimum of y (latitude)
MIN_Y = 35.65
# Maximum of y (latitude)
MAX_Y = 35.75
# Minimum of x (longitude)
MIN_X = 139.68
# Maximum of x (longitude)
MAX_X = 139.8
# Number of regions in the x-term
NumRegX = 32
# Number of regions in the y-term
NumRegY = 32
# Minimum distance (km) for a score value of one
MinDisOneScore = 2
# Example arguments (uncomment to run without a command line):
#sys.argv = ["EvalUtil.py", "../Data/PWSCup2019_Osaka/orgtraces_team001_data01_IDP.csv", "../Data_Anonymize/anotraces_team001_data01_IDP_A5.csv"]
# Both input files are required; otherwise print the usage line and stop
# (exit status 0 is kept so existing callers see unchanged behavior)
if len(sys.argv) < 3:
    print("Usage:",sys.argv[0],"[Original Trace (in)] [Anonymized Trace (in)]" )
    sys.exit(0)
# Original trace file (input)
OrgTraceFile = sys.argv[1]
# Anonymized trace file (input)
AnoTraceFile = sys.argv[2]
################ Calculate a utility loss between two regions #################
# [input1]: reg_id1
# [input2]: reg_id2
# [input3]: xc
# [input4]: yc
# [output1]: uloss
def CalUtil(reg_id1, reg_id2, xc, yc):
    """Return the utility loss between two regions.

    Args:
        reg_id1: First region ID (starting at 1).
        reg_id2: Second region ID (starting at 1).
        xc: Center coordinate per column, indexed by (reg_id - 1) % NumRegX.
        yc: Center coordinate per row, indexed by (reg_id - 1) // NumRegX.

    Returns:
        uloss: 1 when the two region centers are more than MinDisOneScore km
        apart, and dist_km / MinDisOneScore otherwise.

    Raises:
        ValueError: If a region ID is outside [1, NumRegX * NumRegY].
    """
    # Reject out-of-range IDs explicitly: an ID of 0 (or a negative one) would
    # otherwise wrap around through negative indexing and yield a bogus score
    reg_num = NumRegX * NumRegY
    for reg_id in (reg_id1, reg_id2):
        if not 1 <= reg_id <= reg_num:
            raise ValueError(
                "Region ID {} is out of range [1, {}]".format(reg_id, reg_num))
    # Region IDs (with zero start) --> (row, column) of each region
    y_id1, x_id1 = divmod(reg_id1 - 1, NumRegX)
    y_id2, x_id2 = divmod(reg_id2 - 1, NumRegX)
    # Euclidean distance (km) between reg_id1 & reg_id2 --> dist_km
    # 1 degree of latitude (resp. longitude in TK) = 111 km (resp. 91 km)
    dist_y_km = (yc[y_id1] - yc[y_id2]) * 111
    dist_x_km = (xc[x_id1] - xc[x_id2]) * 91
    dist_km = math.sqrt(dist_y_km**2 + dist_x_km**2)
    if dist_km > MinDisOneScore:
        uloss = 1
    else:
        uloss = dist_km / MinDisOneScore
    return uloss
#################################### Main #####################################
def ReadTraces(org_trace_file, ano_trace_file):
    """Read the original trace and the anonymized trace in lockstep.

    The original trace is a CSV file with a header line and the columns
    user ID, time ID, region ID.  The anonymized trace has a header line
    followed by one entry per line, kept as the raw string (without the
    trailing newline); its k-th data line is paired with the k-th data row
    of the original trace.  Lines of the anonymized trace beyond the last
    original row are ignored.

    Args:
        org_trace_file: Path of the original trace file (input).
        ano_trace_file: Path of the anonymized trace file (input).

    Returns:
        (test_trace, ano_trace): two dicts keyed by (user_id, time_id).

    Raises:
        ValueError: If the anonymized trace has fewer data lines than the
            original trace, or an original field is not an integer.
    """
    test_trace = {}
    ano_trace = {}
    # "with" guarantees both files are closed even if parsing fails midway
    with open(org_trace_file, "r") as f, open(ano_trace_file, "r") as g:
        reader = csv.reader(f)
        # Skip the header line of both files (tolerate a completely empty file)
        next(reader, None)
        g.readline()
        for lst in reader:
            # Read the original trace file --> test_trace
            key = (int(lst[0]), int(lst[1]))
            test_trace[key] = int(lst[2])
            # Read an anonymized trace file --> ano_trace
            line = g.readline()
            if line == "":
                # readline() returns "" only at end of file
                raise ValueError(
                    "Anonymized trace has fewer rows than the original trace")
            ano_trace[key] = line.rstrip("\n")
    return test_trace, ano_trace


def CalRegCenters(min_val, max_val, reg_num):
    """Return the centers of reg_num equal-width cells covering [min_val, max_val].

    The expression keeps the original operation order
    (min + width * i + width / 2) so the values are unchanged.
    """
    width = (max_val - min_val) / reg_num
    return min_val + width * np.arange(reg_num) + width / 2


def CalAvgULoss(test_trace, ano_trace, xc, yc):
    """Return the average utility loss over all entries of test_trace.

    Each anonymized entry is split on single spaces:
      * one token other than "*": a single region ID (noise);
      * two or more tokens: several region IDs (generalization), whose
        losses are averaged;
      * the single token "*": location hiding (deletion), counted as loss 1.

    Args:
        test_trace: dict mapping (user_id, time_id) to the original region ID.
        ano_trace: dict mapping (user_id, time_id) to the anonymized entry.
        xc, yc: Region centers passed through to CalUtil.

    Raises:
        ValueError: If test_trace is empty (the average would be undefined),
            or a token that should be a region ID is not an integer.
    """
    if not test_trace:
        raise ValueError("Original trace contains no entries")
    avg_uloss = 0
    # Accumulate term by term into one running total (same order as before)
    # so that the floating-point result is unchanged
    for key, reg_id1 in test_trace.items():
        reg_id2_lst = ano_trace[key].split(" ")
        # Number of region IDs --> reg_id2_num
        reg_id2_num = len(reg_id2_lst)
        # Noise
        if reg_id2_num == 1 and reg_id2_lst[0] != "*":
            avg_uloss += CalUtil(reg_id1, int(reg_id2_lst[0]), xc, yc)
        # Generalization
        elif reg_id2_num >= 2:
            for reg_id2 in reg_id2_lst:
                avg_uloss += CalUtil(reg_id1, int(reg_id2), xc, yc) / reg_id2_num
        # Location hiding (deletion)
        else:
            avg_uloss += 1
    return avg_uloss / len(test_trace)


def main():
    """Score AnoTraceFile against OrgTraceFile and print the result."""
    # Read the original trace file & anonymized trace file --> test_trace, ano_trace
    test_trace, ano_trace = ReadTraces(OrgTraceFile, AnoTraceFile)
    # Calculate the center of each region (NumRegX x NumRegY) --> xc, yc
    xc = CalRegCenters(MIN_X, MAX_X, NumRegX)
    yc = CalRegCenters(MIN_Y, MAX_Y, NumRegY)
    # Calculate the average utility loss --> avg_uloss
    avg_uloss = CalAvgULoss(test_trace, ano_trace, xc, yc)
    # Reverse avg_uloss so that 1 (resp. 0) is the best (resp. worst) score --> avg_uscore
    avg_uscore = 1 - avg_uloss
    print(avg_uscore)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment