Skip to content

Instantly share code, notes, and snippets.

@Mizux
Last active May 23, 2023 22:18
Show Gist options
  • Save Mizux/540d4184d761a49336a41311e4babc8d to your computer and use it in GitHub Desktop.
Save Mizux/540d4184d761a49336a41311e4babc8d to your computer and use it in GitHub Desktop.
filtering.py
#!/usr/bin/env python3
'''Simple test
'''
from ortools.sat.python import cp_model
import pandas as pd
# Define the dataframe with feature columns and the target column
data = {
'A': [1.59, 0.9, 2.82,2.44,2.61],
'B': [0.68,0.1,1.3,1.64,1.59],
'C': [2.15,-0.45,2.91,5.2,2.72],
'P': [-0.5,0,1.2,-0.1,1.5]
}
df = pd.DataFrame(data)
# Multiply by large int to frame the problem in integers
df = df * 100
model = cp_model.CpModel()
# Contraint 1: A is between bounds
A_MIN_LB = int(df['A'].min()-1)
A_MAX_UB = int(df['A'].max()+1)
print(f'A range: [{A_MIN_LB}, {A_MAX_UB}]')
a_lb = model.NewIntVar(A_MIN_LB, A_MAX_UB, 'a_lb')
a_ub = model.NewIntVar(A_MIN_LB,A_MAX_UB , 'a_ub')
model.Add(a_lb < a_ub)
# Contraint 2: B is between bounds
B_MIN_LB = int(df['B'].min()-1)
B_MAX_UB = int(df['B'].max()+1)
print(f'B range: [{B_MIN_LB}, {B_MAX_UB}]')
b_lb = model.NewIntVar(B_MIN_LB, B_MAX_UB, 'b_lb')
b_ub = model.NewIntVar(B_MIN_LB,B_MAX_UB , 'b_ub')
model.Add(b_lb < b_ub)
# Contraint 3: C is between bounds
C_MIN_LB = int(df['C'].min()-1)
C_MAX_UB = int(df['C'].max()+1)
print(f'C range: [{C_MIN_LB}, {C_MAX_UB}]')
c_lb = model.NewIntVar(C_MIN_LB, C_MAX_UB, 'c_lb')
c_ub = model.NewIntVar(C_MIN_LB, C_MAX_UB , 'c_ub')
model.Add(c_lb < c_ub)
# Define a binary variable representing inclusion of each row in the DataFrame
x = {}
for i in range(len(df)):
x[i] = model.NewBoolVar(f'x[{i}]')
df_a = int(df['A'][i])
df_b = int(df['B'][i])
df_c = int(df['C'][i])
print(f'tuple[{i}]: (A:{df_a}, B:{df_b}, C:{df_c}), P:{df["P"][i]}')
## Intermediate bools for channeling
is_A_greater = model.NewBoolVar(f'is_A_greater_{i}')
is_A_lesser = model.NewBoolVar(f'is_A_lesser_{i}')
model.Add(a_lb < df_a).OnlyEnforceIf(is_A_greater)
model.Add(a_lb >= df_a).OnlyEnforceIf(is_A_greater.Not())
model.Add(df_a < a_ub).OnlyEnforceIf(is_A_lesser)
model.Add(df_a >= a_ub).OnlyEnforceIf(is_A_lesser.Not())
is_B_greater = model.NewBoolVar(f'is_B_greater_{i}')
is_B_lesser = model.NewBoolVar(f'is_B_lesser_{i}')
model.Add(b_lb < df_b).OnlyEnforceIf(is_B_greater)
model.Add(b_lb >= df_b).OnlyEnforceIf(is_B_greater.Not())
model.Add(df_b < b_ub).OnlyEnforceIf(is_B_lesser)
model.Add(df_b >= b_ub).OnlyEnforceIf(is_B_lesser.Not())
is_C_greater = model.NewBoolVar(f'is_C_greater_{i}')
is_C_lesser = model.NewBoolVar(f'is_C_lesser_{i}')
model.Add(c_lb < df_c).OnlyEnforceIf(is_C_greater)
model.Add(c_lb >= df_c).OnlyEnforceIf(is_C_greater.Not())
model.Add(df_c < c_ub).OnlyEnforceIf(is_C_lesser)
model.Add(df_c >= c_ub).OnlyEnforceIf(is_C_lesser.Not())
# Assign true to x[i] only if all all conditions met
model.AddBoolAnd(
is_A_greater, is_A_lesser,
is_B_greater, is_B_lesser,
is_C_greater, is_C_lesser
).OnlyEnforceIf(x[i])
# Assign false to x[i] if at least one condition is *not* met
model.AddBoolOr(
is_A_greater.Not(), is_A_lesser.Not(),
is_B_greater.Not(), is_B_lesser.Not(),
is_C_greater.Not(), is_C_lesser.Not()
).OnlyEnforceIf(x[i].Not())
# Contraint 3: At least 3 items are not filtered
model.Add(sum(x[i] for i in range(len(df))) >= 3)
# Objective
#objective_terms = []
#for i in range(len(df)):
# objective_terms.append(x[i] * int(df['P'][i]))
#model.Maximize(sum(objective_terms))
model.Maximize(sum(x[i] * int(df['P'][i]) for i in range(len(df))))
solver = cp_model.CpSolver()
status = solver.Solve(model)
print(f'Solve status: {solver.StatusName(status)}') # "INFEASIBLE" :( ?
print(f'Optimal objective value: {solver.ObjectiveValue()}') # "0"
if status in (cp_model.OPTIMAL, cp_model.FEASIBLE):
print(f'{solver.Value(a_lb)} < a < {solver.Value(a_ub)}')
print(f'{solver.Value(b_lb)} < b < {solver.Value(b_ub)}')
print(f'{solver.Value(c_lb)} < c < {solver.Value(c_ub)}')
for i in range(len(df)):
print(f'x[{i}]: {solver.BooleanValue(x[i])}')
else:
print('No solution found.')

possible output

%./filtering_sat.py
A range: [89, 283]
B range: [9, 165]
C range: [-46, 521]
tuple[0]: (A:159, B:68, C:215), P:-50.0
tuple[1]: (A:90, B:10, C:-45), P:0.0
tuple[2]: (A:282, B:130, C:291), P:120.0
tuple[3]: (A:244, B:164, C:520), P:-10.0
tuple[4]: (A:261, B:159, C:272), P:150.0

Solve status: OPTIMAL
Optimal objective value: 260.0
159 < a < 283
10 < b < 165
0 < c < 521
x[0]: False
x[1]: False
x[2]: True
x[3]: True
x[4]: True
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment