Skip to content

Instantly share code, notes, and snippets.

@lucidfrontier45
Last active June 22, 2025 02:40
Show Gist options
  • Save lucidfrontier45/3948d8a912ece8dcaada2dd88d3329ba to your computer and use it in GitHub Desktop.
Save lucidfrontier45/3948d8a912ece8dcaada2dd88d3329ba to your computer and use it in GitHub Desktop.
XGBoost model JSON experiment
{
"learner": {
"attributes": {},
"feature_names": [],
"feature_types": [],
"gradient_booster": {
"model": {
"gbtree_model_param": {
"num_parallel_tree": "1",
"num_trees": "2"
},
"iteration_indptr": [
0,
1,
2
],
"tree_info": [
0,
0
],
"trees": [
{
"base_weights": [
7.611128E-8,
-4.0516657E-1,
5.1883173E-1,
-1.9054249E-1,
2.122475E-2,
-5.6715857E-2,
2.0684828E-1
],
"categories": [],
"categories_nodes": [],
"categories_segments": [],
"categories_sizes": [],
"default_left": [
0,
0,
0,
0,
0,
0,
0
],
"id": 0,
"left_children": [
1,
3,
5,
-1,
-1,
-1,
-1
],
"loss_changes": [
5.288966E1,
1.550733E1,
1.3458624E1,
0E0,
0E0,
0E0,
0E0
],
"parents": [
2147483647,
0,
0,
1,
1,
2,
2
],
"right_children": [
2,
4,
6,
-1,
-1,
-1,
-1
],
"split_conditions": [
2.3909327E-1,
7.4216795E-1,
-1.2258618E0,
-1.9054249E-1,
2.122475E-2,
-5.6715857E-2,
2.0684828E-1
],
"split_indices": [
2,
1,
1,
0,
0,
0,
0
],
"split_type": [
0,
0,
0,
0,
0,
0,
0
],
"sum_hessian": [
2.496E2,
1.402752E2,
1.0932481E2,
9.43488E1,
4.5926403E1,
2.1216002E1,
8.81088E1
],
"tree_param": {
"num_deleted": "0",
"num_feature": "3",
"num_nodes": "7",
"size_leaf_vector": "1"
}
},
{
"base_weights": [
5.4557866E-4,
-2.3912597E-1,
5.098165E-1,
-1.9740286E-1,
-1.221089E-2,
-1.3063847E-2,
1.9067912E-1
],
"categories": [],
"categories_nodes": [],
"categories_segments": [],
"categories_sizes": [],
"default_left": [
0,
0,
0,
0,
0,
0,
0
],
"id": 1,
"left_children": [
1,
3,
5,
-1,
-1,
-1,
-1
],
"loss_changes": [
3.0489908E1,
1.40914545E1,
5.591589E0,
0E0,
0E0,
0E0,
0E0
],
"parents": [
2147483647,
0,
0,
1,
1,
2,
2
],
"right_children": [
2,
4,
6,
-1,
-1,
-1,
-1
],
"split_conditions": [
6.910422E-1,
-6.351771E-1,
-1.2981012E0,
-1.9740286E-1,
-1.221089E-2,
-1.3063847E-2,
1.9067912E-1
],
"split_indices": [
2,
1,
0,
0,
0,
0,
0
],
"split_type": [
0,
0,
0,
0,
0,
0,
0
],
"sum_hessian": [
2.4780023E2,
1.6886157E2,
7.893866E1,
5.353326E1,
1.1532831E2,
1.47409E1,
6.419776E1
],
"tree_param": {
"num_deleted": "0",
"num_feature": "3",
"num_nodes": "7",
"size_leaf_vector": "1"
}
}
]
},
"name": "gbtree"
},
"learner_model_param": {
"base_score": "5.2E-1",
"boost_from_average": "1",
"num_class": "0",
"num_feature": "3",
"num_target": "1"
},
"objective": {
"name": "binary:logistic",
"reg_loss_param": {
"scale_pos_weight": "1"
}
}
},
"version": [
3,
0,
2
]
}
import itertools
import json
import numpy as np
import xgboost as xgb
# Seeded generator so every run evaluates the models on the same samples.
rng = np.random.default_rng(seed=0)
# n rows, d feature columns.
n, d = 1000, 3
# Sample matrix drawn uniformly from [-2, 2); read by test_model below.
x = rng.uniform(low=-2, high=2, size=(n, d))
def _leaf_node_ids(tree: dict) -> list:
    """Return the indices of the leaf nodes of one tree from the model JSON."""
    # A node is a leaf when it has no left child (-1). Testing
    # ``split_indices == 0`` is not enough: that entry is also 0 for an
    # internal node that splits on feature 0, which would be miscounted.
    return [
        node_id
        for node_id, left_child in enumerate(tree["left_children"])
        if left_child == -1
    ]


def test_model(model_file: str) -> None:
    """Compare booster predictions with values rebuilt from the model JSON.

    Two arrays are printed, each sorted and rounded to 5 decimals:

    1. the distinct predictions of the booster loaded from ``model_file``
       on the module-level sample matrix ``x``;
    2. every value ``base_score + sum(one leaf weight per tree)``, where the
       leaf weights are read from each tree's ``base_weights`` array.

    Args:
        model_file: Path to an XGBoost model saved in JSON format.
    """
    print(f"Testing model: {model_file}")

    # What XGBoost itself predicts for the samples.
    booster = xgb.Booster(model_file=model_file)
    y = booster.predict(xgb.DMatrix(x))
    y = np.array(sorted(set(y))).round(5)
    print(y)

    # Context manager so the file handle is closed deterministically.
    with open(model_file, encoding="utf-8") as model_fp:
        model_dict = json.load(model_fp)["learner"]
    base_score = float(model_dict["learner_model_param"]["base_score"])
    trees = model_dict["gradient_booster"]["model"]["trees"]

    # One list of leaf weights per tree.
    leaf_weight_list = [
        [tree["base_weights"][i] for i in _leaf_node_ids(tree)] for tree in trees
    ]

    # Every combination of one leaf per tree is a candidate output.
    # NOTE(review): base_score is added as-is, with no link function applied;
    # for objectives such as binary:logistic the two printed arrays will
    # presumably differ for that reason - confirm against XGBoost's docs.
    y = [
        base_score + sum(weights)
        for weights in itertools.product(*leaf_weight_list)
    ]
    y = np.array(sorted(y)).round(5)
    print(y)
# Run the comparison for each saved model, in the same order as before.
for model_file in ("regressor.json", "classifier.json", "zero_weights.json"):
    test_model(model_file)
{
"learner": {
"attributes": {},
"feature_names": [],
"feature_types": [],
"gradient_booster": {
"model": {
"gbtree_model_param": {
"num_parallel_tree": "1",
"num_trees": "2"
},
"iteration_indptr": [
0,
1,
2
],
"tree_info": [
0,
0
],
"trees": [
{
"base_weights": [
-1.117401E-8,
-2.7012718E-1,
3.1956217E-1,
-1.3428491E-1,
-4.2649408E-4,
-6.001738E-2,
1.3094717E-1
],
"categories": [],
"categories_nodes": [],
"categories_segments": [],
"categories_sizes": [],
"default_left": [
0,
0,
0,
0,
0,
0,
0
],
"id": 0,
"left_children": [
1,
3,
5,
-1,
-1,
-1,
-1
],
"loss_changes": [
8.649507E1,
2.5896297E1,
2.7975399E1,
0E0,
0E0,
0E0,
0E0
],
"parents": [
2147483647,
0,
0,
1,
1,
2,
2
],
"right_children": [
2,
4,
6,
-1,
-1,
-1,
-1
],
"split_conditions": [
1.5932372E-1,
4.2627653E-1,
-1.2470143E0,
-1.3428491E-1,
-4.2649408E-4,
-6.001738E-2,
1.3094717E-1
],
"split_indices": [
2,
1,
1,
0,
0,
0,
0
],
"split_type": [
0,
0,
0,
0,
0,
0,
0
],
"sum_hessian": [
1E3,
5.42E2,
4.58E2,
3.26E2,
2.16E2,
8.4E1,
3.74E2
],
"tree_param": {
"num_deleted": "0",
"num_feature": "3",
"num_nodes": "7",
"size_leaf_vector": "1"
}
},
{
"base_weights": [
-6.371168E-5,
-1.5287511E-1,
3.3475894E-1,
-8.8121556E-2,
1.6029611E-2,
3.1154847E-2,
1.4547335E-1
],
"categories": [],
"categories_nodes": [],
"categories_segments": [],
"categories_sizes": [],
"default_left": [
0,
0,
0,
0,
0,
0,
0
],
"id": 1,
"left_children": [
1,
3,
5,
-1,
-1,
-1,
-1
],
"loss_changes": [
5.1267056E1,
2.000975E1,
1.08365135E1,
0E0,
0E0,
0E0,
0E0
],
"parents": [
2147483647,
0,
0,
1,
1,
2,
2
],
"right_children": [
2,
4,
6,
-1,
-1,
-1,
-1
],
"split_conditions": [
7.101826E-1,
4.2627653E-1,
-5.257713E-1,
-8.8121556E-2,
1.6029611E-2,
3.1154847E-2,
1.4547335E-1
],
"split_indices": [
2,
1,
1,
0,
0,
0,
0
],
"split_type": [
0,
0,
0,
0,
0,
0,
0
],
"sum_hessian": [
1E3,
6.87E2,
3.13E2,
4.08E2,
2.79E2,
1.24E2,
1.89E2
],
"tree_param": {
"num_deleted": "0",
"num_feature": "3",
"num_nodes": "7",
"size_leaf_vector": "1"
}
}
]
},
"name": "gbtree"
},
"learner_model_param": {
"base_score": "5.0801497E-2",
"boost_from_average": "1",
"num_class": "0",
"num_feature": "3",
"num_target": "1"
},
"objective": {
"name": "reg:squarederror",
"reg_loss_param": {
"scale_pos_weight": "1"
}
}
},
"version": [
3,
0,
2
]
}
import numpy as np
import xgboost as xgb
# Seeded generator so the synthetic data set is identical on every run.
rng = np.random.default_rng(seed=0)
n = 1000
# Coefficients of the linear signal; one per feature column.
w = [0.1, 0.2, 0.3]
d = len(w)

# Features uniform in [-2, 2), then standard-normal noise (draw order kept).
x = rng.uniform(low=-2, high=2, size=(n, d))
s = rng.normal(loc=0, scale=1.0, size=n)

# Regression target: linear signal plus noise.
y = x.dot(w) + s
# Classification target: 1 where the regression target is positive, else 0.
c = np.greater(y, 0.0).astype(np.int64)

# Fit a two-tree, depth-2 regressor and save its booster as JSON.
model = xgb.XGBRegressor(n_estimators=2, max_depth=2)
model = model.fit(x, y)
model.get_booster().save_model("regressor.json")

# Same settings for the classifier on the binary target.
model = xgb.XGBClassifier(n_estimators=2, max_depth=2)
model = model.fit(x, c)
model.get_booster().save_model("classifier.json")
{
"learner": {
"attributes": {},
"feature_names": [],
"feature_types": [],
"gradient_booster": {
"model": {
"gbtree_model_param": {
"num_parallel_tree": "1",
"num_trees": "2"
},
"iteration_indptr": [
0,
1,
2
],
"tree_info": [
0,
0
],
"trees": [
{
"base_weights": [
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0
],
"categories": [],
"categories_nodes": [],
"categories_segments": [],
"categories_sizes": [],
"default_left": [
0,
0,
0,
0,
0,
0,
0
],
"id": 0,
"left_children": [
1,
3,
5,
-1,
-1,
-1,
-1
],
"loss_changes": [
8.649507E1,
2.5896297E1,
2.7975399E1,
0E0,
0E0,
0E0,
0E0
],
"parents": [
2147483647,
0,
0,
1,
1,
2,
2
],
"right_children": [
2,
4,
6,
-1,
-1,
-1,
-1
],
"split_conditions": [
1.5932372E-1,
4.2627653E-1,
-1.2470143E0,
-1.3428491E-1,
-4.2649408E-4,
-6.001738E-2,
1.3094717E-1
],
"split_indices": [
2,
1,
1,
0,
0,
0,
0
],
"split_type": [
0,
0,
0,
0,
0,
0,
0
],
"sum_hessian": [
1E3,
5.42E2,
4.58E2,
3.26E2,
2.16E2,
8.4E1,
3.74E2
],
"tree_param": {
"num_deleted": "0",
"num_feature": "3",
"num_nodes": "7",
"size_leaf_vector": "1"
}
},
{
"base_weights": [
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0
],
"categories": [],
"categories_nodes": [],
"categories_segments": [],
"categories_sizes": [],
"default_left": [
0,
0,
0,
0,
0,
0,
0
],
"id": 1,
"left_children": [
1,
3,
5,
-1,
-1,
-1,
-1
],
"loss_changes": [
5.1267056E1,
2.000975E1,
1.08365135E1,
0E0,
0E0,
0E0,
0E0
],
"parents": [
2147483647,
0,
0,
1,
1,
2,
2
],
"right_children": [
2,
4,
6,
-1,
-1,
-1,
-1
],
"split_conditions": [
7.101826E-1,
4.2627653E-1,
-5.257713E-1,
-8.8121556E-2,
1.6029611E-2,
3.1154847E-2,
1.4547335E-1
],
"split_indices": [
2,
1,
1,
0,
0,
0,
0
],
"split_type": [
0,
0,
0,
0,
0,
0,
0
],
"sum_hessian": [
1E3,
6.87E2,
3.13E2,
4.08E2,
2.79E2,
1.24E2,
1.89E2
],
"tree_param": {
"num_deleted": "0",
"num_feature": "3",
"num_nodes": "7",
"size_leaf_vector": "1"
}
}
]
},
"name": "gbtree"
},
"learner_model_param": {
"base_score": "5.0801497E-2",
"boost_from_average": "1",
"num_class": "0",
"num_feature": "3",
"num_target": "1"
},
"objective": {
"name": "reg:squarederror",
"reg_loss_param": {
"scale_pos_weight": "1"
}
}
},
"version": [
3,
0,
2
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment