.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "auto_examples\Trees\RegressionTree.py"
.. LINE NUMBERS ARE GIVEN BELOW.

.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        :ref:`Go to the end <sphx_glr_download_auto_examples_Trees_RegressionTree.py>`
        to download the full example code.

.. rst-class:: sphx-glr-example-title

.. _sphx_glr_auto_examples_Trees_RegressionTree.py:


Regression using tree-based models
========================================

This script evaluates and compares various regression models, including regression
trees, random forests, gradient boosting, AdaBoost, XGBoost, and LGBM, on synthetic
datasets.

.. GENERATED FROM PYTHON SOURCE LINES 7-166


.. rst-class:: sphx-glr-horizontal


    *

      .. image-sg:: /auto_examples/Trees/images/sphx_glr_RegressionTree_001.png
         :alt: RegressionTree
         :srcset: /auto_examples/Trees/images/sphx_glr_RegressionTree_001.png
         :class: sphx-glr-multi-img

    *

      .. image-sg:: /auto_examples/Trees/images/sphx_glr_RegressionTree_002.png
         :alt: Gradient boosting regressor, AdaBoost, Extreme gradient boosting regressor, Light gradient boosting machine regressor
         :srcset: /auto_examples/Trees/images/sphx_glr_RegressionTree_002.png
         :class: sphx-glr-multi-img

    *

      .. image-sg:: /auto_examples/Trees/images/sphx_glr_RegressionTree_003.png
         :alt: RegressionTree
         :srcset: /auto_examples/Trees/images/sphx_glr_RegressionTree_003.png
         :class: sphx-glr-multi-img

    *

      .. image-sg:: /auto_examples/Trees/images/sphx_glr_RegressionTree_004.png
         :alt: Gradient boosting regressor, AdaBoost, Extreme gradient boosting regressor, Light gradient boosting machine regressor
         :srcset: /auto_examples/Trees/images/sphx_glr_RegressionTree_004.png
         :class: sphx-glr-multi-img


.. rst-class:: sphx-glr-script-out

.. code-block:: none

    XGBoost time with 50 weak learners: 1.5154590999882203
    LGBM time with 150 weak learners: 1.1558387000113726


|
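The script compares the fitted models visually. As a quantitative sanity check, any
fitted regressor can also be scored against the noiseless generating function
:math:`y = 0.2 \sin(20x) + x^2` from which the first dataset below is drawn. The
following minimal sketch is illustrative and not part of the generated example; it
uses scikit-learn's ``GradientBoostingRegressor`` as a stand-in for any of the
models trained in the script:

.. code-block:: Python

    import torch
    from sklearn.ensemble import GradientBoostingRegressor

    def noiseless(x):
        # The data-generating function from the script, without the noise term.
        return 0.2 * torch.sin(20 * x) + x ** 2

    n = 100
    x = torch.linspace(0, 1, n).unsqueeze(-1)
    y = noiseless(x).squeeze() + torch.normal(mean=0, std=0.05, size=(n,))

    # Any fitted regressor could be scored this way; sklearn's GBR is a stand-in.
    model = GradientBoostingRegressor(n_estimators=50)
    model.fit(x.numpy(), y.numpy())

    x_test = torch.linspace(0, 1, 200).unsqueeze(-1)
    pred = torch.from_numpy(model.predict(x_test.numpy()))
    rmse = torch.sqrt(torch.mean((pred - noiseless(x_test).squeeze()) ** 2))
    print(f"RMSE against the noiseless target: {rmse.item():.4f}")

The full example script follows.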
.. code-block:: Python

    import torch
    import matplotlib.pyplot as plt
    from sklearn.ensemble import GradientBoostingRegressor as gbr, AdaBoostRegressor as abr
    from sklearn.tree import DecisionTreeRegressor
    from time import perf_counter

    from DLL.MachineLearning.SupervisedLearning.Trees import RegressionTree, RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor, XGBoostingRegressor, LGBMRegressor
    from DLL.Data.Preprocessing import data_split

    # 1D dataset: a noisy sine on top of a quadratic trend, plus one
    # uninformative random binary feature the trees should learn to ignore.
    n = 100
    x = torch.linspace(0, 1, n).unsqueeze(-1)
    y = 0.2 * torch.sin(20 * x) + x * x + torch.normal(mean=0, std=0.05, size=(n, 1))
    y = y.squeeze()
    rand_feats = torch.randint_like(x, 2)
    x = torch.cat([x, rand_feats], dim=1)

    model = RegressionTree()
    model.fit(x, y)
    x_test, _ = torch.rand((n, 1)).sort(dim=0)
    x_test_rand_feats = torch.randint_like(x_test, 2)
    x_test = torch.cat([x_test, x_test_rand_feats], dim=1)
    y_pred = model.predict(x_test)

    model2 = RandomForestRegressor(n_trees=3)
    model2.fit(x, y)
    y_pred2 = model2.predict(x_test)

    model3 = GradientBoostingRegressor(n_trees=50, learning_rate=0.05, loss="absolute", max_depth=3)
    history3 = model3.fit(x, y, metrics=["loss"])
    y_pred3 = model3.predict(x_test)

    # scikit-learn's gradient boosting as a reference implementation.
    model4 = gbr(n_estimators=10, learning_rate=0.5, loss="absolute_error")
    model4.fit(x, y.ravel())
    y_pred4 = model4.predict(x_test)

    loss_adaboost = "exponential"
    model5 = AdaBoostRegressor(n_trees=50, loss=loss_adaboost, max_depth=3)
    errors5 = model5.fit(x, y)
    y_pred5 = model5.predict(x_test)

    # scikit-learn's AdaBoost as a reference implementation.
    model6 = abr(estimator=DecisionTreeRegressor(max_depth=3), n_estimators=50, loss=loss_adaboost)
    model6.fit(x, y.ravel())
    y_pred6 = model6.predict(x_test)

    model7 = XGBoostingRegressor(n_trees=50, learning_rate=0.2, loss="huber", max_depth=3, reg_lambda=0.01, gamma=0, huber_delta=5)
    start = perf_counter()
    history7 = model7.fit(x, y, metrics=["loss"])
    print(f"XGBoost time with {model7.n_trees} weak learners: {perf_counter() - start}")
    y_pred7 = model7.predict(x_test)

    model8 = LGBMRegressor(n_trees=150, learning_rate=0.2, loss="squared", max_depth=3, reg_lambda=0.01, gamma=0, huber_delta=5, large_error_proportion=0.3, small_error_proportion=0.2)
    start = perf_counter()
    history8 = model8.fit(x, y, metrics=["loss"])
    print(f"LGBM time with {model8.n_trees} weak learners: {perf_counter() - start}")
    y_pred8 = model8.predict(x_test)

    # Overlay every model's prediction on the 1D data.
    plt.figure(figsize=(8, 8))
    plt.plot(x[:, 0].numpy(), y.numpy(), color="Blue", label="True data")
    plt.plot(x_test[:, 0].numpy(), y_pred.numpy(), color="Red", label="Regression tree")
    plt.plot(x_test[:, 0].numpy(), y_pred2.numpy(), color="Green", label="Random forest regressor")
    plt.plot(x_test[:, 0].numpy(), y_pred3.numpy(), color="Yellow", label="GBR")
    plt.plot(x_test[:, 0].numpy(), y_pred4, color="gray", label="SKlearn GBR")
    plt.plot(x_test[:, 0].numpy(), y_pred5.numpy(), color="brown", label="AdaBoostRegressor")
    plt.plot(x_test[:, 0].numpy(), y_pred6, color="pink", label="SKlearn AdaBoostRegressor")
    plt.plot(x_test[:, 0].numpy(), y_pred7.numpy(), color="lightblue", label="XGBoostRegressor")
    plt.plot(x_test[:, 0].numpy(), y_pred8.numpy(), color="black", label="LGBMRegressor")
    plt.legend(loc="upper left")
    plt.show()

    # Training curves for the boosting models.
    fig, ax = plt.subplots(2, 2, figsize=(8, 8))
    plt.subplots_adjust(hspace=0.5, wspace=0.5)
    ax = ax.ravel()
    ax[0].plot(history3["loss"])
    ax[0].set_ylabel("Loss")
    ax[0].set_xlabel("Tree")
    ax[0].set_title("Gradient boosting regressor")
    ax[1].plot(errors5, label="errors")
    ax[1].plot(model5.confidences, label="confidences")
    ax[1].set_ylabel("AdaBoost error")
    ax[1].set_xlabel("Tree")
    ax[1].set_title("AdaBoost")
    ax[1].legend()
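    # Panels 2 and 3: per-tree training loss recorded via metrics=["loss"].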
    ax[2].plot(history7["loss"])
    ax[2].set_ylabel("Loss")
    ax[2].set_xlabel("Tree")
    ax[2].set_title("Extreme gradient boosting regressor")
    ax[3].plot(history8["loss"])
    ax[3].set_ylabel("Loss")
    ax[3].set_xlabel("Tree")
    ax[3].set_title("Light gradient boosting machine regressor")
    plt.show()

    # 2D dataset: a noisy paraboloid sampled on a 20 x 20 grid, split 80/20
    # into training and test sets.
    n = 20
    X, Y = torch.meshgrid(torch.linspace(-1, 1, n, dtype=torch.float32), torch.linspace(-1, 1, n, dtype=torch.float32), indexing="xy")
    x = torch.stack((X.flatten(), Y.flatten()), dim=1)
    y = X.flatten() ** 2 + Y.flatten() ** 2 + 0.1 * torch.randn(size=Y.flatten().size()) - 5
    x_train, y_train, _, _, x_test, y_test = data_split(x, y, train_split=0.8, validation_split=0.0)

    # Refit each model on the new training data.
    model.fit(x_train, y_train)
    z1 = model.predict(x_test)
    model2.fit(x_train, y_train)
    z2 = model2.predict(x_test)
    history3 = model3.fit(x_train, y_train)
    z3 = model3.predict(x_test)
    model4.fit(x_train, y_train)
    z4 = model4.predict(x_test)
    errors5 = model5.fit(x_train, y_train)
    z5 = model5.predict(x_test)
    model6.fit(x_train, y_train)
    z6 = model6.predict(x_test)
    history7 = model7.fit(x_train, y_train)
    z7 = model7.predict(x_test)
    history8 = model8.fit(x_train, y_train)
    z8 = model8.predict(x_test)

    # Every model's predictions against the held-out test points.
    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(x_test[:, 0], x_test[:, 1], y_test, color="Blue", label="True data")
    ax.scatter(x_test[:, 0], x_test[:, 1], z1, color="Red", label="Regression tree")
    ax.scatter(x_test[:, 0], x_test[:, 1], z2, color="Green", label="Random forest regressor")
    ax.scatter(x_test[:, 0], x_test[:, 1], z3, color="Yellow", label="GBR")
    ax.scatter(x_test[:, 0], x_test[:, 1], z4, color="gray", label="SKlearn GBR")
    ax.scatter(x_test[:, 0], x_test[:, 1], z5, color="brown", label="AdaBoostRegressor")
    ax.scatter(x_test[:, 0], x_test[:, 1], z6, color="pink", label="SKlearn AdaBoostRegressor")
    ax.scatter(x_test[:, 0], x_test[:, 1], z7, color="lightblue", label="XGBoostRegressor")
    ax.scatter(x_test[:, 0], x_test[:, 1], z8, color="black", label="LGBMRegressor")
    plt.legend(loc="upper left")
    plt.show()

    # Training curves for the boosting models on the 2D dataset.
    fig, ax = plt.subplots(2, 2, figsize=(8, 8))
    plt.subplots_adjust(hspace=0.5, wspace=0.5)
    ax = ax.ravel()
    ax[0].plot(history3["loss"])
    ax[0].set_ylabel("Loss")
    ax[0].set_xlabel("Tree")
    ax[0].set_title("Gradient boosting regressor")
    ax[1].plot(errors5, label="errors")
    ax[1].plot(model5.confidences, label="confidences")
    ax[1].set_ylabel("AdaBoost error")
    ax[1].set_xlabel("Tree")
    ax[1].set_title("AdaBoost")
    ax[1].legend()
    ax[2].plot(history7["loss"])
    ax[2].set_ylabel("Loss")
    ax[2].set_xlabel("Tree")
    ax[2].set_title("Extreme gradient boosting regressor")
    ax[3].plot(history8["loss"])
    ax[3].set_ylabel("Loss")
    ax[3].set_xlabel("Tree")
    ax[3].set_title("Light gradient boosting machine regressor")
    plt.show()


.. rst-class:: sphx-glr-timing

   **Total running time of the script:** (0 minutes 13.358 seconds)


.. _sphx_glr_download_auto_examples_Trees_RegressionTree.py:

.. only:: html

  .. container:: sphx-glr-footer sphx-glr-footer-example

    .. container:: sphx-glr-download sphx-glr-download-jupyter

      :download:`Download Jupyter notebook: RegressionTree.ipynb <RegressionTree.ipynb>`

    .. container:: sphx-glr-download sphx-glr-download-python

      :download:`Download Python source code: RegressionTree.py <RegressionTree.py>`

    .. container:: sphx-glr-download sphx-glr-download-zip

      :download:`Download zipped: RegressionTree.zip <RegressionTree.zip>`

.. only:: html

 .. rst-class:: sphx-glr-signature

    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_