Decision tree and random forest classifiers

This script compares the accuracy and execution time of decision tree and random forest classifiers on the Breast Cancer dataset, using both the DLL implementations (DLL.MachineLearning.SupervisedLearning.Trees) and the scikit-learn implementations.

DLL decision tree accuracy: 0.8947368264198303
DLL decision tree execution time: 2.6442803840000124
SKlearn decision tree accuracy: 0.8947368264198303
SKlearn decision tree execution time: 0.0032002929999634944
DLL random forest accuracy: 0.8947368264198303
DLL random forest execution time: 9.536630067999909
SKlearn random forest accuracy: 0.9035087823867798
SKlearn random forest execution time: 0.015310529000089446

import torch
from sklearn import datasets
from sklearn import tree
from sklearn import ensemble
import time

from DLL.Data.Metrics import accuracy
from DLL.Data.Preprocessing import data_split
from DLL.MachineLearning.SupervisedLearning.Trees import DecisionTree, RandomForestClassifier


# Load the Breast Cancer dataset and turn features and targets into float32 tensors.
breast_cancer = datasets.load_breast_cancer()
features, targets = breast_cancer.data, breast_cancer.target
x = torch.tensor(features, dtype=torch.float32)
y = torch.tensor(targets, dtype=torch.float32)

# Split with train_split=0.8 and no validation share; the two middle return
# values are discarded and the last two are used as the test set.
x_train, y_train, _, _, x_test, y_test = data_split(
    x,
    y,
    train_split=0.8,
    validation_split=0.0,
)

# --- DLL decision tree ---
# The timer covers construction, fitting, both prediction calls and the
# accuracy computation/print.
start = time.perf_counter()
model = DecisionTree(max_depth=1, ccp_alpha=0.0, criterion="entropy")
model.fit(x_train, y_train)
predictions = model.predict(x_test)
# Class probabilities are computed here but not used further in this script.
probas = model.predict_proba(x_test)
dll_tree_accuracy = accuracy(predictions, y_test)
print(f"DLL decision tree accuracy: {dll_tree_accuracy}")
print(f"DLL decision tree execution time: {time.perf_counter() - start}")

# --- scikit-learn decision tree (reference implementation) ---
# Uses the same max_depth and criterion as the DLL decision tree above.
start = time.perf_counter()
model = tree.DecisionTreeClassifier(max_depth=1, criterion="entropy")
model.fit(x_train.numpy(), y_train.numpy())
# The model is fitted on NumPy arrays, so hand predict a NumPy array as well
# instead of relying on implicit conversion of the torch tensor.
predictions = model.predict(x_test.numpy())
# accuracy is called with tensors, so wrap the NumPy predictions again.
print(f"SKlearn decision tree accuracy: {accuracy(torch.tensor(predictions), y_test)}")
print(f"SKlearn decision tree execution time: {time.perf_counter() - start}")

# --- DLL random forest ---
# The timer covers construction, fitting, both prediction calls and the
# accuracy computation/print.
start = time.perf_counter()
model = RandomForestClassifier(n_trees=10, max_depth=1, criterion="entropy")
model.fit(x_train, y_train)
predictions = model.predict(x_test)
# Class probabilities are computed here but not used further in this script.
probas = model.predict_proba(x_test)
dll_forest_accuracy = accuracy(predictions, y_test)
print(f"DLL random forest accuracy: {dll_forest_accuracy}")
print(f"DLL random forest execution time: {time.perf_counter() - start}")

# --- scikit-learn random forest (reference implementation) ---
# Uses the same number of trees, max_depth and criterion as the DLL forest above.
start = time.perf_counter()
model = ensemble.RandomForestClassifier(n_estimators=10, max_depth=1, criterion="entropy")
model.fit(x_train.numpy(), y_train.numpy())
# The model is fitted on NumPy arrays, so hand predict a NumPy array as well
# instead of relying on implicit conversion of the torch tensor.
predictions = model.predict(x_test.numpy())
# accuracy is called with tensors, so wrap the NumPy predictions again.
print(f"SKlearn random forest accuracy: {accuracy(torch.tensor(predictions), y_test)}")
print(f"SKlearn random forest execution time: {time.perf_counter() - start}")

Total running time of the script: (0 minutes 12.258 seconds)

Gallery generated by Sphinx-Gallery