Note
Go to the end to download the full example code.
Decision tree and random forest classifiers
This script evaluates the performance of decision tree and random forest classifiers on the Breast Cancer dataset using both DLL (DLL.MachineLearning.SupervisedLearning.Trees) and scikit-learn.
DLL decision tree accuracy: 0.8947368264198303
DLL decision tree execution time: 2.6442803840000124
SKlearn decision tree accuracy: 0.8947368264198303
SKlearn decision tree execution time: 0.0032002929999634944
DLL random forest accuracy: 0.8947368264198303
DLL random forest execution time: 9.536630067999909
SKlearn random forest accuracy: 0.9035087823867798
SKlearn random forest execution time: 0.015310529000089446
import torch
from sklearn import datasets
from sklearn import tree
from sklearn import ensemble
import time
from DLL.Data.Metrics import accuracy
from DLL.Data.Preprocessing import data_split
from DLL.MachineLearning.SupervisedLearning.Trees import DecisionTree, RandomForestClassifier
# Load the Breast Cancer dataset bundled with scikit-learn.
breast_cancer = datasets.load_breast_cancer()

# Convert both the feature matrix and the target vector to float32 tensors,
# the representation handed to the DLL models below.
x, y = (
    torch.tensor(array, dtype=torch.float32)
    for array in (breast_cancer.data, breast_cancer.target)
)

# train_split=0.8 with validation_split=0.0; the third and fourth returned
# values (presumably the validation pair) are discarded.
x_train, y_train, _, _, x_test, y_test = data_split(
    x,
    y,
    train_split=0.8,
    validation_split=0.0,
)
# --- DLL decision tree ---
# The timed section covers construction, fitting, both prediction calls,
# the accuracy computation and the first print.
start = time.perf_counter()
model = DecisionTree(max_depth=1, ccp_alpha=0.0, criterion="entropy")
model.fit(x_train, y_train)

# predict runs first, then predict_proba; `probas` is stored but not used
# anywhere else in this script.
predictions, probas = model.predict(x_test), model.predict_proba(x_test)

dll_tree_accuracy = accuracy(predictions, y_test)
print(f"DLL decision tree accuracy: {dll_tree_accuracy}")

dll_tree_elapsed = time.perf_counter() - start
print(f"DLL decision tree execution time: {dll_tree_elapsed}")
# --- scikit-learn decision tree ---
# Same depth and criterion as the DLL tree above. The timed section covers
# construction, fitting, prediction, the accuracy computation and the first
# print.
start = time.perf_counter()
model = tree.DecisionTreeClassifier(max_depth=1, criterion="entropy")
model.fit(x_train.numpy(), y_train.numpy())
# Pass a NumPy array to predict, exactly as fit does, instead of relying on
# scikit-learn implicitly converting a torch tensor.
predictions = model.predict(x_test.numpy())
# accuracy is called with tensors, so wrap the NumPy predictions again.
print(f"SKlearn decision tree accuracy: {accuracy(torch.tensor(predictions), y_test)}")
print(f"SKlearn decision tree execution time: {time.perf_counter() - start}")
# --- DLL random forest ---
# Ten trees of depth one with the entropy criterion. The timed section covers
# construction, fitting, both prediction calls, the accuracy computation and
# the first print.
start = time.perf_counter()
model = RandomForestClassifier(n_trees=10, max_depth=1, criterion="entropy")
model.fit(x_train, y_train)

# predict runs first, then predict_proba; `probas` is stored but not used
# anywhere else in this script.
predictions, probas = model.predict(x_test), model.predict_proba(x_test)

dll_forest_accuracy = accuracy(predictions, y_test)
print(f"DLL random forest accuracy: {dll_forest_accuracy}")

dll_forest_elapsed = time.perf_counter() - start
print(f"DLL random forest execution time: {dll_forest_elapsed}")
# --- scikit-learn random forest ---
# Same number of trees, depth and criterion as the DLL forest above. The
# timed section covers construction, fitting, prediction, the accuracy
# computation and the first print.
start = time.perf_counter()
model = ensemble.RandomForestClassifier(n_estimators=10, max_depth=1, criterion="entropy")
model.fit(x_train.numpy(), y_train.numpy())
# Pass a NumPy array to predict, exactly as fit does, instead of relying on
# scikit-learn implicitly converting a torch tensor.
predictions = model.predict(x_test.numpy())
# accuracy is called with tensors, so wrap the NumPy predictions again.
print(f"SKlearn random forest accuracy: {accuracy(torch.tensor(predictions), y_test)}")
print(f"SKlearn random forest execution time: {time.perf_counter() - start}")
Total running time of the script: (0 minutes 12.258 seconds)