Random Forest: Implementaciones de Scikit-Learn Sobre QSAR
Random Forest
In [2]:
# Load the QSAR oral toxicity dataset: semicolon-separated binary fingerprint
# bits plus a class label in the last column, with no header row in the file.
import pandas as pd
import numpy as np

dataset = pd.read_csv("qsar_oral_toxicity.csv", sep=';', header=None)
# `prefix='x'` was removed from read_csv in pandas 2.0; build the same
# x0, x1, ..., x1024 column names explicitly instead.
dataset.columns = [f"x{i}" for i in dataset.columns]
dataset.head()
Out[2]:
0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0
1 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0 0 0 ... 0 0 1 0 0 0
3 0 0 0 0 0 0 0 1 0 0 ... 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0
In [3]:
The target labels are encoded as integers: 'negative' → 0, 'positive' → 1.
Out[3]:
0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0
1 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0 0 0 ... 0 1 0 0 0 0
3 0 0 0 0 0 0 0 1 0 0 ... 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0
In [4]:
Out[4]:
negative 6609
positive 584
Name: x1024, dtype: int64
In [5]:
In [6]:
In [7]:
Acierto: 0.9394107837687604
precision recall f1-score support
In [8]:
0.7047099622178948
ID3
In [9]:
In [10]:
Acierto: 0.9049471928849361
precision recall f1-score support
In [11]:
0.731913853697138
Cross Validation
In [12]:
# Seed for the cross-validation fold splitter (reproducibility) and the
# metric name passed to cross_val_score below.
seed = 1
scoring = 'accuracy'
In [13]:
# Compare a single decision tree (CART) against a random forest using
# 10-fold cross-validation on the training split.
models = []
models.append(('CART', tree.DecisionTreeClassifier()))
models.append(('RF', RandomForestClassifier()))

results = []
names = []
for name, model in models:
    # Bug fix: the original passed random_state=None, so the `seed` defined
    # in the config cell was never used and folds changed on every run.
    # shuffle=True is required for random_state to take effect in KFold.
    kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
    cv_results = model_selection.cross_val_score(model, X_train, Y_train,
                                                 cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    # Report mean accuracy and its standard deviation across the 10 folds.
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)
In [18]:
{'n_estimators': [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000],
 'max_features': ['auto', 'sqrt'],
 'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None],
 'min_samples_split': [2, 5, 10],
 'min_samples_leaf': [1, 2, 4],
 'bootstrap': [True, False]}
In [15]:
Out[15]:
{'n_estimators': 200,
'min_samples_split': 10,
'min_samples_leaf': 1,
'max_features': 'sqrt',
'max_depth': 60,
'bootstrap': False}
In [16]:
rf_random.best_params_
Out[16]:
{'n_estimators': 200,
'min_samples_split': 10,
'min_samples_leaf': 1,
'max_features': 'sqrt',
'max_depth': 60,
'bootstrap': False}
Comparamos el modelo con los hiperparámetros optimizados frente al modelo base.
In [22]:
In [23]:
# Evaluate the tuned classifier on the held-out test set: overall accuracy
# ("Acierto") followed by the per-class precision/recall/f1 report.
predictions = clf2.predict(X_test)
accuracy = metrics.accuracy_score(test.output, predictions)
report = metrics.classification_report(test.output, predictions)
print("\nAcierto:", accuracy)
print(report)
Acierto: 0.9382990550305725
precision recall f1-score support
In [ ]: