Para el mismo ejemplo con numpy.matrix y el detalle de la regresión logística, ver la nota publicada AQUÍ.
script:
# Fit a logistic-regression classifier on a random ~50% of the rows of the
# Titanic CSV and report classification metrics on the remaining rows.
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.linear_model import LogisticRegression

# DATA
# ---------------------------------------------------------------------------
data = pd.read_csv('https://www.dropbox.com/s/cugxdc9mhau4nw1/titanic2.csv?dl=1')
clase_name = 'survived'  # name of the column to predict

# Every column other than the target is used as a predictor.
# list.remove raises ValueError if the target column is missing from the CSV.
headers = data.columns.values.tolist()
headers.remove(clase_name)

# TRAIN / TEST
# ---------------------------------------------------------------------------
np.random.seed(123)  # fixed seed so the random split is reproducible
# One uniform draw per row; rows whose draw is below 0.5 go to training,
# the rest go to test (so the split is roughly, not exactly, half and half).
m_train = np.random.rand(len(data)) < 0.5
data_train = data.iloc[m_train]
data_test = data.iloc[~m_train]

# MODEL
# ---------------------------------------------------------------------------
modelo = LogisticRegression(random_state=1)
modelo.fit(data_train[headers], data_train[clase_name])

# PREDICTION
# ---------------------------------------------------------------------------
prediccion = modelo.predict(data_test[headers])

# METRICS
# ---------------------------------------------------------------------------
# Classification report for the held-out rows.
print(metrics.classification_report(y_true=data_test[clase_name], y_pred=prediccion))
# Confusion table: actual values in rows, predicted values in columns.
print(pd.crosstab(data_test[clase_name], prediccion, rownames=['REAL'], colnames=['PREDICCION']))
No hay comentarios:
Publicar un comentario