-
Notifications
You must be signed in to change notification settings - Fork 52
Expand file tree
/
Copy pathboosting_classifier.py
More file actions
63 lines (46 loc) · 2.11 KB
/
boosting_classifier.py
File metadata and controls
63 lines (46 loc) · 2.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd
train_data = pd.read_csv("train.csv")
test_data = pd.read_csv("test.csv")
y_train = train_data["Survived"]
train_data.drop(labels="Survived", axis=1, inplace=True)
full_data = train_data.append(test_data)
drop_columns = ["Name", "Age", "SibSp", "Ticket", "Cabin", "Parch", "Embarked"]
full_data.drop(labels=drop_columns, axis=1, inplace=True)
full_data = pd.get_dummies(full_data, columns=["Sex"])
full_data.fillna(value=0.0, inplace=True)
X_train = full_data.values[0:891]
X_test = full_data.values[891:]
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
state = 12
test_size = 0.25
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train,
test_size=test_size, random_state=state)
lr_list = [0.05, 0.075, 0.1, 0.25, 0.5, 0.75, 1]
for learning_rate in lr_list:
gb_clf = GradientBoostingClassifier(n_estimators=20, learning_rate = learning_rate, max_features=2, max_depth = 2, random_state = 0)
gb_clf.fit(X_train, y_train)
print("Learning rate: ", learning_rate)
print("Accuracy score (training): {0:.3f}".format(gb_clf.score(X_train, y_train)))
print("Accuracy score (validation): {0:.3f}".format(gb_clf.score(X_val, y_val)))
gb_clf2 = GradientBoostingClassifier(n_estimators=20, learning_rate = 0.5, max_features=2, max_depth = 2, random_state = 0)
gb_clf2.fit(X_train, y_train)
predictions = gb_clf2.predict(X_val)
print("Confusion Matrix:")
print(confusion_matrix(y_val, predictions))
print("Classification Report")
print(classification_report(y_val, predictions))
xgb_clf = XGBClassifier()
xgb_clf.fit(X_train, y_train)
score = xgb_clf.score(X_val, y_val)
print(score)
preds2 = xgb_clf.predict(X_val)
accuracy = accuracy_score(y_val, preds2)
print("Accuracy: " + str((accuracy * 100.0)))