-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathutils.py
More file actions
79 lines (66 loc) · 3.01 KB
/
utils.py
File metadata and controls
79 lines (66 loc) · 3.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import os

import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.decomposition import PCA # 导入PCA模块
from sklearn.externals import joblib
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler, Normalizer # 导入数据预处理归一化类
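# Inputs: data matrix X and the number of principal components c.
# c must not exceed either dimension of X, i.e. c <= min(X.shape[0], X.shape[1]).
# Returns the reduced data, the explained-variance ratio of each component, and the fitted PCA model.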
def pca_op(X, c=3):
    """Normalize ``X``, reduce it to ``c`` principal components and save the result.

    ``X`` is passed through scikit-learn's ``Normalizer`` (which, per the
    scikit-learn documentation, rescales each sample to unit norm) and a PCA
    model is then fitted on the normalized data. The reduced matrix is also
    written to ``output/pca_x.csv`` as comma-separated text; the ``output``
    directory is created when it does not exist yet.

    Args:
        X: 2-D data matrix; it is handed directly to
            ``Normalizer.fit_transform``.
        c: Number of principal components to keep (default 3).

    Returns:
        A tuple ``(newX, explained_variance_ratio, pca_model)`` holding the
        reduced data, the ``explained_variance_ratio_`` of the fitted PCA and
        the fitted PCA object itself.
    """
    normalized = Normalizer().fit_transform(X)
    pca_result = PCA(n_components=c)
    # fit_transform() fits the model and projects the data in one pass, so a
    # separate fit() call beforehand would only repeat the decomposition.
    newX = pca_result.fit_transform(normalized)
    # Make sure the hard-coded output directory exists before writing to it.
    os.makedirs('output', exist_ok=True)
    np.savetxt('output/pca_x.csv', newX, delimiter=',')
    return newX, pca_result.explained_variance_ratio_, pca_result
# Reduce dimensionality directly with an existing (already fitted) PCA model; returns the reduced matrix.
def re_pca(X, pcaModel):
    """Project ``X`` using a PCA model that has already been fitted.

    ``X`` goes through a fresh ``Normalizer`` first and the normalized data
    is then transformed by ``pcaModel``; the model itself is not refitted.

    Args:
        X: 2-D data matrix; it is handed directly to
            ``Normalizer.fit_transform``.
        pcaModel: Object exposing ``transform`` (presumably the PCA instance
            returned by ``pca_op`` -- confirm against callers).

    Returns:
        The matrix returned by ``pcaModel.transform``.
    """
    return pcaModel.transform(Normalizer().fit_transform(X))
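# Inputs: reduced data x, labels y, and the number of cross-validation folds s.
# The fold count is constrained: every class must be splittable into s folds,
# i.e. the sample count sn of each class must satisfy sn / s >= 1.
# Returns the best SVM model, its cross-validation scores, and the best parameters.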
def cross_validation(x, y, s=10):
    """Grid-search an SVC over a fixed parameter grid and cross-validate the winner.

    The best parameters and best score found by the grid search are printed.
    A fresh SVC configured with the best parameters is then scored with
    ``cross_val_score`` on the same data.

    Args:
        x: Feature matrix passed to ``GridSearchCV.fit`` (presumably the
            PCA-reduced data -- confirm against callers).
        y: Class labels. They are cast to ``int`` and flattened to 1-D, so a
            column vector or a plain sequence is accepted.
        s: Number of cross-validation folds, used for both the grid search
            and the final scoring (default 10).

    Returns:
        A tuple ``(best_estimator, scores, best_params)``: the refitted best
        estimator from the grid search, the per-fold scores of an SVC built
        from the best parameters, and the best-parameter dict.
    """
    parameters = [
        {
            'C': [1, 3, 5],
            'gamma': [0.001, 0.1, 1, 10],
            'degree': [3, 5, 7, 9],
            'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
            # NOTE(review): None may be rejected by recent scikit-learn
            # releases -- confirm against the version this project pins.
            'decision_function_shape': ['ovo', 'ovr', None],
        }
    ]
    # Flatten the labels once so the grid search and cross_val_score both
    # receive the same 1-D integer array.
    y = np.asarray(y).astype('int').ravel()

    clf = GridSearchCV(svm.SVC(), parameters, cv=s, refit=True)
    clf.fit(x, y)
    print(clf.best_params_)
    print(clf.best_score_)

    # best_params_ holds exactly the keys searched above, so unpacking it
    # configures the same SVC as passing each parameter by hand.
    cross_model = svm.SVC(verbose=0, **clf.best_params_)
    scores = cross_val_score(cross_model, x, y, cv=s)
    return clf.best_estimator_, scores, clf.best_params_
class model:
    """Plain container for the objects and settings stored in one model file.

    The constructor only stores its arguments as attributes:
    ``OSVM`` -> ``svmModel`` and ``pcaModel`` -> ``pcaModel``, while
    ``classNum``, ``threshold``, ``n_components`` and ``scross`` keep their
    names. Their exact meaning is not visible in this class -- presumably
    the trained SVM, the fitted PCA, and the training settings; confirm
    against the code that builds the bundle.
    """

    def __init__(self, OSVM, pcaModel, classNum, threshold, n_components, scross):
        # Estimator objects.
        self.svmModel, self.pcaModel = OSVM, pcaModel
        # Accompanying settings, stored unchanged.
        self.classNum, self.threshold = classNum, threshold
        self.n_components, self.scross = n_components, scross
# Save the model bundle to disk.
def modelSave(OSVM, pcaModel, classNum, threshold, n_components, scross):
    """Bundle the given objects into a ``model`` instance and dump it to disk.

    The bundle is written with ``joblib.dump`` to
    ``output/svm_model_with_pca.pkl``; the ``output`` directory is created
    when it does not exist yet.

    Args:
        OSVM: Stored as ``svmModel`` on the bundle.
        pcaModel: Stored as ``pcaModel`` on the bundle.
        classNum: Stored as ``classNum`` on the bundle.
        threshold: Stored as ``threshold`` on the bundle.
        n_components: Stored as ``n_components`` on the bundle.
        scross: Stored as ``scross`` on the bundle.
    """
    modelObj = model(OSVM, pcaModel, classNum, threshold, n_components, scross)
    # Make sure the hard-coded output directory exists before writing to it.
    os.makedirs('output', exist_ok=True)
    joblib.dump(modelObj, 'output/svm_model_with_pca.pkl')
# Load a saved model bundle from disk.
def modelReader(filePath):
    """Load a saved model bundle and return its fields as a tuple.

    Args:
        filePath: Path of the file to load; it is opened in binary mode and
            passed to ``joblib.load``.

    Returns:
        A tuple ``(svmModel, pcaModel, classNum, threshold, n_components,
        scross)`` read from the attributes of the loaded object.
    """
    # SECURITY: joblib.load unpickles the file, which can execute arbitrary
    # code -- only load model files from a trusted source.
    # The with-block closes the file even when loading raises.
    with open(filePath, 'rb') as f:
        bundle = joblib.load(f)
    return (
        bundle.svmModel,
        bundle.pcaModel,
        bundle.classNum,
        bundle.threshold,
        bundle.n_components,
        bundle.scross,
    )