PyQt5-RamanSpectraClassification/utils.py at master · Cheereus/PyQt5-RamanSpectraClassification · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA    # 导入PCA模块
from sklearn.preprocessing import StandardScaler, MinMaxScaler, Normalizer    # 导入数据预处理归一化类
from sklearn import svm
from sklearn.model_selection import cross_val_score
from sklearn.externals import joblib
from sklearn.model_selection import GridSearchCV

def pca_op(X, c=3):
    """Normalize ``X`` and project it onto ``c`` principal components.

    The data is first passed through scikit-learn's ``Normalizer`` (default
    settings), then a ``PCA`` model with ``c`` components is fitted on the
    normalized data. The projected data is also written, comma-separated,
    to ``output/pca_x.csv``.

    Args:
        X: 2-D data matrix.
        c: Number of principal components to keep. It must not exceed the
            smaller dimension of ``X``, i.e.
            ``c <= min(X.shape[0], X.shape[1])``.

    Returns:
        A tuple ``(newX, explained_variance_ratio, pca_model)`` holding the
        reduced data, the explained-variance ratio of each component, and
        the fitted ``PCA`` object (reusable via ``re_pca``).
    """
    import os

    x = Normalizer().fit_transform(X)

    pca_result = PCA(n_components=c)
    # fit_transform() fits the model and projects the data in one pass; a
    # separate fit() call beforehand would only repeat the decomposition.
    newX = pca_result.fit_transform(x)

    # Persist the reduced data as CSV; create the target folder on first use
    # so the save does not fail on a fresh checkout.
    os.makedirs('output', exist_ok=True)
    np.savetxt('output/pca_x.csv', newX, delimiter=',')

    return newX, pca_result.explained_variance_ratio_, pca_result

def re_pca(X, pcaModel):
    """Reduce ``X`` with an already fitted PCA model.

    ``X`` goes through the same ``Normalizer`` preprocessing that ``pca_op``
    applies, and is then projected with ``pcaModel.transform`` (the model is
    not refitted).

    Args:
        X: 2-D data matrix to project.
        pcaModel: Fitted PCA object, e.g. the third value returned by
            ``pca_op``.

    Returns:
        The reduced data matrix.
    """
    normalized = Normalizer().fit_transform(X)
    return pcaModel.transform(normalized)

def cross_validation(x, y, s=10):
    """Grid-search an SVM with cross-validation and return the best model.

    Args:
        x: Feature matrix (the dimensionality-reduced data).
        y: Class labels; they are cast to ``int`` and flattened to 1-D.
        s: Number of cross-validation folds. Every class must contain enough
            samples to be split into ``s`` parts, i.e. the per-class sample
            count ``sn`` has to satisfy ``sn / s >= 1``.

    Returns:
        A tuple ``(best_estimator, scores, best_params)``: the SVC refitted
        on all data with the best parameters, the per-fold scores of a fresh
        SVC built from those parameters, and the parameter dict itself.
    """
    parameters = [
        {
            'C': [1, 3, 5],
            'gamma': [0.001, 0.1, 1, 10],
            'degree': [3, 5, 7, 9],
            'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
            # SVC documents only 'ovo' and 'ovr' for this option; the former
            # None entry makes those candidates fail or be rejected on
            # current scikit-learn releases, so it is no longer searched.
            'decision_function_shape': ['ovo', 'ovr'],
        }
    ]

    # Flatten once so the grid search and the scoring pass below both see
    # the same 1-D integer label vector.
    y = np.asarray(y).astype('int').ravel()

    clf = GridSearchCV(svm.SVC(), parameters, cv=s, refit=True)
    clf.fit(x, y)
    print(clf.best_params_)
    print(clf.best_score_)

    # Re-score an unfitted SVC configured with the winning parameters to get
    # the individual fold scores.
    cross_model = svm.SVC(verbose=0, **clf.best_params_)
    scores = cross_val_score(cross_model, x, y, cv=s)

    return clf.best_estimator_, scores, clf.best_params_

class model:
    """Plain container bundling a trained classifier with its settings.

    Every constructor argument is stored unchanged on the instance:
    ``OSVM`` as ``svmModel``, and ``pcaModel``, ``classNum``, ``threshold``,
    ``n_components`` and ``scross`` under their own names. ``modelSave``
    serializes an instance of this class and ``modelReader`` unpacks one.
    """

    def __init__(self, OSVM, pcaModel, classNum, threshold, n_components, scross):
        # Fitted estimators.
        self.svmModel, self.pcaModel = OSVM, pcaModel
        # Settings stored alongside the estimators (their exact meaning is
        # defined by the callers - not visible in this module).
        self.classNum, self.threshold = classNum, threshold
        self.n_components, self.scross = n_components, scross

def modelSave(OSVM, pcaModel, classNum, threshold, n_components, scross):
    """Save the classifier, PCA model and settings to disk.

    The arguments are wrapped in a ``model`` instance, which is dumped with
    joblib to ``output/svm_model_with_pca.pkl``.

    Args:
        OSVM: Trained classifier, stored as ``svmModel``.
        pcaModel: Fitted PCA object.
        classNum: Stored unchanged.
        threshold: Stored unchanged.
        n_components: Stored unchanged.
        scross: Stored unchanged.
    """
    import os

    # Create the target folder on first use so the dump does not fail when
    # 'output' has not been created yet.
    os.makedirs('output', exist_ok=True)
    joblib.dump(
        model(OSVM, pcaModel, classNum, threshold, n_components, scross),
        'output/svm_model_with_pca.pkl',
    )

def modelReader(filePath):
    """Load a saved model bundle and return its parts.

    Args:
        filePath: Path to a file written by ``modelSave``.

    Returns:
        A tuple ``(svmModel, pcaModel, classNum, threshold, n_components,
        scross)`` read from the stored ``model`` instance.
    """
    # NOTE: joblib.load unpickles the file, which can execute arbitrary code;
    # only open model files that come from a trusted source.
    # The context manager closes the file even if loading raises.
    with open(filePath, 'rb') as f:
        loaded = joblib.load(f)

    return (
        loaded.svmModel,
        loaded.pcaModel,
        loaded.classNum,
        loaded.threshold,
        loaded.n_components,
        loaded.scross,
    )