2022年第二届长三角大学数学建模竞赛B题经验
1、题目要求 其中数据 附件一数据(截图): 附件二数据(部分截图):
插入代码片
传感器数据,提取相关特征。 研究发现 VMD 该方法可以避免模态混合问题。VMD 低频分量更容易表达振动信号故障波动的总体趋势。首先,对于每一种状态,对于每一种状态,对于每一种状态,对于每一种状态,对于每一种状态,对于每一种状态,对于每一种状态,对于每一种状态,对于每一种状态,对于每一种状态,对于每一种状态,对于每一种状态,对于每一种状态,对于每一种状态,对于每一种状态, 1000 条记录构造样本数据。通过绘制 样本时域图在5个状态下,观察波形的整体情况。二是样本数据。 VMD 四种变分模式提取 IMF 分量。从振动信号的时域特征、频域特征和能量特征三个方面出发,对每个IMF 构建平均值、峰值、中心频率等 24 构建相应的特征向量,构建相应的数学公式和计算过程。最后,根据优化组合获得频域 能量构建的特征向量能有效区分正常状态和异常状态。差异结果见表 2、3。 第二,要求检查齿轮箱是否有故障。首先,问题一 IMF 在模态分量的基础上使用 SVD 对每一个 IMF 变量,提取时域、频域和能量域的特征,优化组合,选择频域 能量域构建特征向量。第二,通过构建基础 VMD 齿轮箱故障检测模型判断各状态的振动信号。构建样本数据和二分类标签 8:2 分为训练集和测试集 SVM 故障检测作为分类器,以准确性为评价指标。最后得出结论,该模型对振动信号故障障的准确性 100%。 对于问题3,需要判断齿轮箱的具体故障类型。具体故障类型正常,故障类型正常 1.故障2.故障 3、故障 4 共 5 类型。构建样本数据和五分类标签 8:2 分为训练集和测试集,SVM 故障判断作为分类器,以准确性为评价指标。最后得出结论,该模型判断振动信号故障的准确性为 96.7%。 针对问题4,结合问题2和问题3的模型检测和判断测试数据。 通过设置其他类阈值来纠正其他故障的可能性。当检测和 判断模型对所预测类型的最大概率不大于其他类阈值时,判断为其他故障。最终的测试 数据诊断结果表如表 4 所示。
问题一代码 `import numpy as np import matplotlib.pyplot as plt import xlrd as xd from vmdpy import VMD from scipy.fftpack import fft def xls2npy(): ''转换数据格式 :return: '''for i in [0,1]: if i == 0: curTablePath = "train" else: curTablePath = "test" # 加载数据 tables = xd.open _workbook("../data/xls/" curTablePath ".xls") sheetNames = tables.sheet _names() dict = {
} for i in range(len(sheetNames)): curSheetName = sheetNames[i] curSheet = tables.sheet _by_name(curSheetName) ncols = curSheet.ncols # 按列加载每一个 sheet 的每列数据 curSheetData = [] # 去掉第一列,即 No 列 for j in range(1, ncols): curCol = curSheet.col _values(/span>j) # 去除第一行 curCol = curCol[1:-1] curSheetData.append(curCol) # 对每列进行最大最小归一化,以去除数值过大的影响 curSheetData = np.vstack([item for item in curSheetData]) # 转为(29339,4) curSheetData = curSheetData.transpose() dict[curSheetName] = curSheetData np.save("../data/npy/"+curTablePath+"/dict.npy",dict) np.save("../data/npy/"+curTablePath+"/sheetNames.npy",sheetNames) print("_") def showSensorFeatures(): '''对训练集数据进行 VMD、SVD 后特征提取 :return: ''' basePath = "../data/npy/" dict = np.load(basePath + "train/dict.npy", allow _pickle=True).item() # 所有状态的传感器 1 gearBoxs = ['gearbox00', 'gearbox10', 'gearbox20', 'gearbox30', 'gearbox40'] for i in range(len(gearBoxs)): curGearBoxName = gearBoxs[i] # 正常状态传感器 1 # 每一千条计算一个样本特征 # 记录当前状态下的四个传感器的特征向量 samplesFeaturesChain = [] for j in range(4): curGearBoxData = dict[curGearBoxName][:, j] size = curGearBoxData.shape[0] curNum = 0 samplesFeatures = [] curLeft = size data1 = curGearBoxData[curNum:curNum + 1000] curNum += 1000 curLeft -= 1000 u, u _hat, omega = vmd(data1) # 对每个分量进行 svd 去噪 for i in range(len(u)): curIMF = u[i] # 对每个分量进行去噪 curIMF = svd(curIMF) u[i] = curIMF # 得到每个模态特征向量 curUIMFFeas = calIndicator(u) # 将当前传感器前 1000 条数据作为一个记录,画出中心模态 dataPlot(data1,4,u) def vmd(data1): '''计算当前传感器数据 data1 的四个本征模态分量 :param data1: 传感器数据 :return: 模态分量 u ''' alpha = 7000 # moderate bandwidth constraint tau = 0. # noise-tolerance (no strict fidelity enforcement) K = 4 # 3 modes DC = 0 # no DC part imposed init = 1 # initialize omegas uniformly tol = 1e-7 16 # u 是分量 u, u _hat, omega = VMD(data1, alpha, tau, K, DC, init, tol) return u, u _hat, omega def svd(u _IMFi): ''' 对某个状态的某个 IMF 分量 u 进行奇异值去噪 :param u _IMFi: :return: ''' # series = np.load('../data/npy/train/box00/u.npy')[0,:1000] series = u _IMFi # step1 嵌入 windowLen = 20 # 嵌入窗口长度 seriesLen = len(series) # 序列长度 K = seriesLen - windowLen + 1 X = np.zeros((windowLen, K)) for i in range(K): X[:, i] = series[i:i + windowLen] # step2: svd 分解, U 和 sigma 已经按升序排序 U, sigma, VT = np.linalg.svd(X, full _matrices=False) for i in range(VT.shape[0]): VT[i, :] *= sigma[i] A = VT # 重组 rec = np.zeros((windowLen, seriesLen)) for i in range(windowLen): for j in range(windowLen - 1): for m in range(j + 1): rec[i, j] += A[i, j - m] * U[m, i] rec[i, j] /= (j + 1) for j in range(windowLen - 1, seriesLen - windowLen + 1): for m in range(windowLen): rec[i, j] += A[i, j - m] * U[m, i] rec[i, j] /= windowLen for j in range(seriesLen - windowLen + 1, seriesLen): for m in range(j - seriesLen + windowLen, windowLen): rec[i, j] += A[i, j - m] * U[m, i] rec[i, j] /= (seriesLen - j) rrr = np.sum(rec[:2,:], axis=0) # 选择重构的部分,这里选了前三个 return rrr if __name __ == '__main __': # 将 xls 文件转化为 npy 文件 xls2npy() # 展示特征向量 showSensorFeatures()`
问题二代码:
import numpy as np
from sklearn.svm import SVC
from sklearn.model
_selection import train
_test
_split
import scipy.stats
def getSensorFeature():
'''构造传感器的特征向量
:return:
''' basePath = "../data/npy/" # 训练数据
dict = np.load(basePath + "train/dict.npy", allow
_pickle=True).item()
# 测试数据
# dict = np.load(basePath + "test/dict.npy", allow
_pickle=True).item()
# 所有状态的传感器 1
gearBoxs = ['gearbox00','gearbox10','gearbox20','gearbox30','gearbox40']
testBoxs = ['test1','test2','test3','test4',
'test5','test6','test7','test8',
'test9','test10','test11','test12']
samplesFeaturesDict = {
}
for i in range(len(gearBoxs)):
curGearBoxName = gearBoxs[i]
# 正常状态传感器 1
# 每一千条计算一个样本特征
curGearBoxData = dict[curGearBoxName]
# 记录当前状态下的四个传感器的特征向量
samplesFeaturesChain = []
for j in range(4):
curGearBoxData = dict[curGearBoxName][:,j]
size = curGearBoxData.shape[0]
curNum = 0
samplesFeatures = []
curLeft = size
while(curLeft>0):
# 得到当前 1000 条数据
steps = 1000
if curLeft<=1000:
steps = curLeft
data1 = curGearBoxData[curNum:curNum+steps]
curNum+=1000
curLeft-=1000
u, u
_hat, omega = vmd(data1)
# 对每个分量进行 svd 去噪
# 若需要原始信号的频谱图,则注释下面的 for 循环
for i in range(len(u)):
curIMF = u[i]
# 对每个分量进行去噪
curIMF = svd(curIMF)
u[i]=curIMF
# 得到每个模态特征向量
curUIMFFeas = calIndicator(u)
samplesFeatures.append(curUIMFFeas)
# (30,52)
samplesFeatures =
np.reshape(samplesFeatures,(len(samplesFeatures),samplesFeatures[0].shape[0]))
samplesFeaturesChain.append(samplesFeatures)
print(".")
# 需要得到 30,52*4 的数据
samplesFeaturesChain = np.hstack([item for item in samplesFeaturesChain])
samplesFeaturesDict[curGearBoxName] = samplesFeaturesChain
# 当需要训练集数据时,消注释这条
np.save('../data/npy/train/sampelsFeaturesDict.npy', samplesFeaturesDict)
# 当需要测试集数据时,消注释这条
# np.save('../data/npy/test/sampelsFeaturesDict.npy', samplesFeaturesDict)
# 特征提取类
class Fea
_Extra():
def
__init
__(self, Signal, Fs = 25600):
self.signal = Signal
self.Fs = Fs
def Time
_fea(self):
""" 提取时域特征 11 类
"""
signal
_ = self.signal
N = len(signal
_)
y = signal
_
# 1
_均值(平均幅值)
t
_mean
_1 = np.mean(y)
# 3
_方根幅值
t
_fgf
_3 = ((np.mean(np.sqrt(np.abs(y)))))**2
# 4
_RMS 均方根
t
_rms
_4 = np.sqrt((np.mean(y**2)))
# 5
_峰峰值
t
_pp_5 = 0.5*(np.max(y)-np.min(y))
# 6
_偏度 skewness
t
_skew
_6 = scipy.stats.skew(y)
# 7
_峭度 Kurtosis
t
_kur
_7 = scipy.stats.kurtosis(y)
# 8
_峰值因子 Crest Factor
t
_cres
_8 = np.max(np.abs(y))/t
_rms
_4
# 9
_裕度因子 Clearance Factor
t
_clear
_9 = np.max(np.abs(y))/t
_fgf
_3
# 10
_波形因子 Shape fator
t
_shape
_10 = (N * t
_rms
_4)/(np.sum(np.abs(y)))
# 11
_脉冲指数 Impulse Fator
t
_imp_11 = ( np.max(np.abs(y)))/(np.mean(np.abs(y)))
t
_max
_12 = np.max(y)
t
_fea = np.array([t
_mean
_1, t
_rms
_4, t
_pp_5, t
_skew
_6, t
_kur
_7, t
_cres
_8, t
_clear
_9, t
_shape
_10, t
_imp_11,t
_max
_12])
return t
_fea
def Fre
_fea(self):
""" 提取频域特征 13 类
:param signal
_:
:return:
"""
signal
_ = self.signal
L = len(signal
_)
PL = abs(np.fft.fft(signal
_ / L))[: int(L / 2)]
PL[0] = 0
f = np.fft.fftfreq(L, 1 / self.Fs)[: int(L / 2)]
x = f
y = PL
K = len(y)
f
_12 = np.mean(y)
f
_13 = np.var(y)
f
_14 = (np.sum((y - f
_12)**3))/(K * ((np.sqrt(f
_13))**3))
f
_15 = (np.sum((y - f
_12)**4))/(K * ((f
_13)**2))
f
_16 = (np.sum(x * y))/(np.sum(y))
f
_17 = np.sqrt((np.mean(((x- f
_16)**2)*(y))))
f
_18 = np.sqrt((np.sum((x**2)*y))/(np.sum(y)))
f
_19 = np.sqrt((np.sum((x**4)*y))/(np.sum((x**2)*y)))
f
_20 = (np.sum((x**2)*y))/(np.sqrt((np.sum(y))*(np.sum((x**4)*y))))
f
_21 = f
_17/f
_16
f
_22 = (np.sum(((x - f
_16)**3)*y))/(K * (f
_17**3))
f
_23 = (np.sum(((x - f
_16)**4)*y))/(K * (f
_17**4))
f
_fea = np.array([f
_12, f
_13, f
_14, f
_15, f
_16, f
_17, f
_18, f
_19, f
_20, f
_21, f
_22, f
_23])
return f
_fea
def calIndicator(u):
''' :param u: 某个状态下模态分量的集合
:return: 某个状态对应模态分量的特征变量
''' Elist = []
timeFeas = []
freqFeas = []
# 对每一个分量机 IMFi 计算时域、频域、能量域的特征值
for i in range(u.shape[0]):
curU = u[i, :]
# 计算能量值
E = 0
for i in curU:
E += (i * i)
Elist.append(E)
# 计算时域值
time
_fea = Fea
_Extra(curU).Time
_fea()
# 计算频域值
freq_fea = Fea
_Extra(curU).Fre
_fea()
timeFeas.append(time
_fea)
freqFeas.append(freq_fea)
totalE = np.sum(Elist)
Elist = Elist / totalE
# 时域特征、频域特征、能量值
IMFFeas = []
for i in range(u.shape[0]):
curIMFFeatures = []
curTimeFea = timeFeas[i]
curFreqFea = freqFeas[i]
curEnergyMark = Elist[i]
# curIMFFeatures.append(curTimeFea)
# 目前实现频域+能量的特征
curIMFFeatures.append(curFreqFea)
curIMFFeatures.append(curEnergyMark)
curIMFFeatures