在上篇代码的基础上用StepLR更新学习率

发布时间 2023-09-08 20:02:56作者: 奋发图强的小赵

训练集WNN.py代码

参考博文https://blog.csdn.net/weiman1/article/details/125647517?ops_request_misc=%257B%2522request%255Fid%2522%253A%2522169416867516800182757514%2522%252C%2522scm%2522%253A%252220140713.130102334..%2522%257D&request_id=169416867516800182757514&biz_id=0&utm_medium=distribute.pc_search_result.none-task-blog-2~all~sobaiduend~default-2-125647517-null-null.142^v93^chatgptT3_2&utm_term=%E5%AD%A6%E4%B9%A0%E7%8E%87%E5%A6%82%E4%BD%95%E8%B0%83%E6%95%B4&spm=1018.2226.3001.4187

直接套用该博文的代码时遇到了字符(编码)问题

参考博文https://blog.csdn.net/maly_Sunshine/article/details/123225799?ops_request_misc=&request_id=&biz_id=102&utm_term=lr_scheduler.StepLR&utm_medium=distribute.pc_search_result.none-task-blog-2~all~sobaiduweb~default-5-123225799.nonecase&spm=1018.2226.3001.4187

修改代码

 

# Library imports
import numpy as np
import pandas as pd
import math
import torch
import torch.nn.functional as F
import tensorflow as tf
from tensorflow.keras.losses import BinaryCrossentropy

losscc = BinaryCrossentropy()

# Learning-rate scheduling (new): a dummy parameter/optimizer pair exists
# purely so torch's StepLR can produce a decaying learning rate; the WNN
# weights themselves are updated manually below, not by this optimizer.
from torch.optim import SGD
from torch.optim import lr_scheduler
from torch.nn.parameter import Parameter

model = [Parameter(torch.randn(2, 2, requires_grad=True))]
optimizer = SGD(model, lr=0.1)


# Activation function
def tanh(x):
    return (np.exp(x) - np.exp(-x)) / (np.exp(x) + np.exp(-x))


# Derivative of the activation function (expects x = tanh(u), hence 1 - x^2)
def de_tanh(x):
    return (1 - x ** 2)


# Wavelet basis function (Morlet-style): cos(1.75*x) * exp(-x^2/2)
def wavelet(x):
    return (math.cos(1.75 * x)) * (np.exp((x ** 2) / (-2)))


# Derivative of the wavelet basis function
def de_wavelet(x):
    y = (-1) * (1.75 * math.sin(1.75 * x) + x * math.cos(1.75 * x)) * (np.exp((x ** 2) / (-2)))
    return y


# Data input
data = pd.read_pickle('ICC_rms.pkl')
df = pd.DataFrame(data)
X = df.iloc[:, 0:510].values   # sample features, columns 0-509 -> (1544, 510): 1544 samples, 510 features
y = df.iloc[:, 511].values     # sample labels, column 511 -> (1544,)
# Convert y to 0/1 form: 'Neurons' -> 0, 'Astrocytes' -> 1
Y = np.array([-1.0] * 1544)
for i in range(len(y)):
    if y[i] == 'Neurons':
        Y[i] = 0
    if y[i] == 'Astrocytes':
        Y[i] = 1

# Hyper-parameters
samnum = 1544                    # number of training samples
hiddenunitnum = 8                # hidden-layer units
indim = 510                      # input-layer dimension
outdim = 1                       # output-layer dimension
maxepochs = 300                  # training epochs
errorfinal = 0.65 * 10 ** (-3)   # early-stopping loss threshold
# learnrate is taken from the StepLR scheduler each epoch (new);
# step_size=8 decays the rate by gamma=0.1 every 8 epochs.
scheduler = lr_scheduler.StepLR(optimizer, 8, gamma=0.1, last_epoch=-1)

Ti = np.array(Y)
samplein = np.mat(X.T)
# Per-feature min/max, used to normalise inputs into [0, 1]
# (and later to de-normalise predictions).
sampleinminmax = np.array([samplein.min(axis=1).T.tolist()[0], samplein.max(axis=1).T.tolist()[0]]).transpose()
# The prediction target is Ti
sampleout = np.mat([Ti])
sampleoutminmax = np.array([sampleout.min(axis=1).T.tolist()[0], sampleout.max(axis=1).T.tolist()[0]]).transpose()
sampleinnorm = ((np.array(samplein.T) - sampleinminmax.transpose()[0]) / (sampleinminmax.transpose()[1] - sampleinminmax.transpose()[0])).transpose()
sampleoutnorm = ((np.array(sampleout.T) - sampleoutminmax.transpose()[0]) / (sampleoutminmax.transpose()[1] - sampleoutminmax.transpose()[0])).transpose()
# Add small uniform noise to the normalised targets
noise = 0.03 * np.random.rand(sampleoutnorm.shape[0], sampleoutnorm.shape[1])
sampleoutnorm += noise

# Random initialisation of the hidden weights w1, shift factors b,
# scale factors a, and output weights w2.
scale = np.sqrt(3 / ((indim + outdim) * 0.5))
w1 = np.random.uniform(low=-scale, high=scale, size=[hiddenunitnum, indim])
b = np.random.uniform(low=-scale, high=scale, size=[hiddenunitnum, 1])
a = np.random.uniform(low=-scale, high=scale, size=[hiddenunitnum, 1])
w2 = np.random.uniform(low=-scale, high=scale, size=[hiddenunitnum, outdim])

inputin = np.mat(sampleinnorm.T)
w1 = np.mat(w1)
b = np.mat(b)
a = np.mat(a)
w2 = np.mat(w2)

# errhistory stores the loss computed at every epoch
errhistory = np.mat(np.zeros((1, maxepochs)))

# Training loop
for i in range(maxepochs):
    # Learning-rate update (new): step the dummy optimizer, read the
    # current rate for the manual weight updates below, then advance
    # the StepLR schedule.
    optimizer.zero_grad()
    optimizer.step()
    learnrate = optimizer.param_groups[0]['lr']
    scheduler.step()

    # Forward pass: hidden_out is the hidden-layer (wavelet) output
    hidden_out = np.mat(np.zeros((samnum, hiddenunitnum)))
    for m in range(samnum):
        for j in range(hiddenunitnum):
            d = ((inputin[m, :] * w1[j, :].T) - b[j, :]) * (a[j, :] ** (-1))
            hidden_out[m, j] = wavelet(d)
    # output is the output-layer activation
    output = tanh(hidden_out * w2)

    # Loss computation
    out_real = np.mat(sampleoutnorm.transpose())
    err = out_real - output
    loss = losscc(out_real, output)
    # Early stop when the loss drops below the threshold
    if loss < errorfinal:
        break
    errhistory[:, i] = loss

    # Backward pass
    out_put = np.array(output.T)
    belta = de_tanh(out_put).transpose()
    # Accumulate the error term for each parameter of every hidden unit
    for j in range(hiddenunitnum):
        sum1 = 0.0   # gradient accumulator for w1[j]
        sum2 = 0.0   # gradient accumulator for b[j]
        sum3 = 0.0   # gradient accumulator for a[j]
        sum4 = 0.0   # gradient accumulator for w2[j]
        for m in range(samnum):
            sum1 += err[m, :] * belta[m, :] * w2[j, :] * de_wavelet(hidden_out[m, j]) * (inputin[m, :] / a[j, :])
            sum2 += err[m, :] * belta[m, :] * w2[j, :] * de_wavelet(hidden_out[m, j]) * (-1) * (1 / a[j, :])
            sum3 += err[m, :] * belta[m, :] * w2[j, :] * de_wavelet(hidden_out[m, j]) * (-1) * ((inputin[m, :] * w1[j, :].T - b[j, :]) / (a[j, :] * a[j, :]))
            sum4 += err[m, :] * belta[m, :] * hidden_out[m, j]
        delta_w1 = sum1
        delta_b = sum2
        delta_a = sum3
        delta_w2 = sum4
        # Update the four parameter groups with the scheduled learning rate
        w1[j, :] = w1[j, :] + learnrate * delta_w1
        b[j, :] = b[j, :] + learnrate * delta_b
        a[j, :] = a[j, :] + learnrate * delta_a
        w2[j, :] = w2[j, :] + learnrate * delta_w2
    print("the generation is:", i + 1, ",the loss is:", loss)

print('更新的w1:', w1)
print('更新的b:', b)
print('更新的w2:', w2)
print('更新的a:', a)
print("The loss after iteration is :", loss)
np.save("w1.npy", w1)
np.save("b.npy", b)
np.save("w2.npy", w2)
np.save("a.npy", a)

遇到了loss值不下降的问题,于是手动停止运行

修正代码,因为迭代次数设置的300,所以把

  scheduler = lr_scheduler.StepLR(optimizer, 8, gamma=0.1, last_epoch=-1)

改为

 scheduler = lr_scheduler.StepLR(optimizer, 30, gamma=0.1, last_epoch=-1)

但是在后面迭代的过程中还是遇到了loss一直不变的问题

修改后的运行结果是起初loss一直下降

然后学习率变低之后,loss就会连续好几次迭代不下降,最后继续下降

300次迭代完成后loss变为3.9多

The loss after iteration is : tf.Tensor(3.9835846424102783, shape=(), dtype=float64)

然后用test.py测试

#库的导入
import numpy as np
import pandas as pd
import math
from tensorflow.keras.losses import BinaryCrossentropy
losscc = BinaryCrossentropy()

# Wavelet basis function (Morlet-style)
def wavelet(x):
    """Return cos(1.75*x) * exp(-x**2 / 2).

    Generalized from math.cos to np.cos so the function also accepts
    NumPy arrays element-wise; for scalar x the returned value is
    unchanged.
    """
    return np.cos(1.75 * x) * np.exp((x ** 2) / (-2))
# Activation function tanh
def tanh(x):
    """Numerically stable hyperbolic tangent.

    The original hand-rolled (e^x - e^-x) / (e^x + e^-x) overflows and
    returns nan for |x| greater than roughly 355; np.tanh computes the
    same values on the normal range while saturating cleanly to +/-1.
    """
    return np.tanh(x)

# Data input: load the pickled feature matrix and labels.
data=pd.read_pickle('ICC_rms.pkl')
df=pd.DataFrame(data)
X = df.iloc[:, 0:510].values # x values of all samples, columns 0-509 -> matrix (1544, 510): 1544 samples, 510 features
y = df.iloc[:, 511].values # labels of all samples, column 511 -> matrix (1544,)
# Convert y to 0/1 form: 'Neurons' -> 0, 'Astrocytes' -> 1
Y=np.array([-1.0] * 1544)
for i in range(len(y)):
    if y[i] =='Neurons':
        Y[i]=0
    if y[i] =='Astrocytes':
        Y[i]=1

# Earlier input-data import (kept for reference); it supplied the
# normalisation / de-normalisation statistics for the test data.
# df = pd.read_csv("train.csv")
# df.columns = ["Co", "Cr", "Mg", "Pb", "Ti"]
# Co = df["Co"]
# Co = np.array(Co)
# Cr = df["Cr"]
# Cr = np.array(Cr)
# Mg=df["Mg"]
# Mg=np.array(Mg)
# Pb = df["Pb"]
# Pb =np.array(Pb)
# Ti = df["Ti"]
# Tisample = np.array(Ti)
# sampleinsample = np.mat([Co,Cr,Mg,Pb])
Ti = np.array(Y)
samplein = np.mat(X.T)
sampleinminmax = np.array([samplein.min(axis=1).T.tolist()[0],samplein.max(axis=1).T.tolist()[0]]).transpose()# per-feature min and max (used to normalise the test inputs)
sampleout = np.mat([Ti])
sampleoutminmax = np.array([sampleout.min(axis=1).T.tolist()[0],sampleout.max(axis=1).T.tolist()[0]]).transpose()# per-output min and max (used to de-normalise the predictions)

# Load the parameters trained and saved by WNN.py
w1=np.load('w1.npy')
b=np.load('b.npy')
a=np.load('a.npy')
w2=np.load('w2.npy')
w1 = np.mat(w1)
w2 = np.mat(w2)
b = np.mat(b)
a = np.mat(a)

# Number of hidden-layer units (must match the value used in training)
hiddenunitnum = 8
# Number of test samples
# NOTE(review): testnum is still 24 from the old train.csv/test.csv setup,
# but the matrix fed to the network below (X) holds 1544 samples, so only
# the first 24 are ever predicted — confirm this is intended.
testnum = 24


# Test-data import
# NOTE(review): the values read from test.csv below appear unused — Ti and
# input are overwritten right after with Y and X from the pickle file.
df = pd.read_csv("test.csv")
df.columns = ["Co", "Cr", "Mg", "Pb", "Ti"]
Co = df["Co"]
Co = np.array(Co)
Cr = df["Cr"]
Cr = np.array(Cr)
Mg=df["Mg"]
Mg=np.array(Mg)
Pb = df["Pb"]
Pb =np.array(Pb)
Ti = df["Ti"]
# Ti = np.array(Ti)
# input=np.mat([Co,Cr,Mg,Pb])
Ti = np.array(Y)
input = np.mat(X.T)  # NOTE: shadows the builtin input()

# Normalise the test inputs with the training min/max statistics
inputnorm=(np.array(input.T)-sampleinminmax.transpose()[0])/(sampleinminmax.transpose()[1]-sampleinminmax.transpose()[0])
# hidden_out stores the hidden-layer output
hidden_out = np.mat(np.zeros((testnum,hiddenunitnum)))
# Compute the hidden-layer (wavelet) output
for m in range(testnum):
    for j in range(hiddenunitnum):
        d = ((inputnorm[m, :] * w1[j, :].T) - b[j, :]) * (a[j, :] ** (-1))
        hidden_out[m, j] = wavelet(d)
# Compute the output-layer output
output = tanh(hidden_out * w2 )
# De-normalise the outputs back to the original label range
diff = sampleoutminmax[:,1]-sampleoutminmax[:,0]
networkout2 = output*diff+sampleoutminmax[0][0]
networkout2 = np.array(networkout2).transpose()
output1=networkout2.flatten()# flatten to a 1-D array
output1=output1.tolist()
for i in range(testnum):
    output1[i] = float('%.2f'%output1[i])
print("the prediction is:",output1)

# #将输出结果与真实值进行对比,计算误差
# output=Ti
# rmse = (np.sum(np.square(output-output1))/len(output)) ** 0.5
# mae = np.sum(np.abs(output-output1))/len(output)
# #average_loss1=np.sum(np.abs((output-output1)/output))/len(output)
# average_loss1=losscc(output,output1)
# mape="%.2f%%"%(average_loss1*100)
# f1 = 0
# for m in range(testnum):
#     f1 = f1 + np.abs(output[m]-output1[m])/((np.abs(output[m])+np.abs(output1[m]))/2)
# f2 = f1 / testnum
# smape="%.2f%%"%(f2*100)
# print("the MAE is :",mae)
# print("the RMSE is :",rmse)
# print("the MAPE is :",mape)
# print("the SMAPE is :",smape)

# Distribution of the relative error |true - predicted| / |true| over the
# test samples.
# BUG FIX: the original computed np.abs(output[m]-output1[m])/np.abs(output[m]),
# i.e. it compared the raw network output against its own rounded copy, so
# every sample trivially landed in the <= 0.1 bin.  The intent (see the
# commented-out metrics block earlier in this file, where output = Ti) is to
# compare the prediction output1 against the true label Ti.
A = 0  # relative error <= 0.1
B = 0  # 0.1 < relative error <= 0.2
C = 0  # 0.2 < relative error <= 0.3
D = 0  # 0.3 < relative error <= 0.4
E = 0  # relative error > 0.4
for m in range(testnum):
    truth = Ti[m]
    if truth == 0:
        # Relative error is undefined when the true value is 0; fall back
        # to the absolute error so label-0 samples are still binned.
        y1 = np.abs(truth - output1[m])
    else:
        y1 = np.abs(truth - output1[m]) / np.abs(truth)
    if y1 <= 0.1:
        A = A + 1
    elif y1 > 0.1 and y1 <= 0.2:
        B = B + 1
    elif y1 > 0.2 and y1 <= 0.3:
        C = C + 1
    elif y1 > 0.3 and y1 <= 0.4:
        D = D + 1
    else:
        E = E + 1
print("Ratio <= 0.1 :",A)
print("0.1< Ratio <= 0.2 :",B)
print("0.2< Ratio <= 0.3 :",C)
print("0.3< Ratio <= 0.4 :",D)
print("Ratio > 0.4 :",E)

结果显然不对,预测值全为1

the prediction is: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
Ratio <= 0.1 : 24
0.1< Ratio <= 0.2 : 0
0.2< Ratio <= 0.3 : 0
0.3< Ratio <= 0.4 : 0
Ratio > 0.4 : 0

但是不知道为什么前面预测不正确的时候预测值全为0,这次预测不正确了预测值全为1?

还是学习率的问题,现在的问题是找不到一个合适的学习率