注释:Ng的视频有完整的推导步骤,不过理论和实践还是有很大差别的,代码实现还得自己完成
1.Logistic回归理论
Ng的推导很完美,看懂就可以了,没必要自己推导一遍,因为几天不用就忘记了。
2.代码实现
2.1全局梯度上升
每次训练针对整体,依据整体去找最值。
好处:容易过滤局部极值,找到真正的全局极值。
坏处:整体数据太多,花费时间太久,而且新来的样本必须重新训练。
推导公式:见博文刚开始的链接,Ng大神的全部推导及证明!
def loadDataSet(fileName='testSet.txt'):
    """Read a whitespace-separated 2-feature data set.

    Each non-blank line must hold ``x1 x2 label``.

    Args:
        fileName: Path of the data file. Defaults to ``'testSet.txt'``.

    Returns:
        A ``(dataMat, labelMat)`` tuple. ``dataMat`` is a list of
        ``[1.0, x1, x2]`` rows (the leading 1.0 is the bias input) and
        ``labelMat`` is a list of one-element ``[label]`` rows.
    """
    dataMat = []
    labelMat = []
    # The context manager closes the file even if a line fails to parse.
    with open(fileName) as fr:
        for line in fr:
            lineArr = line.strip().split()
            if not lineArr:
                continue  # tolerate blank lines
            dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
            labelMat.append([int(lineArr[2])])
    return dataMat, labelMat


def sigmoid(intX):
    """Return the logistic function 1 / (1 + exp(-intX)), element-wise."""
    return 1.0 / (1.0 + np.exp(-intX))


def gradAscent(dataMatIn, classLabels, alpha=0.001, maxCycle=200):
    """Fit logistic-regression weights with full-batch gradient ascent.

    Args:
        dataMatIn: m x n sample rows; the bias is treated as one more weight,
            so callers include a constant column themselves.
        classLabels: m labels, given either flat or as m one-element rows.
        alpha: Step size.
        maxCycle: Number of full passes over the data.

    Returns:
        An n x 1 ``numpy.matrix`` of weights.
    """
    # np.asmatrix replaces np.mat, which no longer exists in NumPy 2.0.
    dataMatrix = np.asmatrix(dataMatIn, dtype=float)
    # Force a column vector so a flat label list cannot broadcast into an
    # m x m error matrix.
    labelsMat = np.asmatrix(
        np.asarray(classLabels, dtype=float).reshape(-1, 1))
    n = dataMatrix.shape[1]
    weight = np.asmatrix(np.ones((n, 1)))
    for _ in range(maxCycle):
        h = sigmoid(dataMatrix * weight)
        error = labelsMat - h
        weight = weight + alpha * dataMatrix.T * error
    return weight
2.2简单分类可视化
利用matplotlib画出简单分类的决策边界
注意:这里是先把matrix转化为list之后再用plot绘制的,看网上说可以直接用matrix,但是我运行出错。
def plotBestFit(weight):
    """Scatter the samples from ``loadDataSet()`` and draw the decision line.

    Args:
        weight: Three weights ``(w0, w1, w2)``. Any array-like is accepted
            (n x 1 matrix, 1-D array, list); it is flattened before use.
    """
    # Imported here so the numeric code does not require matplotlib.
    import matplotlib.pyplot as plt

    dataMat, labelMat = loadDataSet()
    dataArr = np.array(dataMat, dtype=float).reshape(-1, 3)
    isPositive = np.asarray(labelMat).ravel() == 1

    fig = plt.figure("data_x_y")
    ax = fig.add_subplot(111)
    ax.scatter(dataArr[isPositive, 1], dataArr[isPositive, 2],
               s=30, c='r', marker='s')
    ax.scatter(dataArr[~isPositive, 1], dataArr[~isPositive, 2],
               s=30, c='g')

    # Plain 1-D arrays replace np.mat (gone in NumPy 2.0) and can be handed
    # to plot() directly, with no list conversion.
    w = np.asarray(weight, dtype=float).ravel()
    x = np.arange(-3.0, 3.0, 0.1)
    # Boundary: w0 + w1*x1 + w2*x2 = 0, solved for x2.
    y = (-w[0] - w[1] * x) / w[2]
    ax.plot(x, y)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()
2.3局部随机梯度上升法及改进
局部随机梯度:和全局相对,利用单个样本更新W,同时按均匀分布随机选择样本的次序。
好处:‘局部’训练效率高,而且新的样本可以直接添加不用重新训练,‘随机’解决了样本规律性的波动,书上有图解。
坏处:可能得到局部极值。
# Stochastic gradient ascent - original version
def stoGradAscent0(dataMatrix, classLabels, alpha=0.01):
    """Run one in-order pass of stochastic gradient ascent.

    Args:
        dataMatrix: m x n array-like of samples.
        classLabels: m labels, given either flat or as one-element rows.
        alpha: Fixed step size.

    Returns:
        A 1-D array of n weights.
    """
    dataArr = np.asarray(dataMatrix, dtype=float)
    labels = np.asarray(classLabels, dtype=float).ravel()
    m, n = dataArr.shape
    weights = np.ones(n)  # start from ones rather than zeros
    for i in range(m):
        h = sigmoid(np.dot(dataArr[i], weights))
        # Use this sample's own label, not the whole label sequence.
        error = labels[i] - h
        weights = weights + alpha * error * dataArr[i]
    return weights


# Stochastic gradient ascent - improved version
def stoGradAscent1(dataMatraix, classLabels, numIter=150):
    """Run stochastic gradient ascent with a decaying step and random order.

    Every pass visits each sample exactly once, in random order.

    Args:
        dataMatraix: m x n array-like of samples (name kept as-is for
            callers that pass it by keyword).
        classLabels: m labels, given either flat or as one-element rows.
        numIter: Number of passes over the data.

    Returns:
        A 1-D array of n weights.
    """
    dataArr = np.asarray(dataMatraix, dtype=float)
    labels = np.asarray(classLabels, dtype=float).ravel()
    m, n = dataArr.shape
    weights = np.ones(n)
    for j in range(numIter):
        dataIndex = list(range(m))  # samples not yet used in this pass
        for i in range(m):
            # Shrinks with pass and step count; the 0.01 floor keeps it > 0.
            alpha = 4 / (1.0 + j + i) + 0.01
            randIndex = np.random.randint(len(dataIndex))
            # Map the draw through dataIndex and remove it, so a sample
            # cannot be picked twice in one pass.
            sampleIndex = dataIndex.pop(randIndex)
            h = sigmoid(np.dot(dataArr[sampleIndex], weights))
            error = labels[sampleIndex] - h
            weights = weights + alpha * error * dataArr[sampleIndex]
    return weights
2.4实际应用
和前面朴素贝叶斯都差不多,预处理数据-->>训练-->>测试
# Classification helpers
def classifyVector(inX, weight):
    """Return 1.0 if the logistic score of ``inX`` exceeds 0.5, else 0.0."""
    prob = sigmoid(np.dot(np.ravel(inX), np.ravel(weight)))
    return 1.0 if prob > 0.5 else 0.0


def _loadColicFile(fileName):
    """Parse a tab-separated file into (feature rows, float labels)."""
    featureRows = []
    labels = []
    with open(fileName) as fr:
        for line in fr:
            currLine = line.strip().split('\t')
            if currLine == ['']:
                continue  # tolerate blank lines
            # The last column is the label; everything before is a feature.
            featureRows.append([float(value) for value in currLine[:-1]])
            labels.append(float(currLine[-1]))
    return featureRows, labels


def colicTest(trainFile='horseColicTraining.txt',
              testFile='horseColicTest.txt', numIter=500):
    """Train on ``trainFile`` and return the error rate on ``testFile``.

    Args:
        trainFile: Tab-separated training data, label in the last column.
        testFile: Tab-separated test data in the same layout.
        numIter: Passes handed to ``stoGradAscent1``.

    Returns:
        The fraction of misclassified test rows.
    """
    # Read both files up front so a missing test file fails before training.
    trainingSet, trainingLabel = _loadColicFile(trainFile)
    testSet, testLabel = _loadColicFile(testFile)

    trainWeight = stoGradAscent1(np.array(trainingSet), trainingLabel,
                                 numIter)
    errorCount = 0.0
    numTestVec = 0.0
    for features, label in zip(testSet, testLabel):
        numTestVec += 1.0
        # Labels were parsed as floats, so "1" and "1.000000" both work.
        if int(classifyVector(np.array(features), trainWeight)) != int(label):
            errorCount += 1
    errorRate = errorCount / numTestVec
    print('the error Rate is : ', errorRate, '\n')
    return errorRate


def multiTest(numTest=10):
    """Run ``colicTest`` ``numTest`` times and print the mean error rate."""
    errorSum = 0.0
    for _ in range(numTest):
        errorSum += colicTest()
    print('error Rate Average is : ', (errorSum / numTest))
2.5总程序
import numpy as np


def loadDataSet(fileName='testSet.txt'):
    """Read a whitespace-separated 2-feature data set.

    Each non-blank line must hold ``x1 x2 label``.

    Args:
        fileName: Path of the data file. Defaults to ``'testSet.txt'``.

    Returns:
        A ``(dataMat, labelMat)`` tuple. ``dataMat`` is a list of
        ``[1.0, x1, x2]`` rows (the leading 1.0 is the bias input) and
        ``labelMat`` is a list of one-element ``[label]`` rows.
    """
    dataMat = []
    labelMat = []
    # The context manager closes the file even if a line fails to parse.
    with open(fileName) as fr:
        for line in fr:
            lineArr = line.strip().split()
            if not lineArr:
                continue  # tolerate blank lines
            dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
            labelMat.append([int(lineArr[2])])
    return dataMat, labelMat


def sigmoid(intX):
    """Return the logistic function 1 / (1 + exp(-intX)), element-wise."""
    return 1.0 / (1.0 + np.exp(-intX))


# Full-batch gradient ascent
def gradAscent(dataMatIn, classLabels, alpha=0.001, maxCycle=200):
    """Fit logistic-regression weights with full-batch gradient ascent.

    Args:
        dataMatIn: m x n sample rows; the bias is treated as one more weight,
            so callers include a constant column themselves.
        classLabels: m labels, given either flat or as m one-element rows.
        alpha: Step size.
        maxCycle: Number of full passes over the data.

    Returns:
        An n x 1 ``numpy.matrix`` of weights.
    """
    # np.asmatrix replaces np.mat, which no longer exists in NumPy 2.0.
    dataMatrix = np.asmatrix(dataMatIn, dtype=float)
    # Force a column vector so a flat label list cannot broadcast into an
    # m x m error matrix.
    labelsMat = np.asmatrix(
        np.asarray(classLabels, dtype=float).reshape(-1, 1))
    n = dataMatrix.shape[1]
    weight = np.asmatrix(np.ones((n, 1)))
    for _ in range(maxCycle):
        h = sigmoid(dataMatrix * weight)
        error = labelsMat - h
        weight = weight + alpha * dataMatrix.T * error
    return weight


def plotBestFit(weight):
    """Scatter the samples from ``loadDataSet()`` and draw the decision line.

    Args:
        weight: Three weights ``(w0, w1, w2)``. Any array-like is accepted
            (n x 1 matrix, 1-D array, list); it is flattened before use.
    """
    # Imported here so training and testing do not require matplotlib.
    import matplotlib.pyplot as plt

    dataMat, labelMat = loadDataSet()
    dataArr = np.array(dataMat, dtype=float).reshape(-1, 3)
    isPositive = np.asarray(labelMat).ravel() == 1

    fig = plt.figure("data_x_y")
    ax = fig.add_subplot(111)
    ax.scatter(dataArr[isPositive, 1], dataArr[isPositive, 2],
               s=30, c='r', marker='s')
    ax.scatter(dataArr[~isPositive, 1], dataArr[~isPositive, 2],
               s=30, c='g')

    # Plain 1-D arrays replace np.mat (gone in NumPy 2.0) and can be handed
    # to plot() directly, with no list conversion.
    w = np.asarray(weight, dtype=float).ravel()
    x = np.arange(-3.0, 3.0, 0.1)
    # Boundary: w0 + w1*x1 + w2*x2 = 0, solved for x2.
    y = (-w[0] - w[1] * x) / w[2]
    ax.plot(x, y)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()


# Stochastic gradient ascent - original version
def stoGradAscent0(dataMatrix, classLabels, alpha=0.01):
    """Run one in-order pass of stochastic gradient ascent.

    Args:
        dataMatrix: m x n array-like of samples.
        classLabels: m labels, given either flat or as one-element rows.
        alpha: Fixed step size.

    Returns:
        A 1-D array of n weights.
    """
    dataArr = np.asarray(dataMatrix, dtype=float)
    labels = np.asarray(classLabels, dtype=float).ravel()
    m, n = dataArr.shape
    weights = np.ones(n)  # start from ones rather than zeros
    for i in range(m):
        h = sigmoid(np.dot(dataArr[i], weights))
        # Use this sample's own label, not the whole label sequence.
        error = labels[i] - h
        weights = weights + alpha * error * dataArr[i]
    return weights


# Stochastic gradient ascent - improved version
def stoGradAscent1(dataMatraix, classLabels, numIter=150):
    """Run stochastic gradient ascent with a decaying step and random order.

    Every pass visits each sample exactly once, in random order.

    Args:
        dataMatraix: m x n array-like of samples (name kept as-is for
            callers that pass it by keyword).
        classLabels: m labels, given either flat or as one-element rows.
        numIter: Number of passes over the data.

    Returns:
        A 1-D array of n weights.
    """
    dataArr = np.asarray(dataMatraix, dtype=float)
    labels = np.asarray(classLabels, dtype=float).ravel()
    m, n = dataArr.shape
    weights = np.ones(n)
    for j in range(numIter):
        dataIndex = list(range(m))  # samples not yet used in this pass
        for i in range(m):
            # Shrinks with pass and step count; the 0.01 floor keeps it > 0.
            alpha = 4 / (1.0 + j + i) + 0.01
            randIndex = np.random.randint(len(dataIndex))
            # Map the draw through dataIndex and remove it, so a sample
            # cannot be picked twice in one pass.
            sampleIndex = dataIndex.pop(randIndex)
            h = sigmoid(np.dot(dataArr[sampleIndex], weights))
            error = labels[sampleIndex] - h
            weights = weights + alpha * error * dataArr[sampleIndex]
    return weights


# Classification helpers
def classifyVector(inX, weight):
    """Return 1.0 if the logistic score of ``inX`` exceeds 0.5, else 0.0."""
    prob = sigmoid(np.dot(np.ravel(inX), np.ravel(weight)))
    return 1.0 if prob > 0.5 else 0.0


def _loadColicFile(fileName):
    """Parse a tab-separated file into (feature rows, float labels)."""
    featureRows = []
    labels = []
    with open(fileName) as fr:
        for line in fr:
            currLine = line.strip().split('\t')
            if currLine == ['']:
                continue  # tolerate blank lines
            # The last column is the label; everything before is a feature.
            featureRows.append([float(value) for value in currLine[:-1]])
            labels.append(float(currLine[-1]))
    return featureRows, labels


def colicTest(trainFile='horseColicTraining.txt',
              testFile='horseColicTest.txt', numIter=500):
    """Train on ``trainFile`` and return the error rate on ``testFile``.

    Args:
        trainFile: Tab-separated training data, label in the last column.
        testFile: Tab-separated test data in the same layout.
        numIter: Passes handed to ``stoGradAscent1``.

    Returns:
        The fraction of misclassified test rows.
    """
    # Read both files up front so a missing test file fails before training.
    trainingSet, trainingLabel = _loadColicFile(trainFile)
    testSet, testLabel = _loadColicFile(testFile)

    # Improved stochastic gradient ascent (online learning).
    trainWeight = stoGradAscent1(np.array(trainingSet), trainingLabel,
                                 numIter)
    errorCount = 0.0
    numTestVec = 0.0
    for features, label in zip(testSet, testLabel):
        numTestVec += 1.0
        # Labels were parsed as floats, so "1" and "1.000000" both work.
        if int(classifyVector(np.array(features), trainWeight)) != int(label):
            errorCount += 1
    errorRate = errorCount / numTestVec
    print('the error Rate is : ', errorRate, '\n')
    return errorRate


def multiTest(numTest=10):
    """Run ``colicTest`` ``numTest`` times and print the mean error rate."""
    errorSum = 0.0
    for _ in range(numTest):
        errorSum += colicTest()
    print('error Rate Average is : ', (errorSum / numTest))