@hanxiaoyang 2016-05-07T08:58:52.000000Z 字数 4720 阅读 2456

# 七月算法回归示例

线性回归与逻辑回归

from __future__ import divisionimport pandas as pd import numpy as np import matplotlib.pyplot as plt import math#将原始数据读成矩阵形式，用到了numpy科学计算包data = np.genfromtxt('ex1data1.txt',delimiter=',')x=data[:,0]y=data[:,1]m=y.size()a=ones(m,2)#计算损失函数值并返回def cost(x, y, theta=np.zeros((2,1))):    """    计算和返回损失函数用的    theta：线性回归参数    x：输入    y：目标/target    """    m = len(x)    J = 1/(2*m) * sum((x.dot(theta).flatten()- y)**2)    return J#梯度下降求解损失函数最小值def gradientDesc(x, y, theta=np.zeros((2,1)), alpha=.01,iterations=1500):    """    单变量线性回归的梯度下降    """    m = y.size    J = []    for numbers in range(iterations):        a = theta[0][0] - alpha*(1/m)*sum((x.dot(theta).flatten() - y)*x[:,0])        b = theta[1][0] - alpha*(1/m)*sum((x.dot(theta).flatten() - y)*x[:,1])        theta[0][0],theta[1][0]=a,b        print theta[0][0]        print theta[1][0]        J.append(cost(x,y,theta))        print 'Cost: ' + str(J[-1])    return theta#scatterplot of data with option to save figure.def scatterPlot(x,y,yp=None,savePng=False):    plt.xlabel('Population of City in 10,000s')    plt.ylabel('Profit in \$10,000s')    plt.scatter(x, y, marker='x')    if yp != None:        plt.plot(x,yp)    if savePng==false:        plt.show()    else:        name = raw_input('Name Figure File: ')        plt.savefig(name+'.png')

6.1101,17.5925.5277,9.13028.5186,13.6627.0032,11.8545.8598,6.82338.3829,11.8867.4764,4.34838.5781,126.4862,6.59875.0546,3.81665.7107,3.252214.164,15.5055.734,3.15518.4084,7.22585.6407,0.716185.3794,3.51296.3654,5.30485.1301,0.560776.4296,3.65187.0708,5.38936.1891,3.138620.27,21.7675.4901,4.2636.3261,5.18755.5649,3.082518.945,22.63812.828,13.50110.957,7.046713.176,14.69222.203,24.1475.2524,-1.226.5894,5.99669.2482,12.1345.8918,1.84958.2111,6.54267.9334,4.56238.0959,4.11645.6063,3.392812.836,10.1176.3534,5.49745.4069,0.556576.8825,3.911511.708,5.38545.7737,2.44067.8247,6.73187.0931,1.04635.0702,5.13375.8014,1.84411.7,8.00435.5416,1.01797.5402,6.75045.3077,1.83967.4239,4.28857.6031,4.99816.3328,1.42336.3589,-1.42116.2742,2.47565.6397,4.60429.3102,3.96249.4536,5.41418.8254,5.16945.1793,-0.7427921.279,17.92914.908,12.05418.959,17.0547.2182,4.88528.2951,5.744210.236,7.77545.4994,1.017320.341,20.99210.136,6.67997.3345,4.02596.0062,1.27847.2259,3.34115.0269,-2.68076.5479,0.296787.5386,3.88455.0365,5.701410.274,6.75265.1077,2.05765.7292,0.479535.1884,0.204216.3557,0.678619.7687,7.54356.5159,5.34368.5172,4.24159.1802,6.79816.002,0.926955.5204,0.1525.0594,2.82145.7077,1.84517.6366,4.29595.8707,7.20295.3054,1.98698.2934,0.1445413.394,9.05515.4369,0.61705
"""Visualize the data1 training set for the logistic-regression demo.

Fix vs. the original: the first scatter call draws the ``y == 1``
points, so the first legend entry must be 'Pass' — the original
listed ['Fail', 'Pass'], swapping the labels.
"""
from numpy import loadtxt, where
from pylab import scatter, show, legend, xlabel, ylabel

# Load the data: two feature columns followed by a 0/1 label.
data = loadtxt('/home/Julyedu/data/data1.txt', delimiter=',')
X = data[:, 0:2]
y = data[:, 2]

pos = where(y == 1)
neg = where(y == 0)
scatter(X[pos, 0], X[pos, 1], marker='o', c='b')
scatter(X[neg, 0], X[neg, 1], marker='x', c='r')
xlabel('Feature1/Exam 1 score')
ylabel('Feature2/Exam 2 score')
# Legend entries follow plot order: positives ('o') were drawn first.
legend(['Pass', 'Fail'])
show()

def sigmoid(X):
    """Element-wise logistic function 1 / (1 + exp(-X)).

    The original used the bare name ``e`` (undefined unless pulled in
    by a star-import); ``np.exp`` is explicit and vectorized.
    """
    return 1.0 / (1.0 + np.exp(-1.0 * X))


def compute_cost(theta, X, y):
    """Logistic-regression cost and gradient.

    theta : parameter vector, reshaped to a column internally.
    X     : (m, n) design matrix.
    y     : (m, 1) 0/1 labels.
    Returns (J, grad): scalar cross-entropy cost and (n, 1) gradient.
    """
    m = X.shape[0]                              # number of training samples
    theta = np.reshape(theta, (len(theta), 1))  # ensure a column vector
    h = sigmoid(X.dot(theta))                   # hypothesis, shape (m, 1)
    # J = -(1/m) [ y'.log(h) + (1-y)'.log(1-h) ]
    J = (1. / m) * (-np.transpose(y).dot(np.log(h))
                    - np.transpose(1 - y).dot(np.log(1 - h)))
    # grad = (1/m) X'(h - y)
    grad = np.transpose((1. / m) * np.transpose(h - y).dot(X))
    return J[0][0], grad


def compute_grad(theta, X, y):
    """Gradient of the (unregularized) logistic cost, per component.

    Fixes vs. the original: ``m`` came from an undefined global and
    the feature count was hard-coded to 3 — both now derive from X
    (backward compatible).  The original also multiplied each
    component by -1, contradicting the gradient that compute_cost
    returns (a minimizer needs +(1/m) X'(h - y)); the sign flip is
    removed for consistency.
    """
    m, n = X.shape
    col_theta = np.reshape(theta, (n, 1))   # no in-place shape mutation
    h = sigmoid(X.dot(col_theta))
    delta = (h - y).ravel()                 # assumes y is (m, 1) or (m,)
    grad = np.zeros(n)
    for i in range(n):
        grad[i] = (1.0 / m) * delta.dot(X[:, i])
    return grad

"""Visualize the data2 microchip QA training set.

Fixes vs. the original: the y-axis label duplicated
'Microchip Test 1' (copy-paste error), and the legend order did not
match the plot order (positives are drawn first).
"""
from numpy import loadtxt, where
from pylab import scatter, show, legend, xlabel, ylabel

# Load the data: two test-score columns followed by a 0/1 label.
data = loadtxt('/home/Julyedu/data/data2.txt', delimiter=',')
X = data[:, 0:2]
y = data[:, 2]

pos = where(y == 1)
neg = where(y == 0)
scatter(X[pos, 0], X[pos, 1], marker='o', c='b')
scatter(X[neg, 0], X[neg, 1], marker='x', c='r')
xlabel('Microchip Test 1')
ylabel('Microchip Test 2')  # was 'Microchip Test 1' twice
legend(['1', '0'])          # first entry = first scatter (y == 1)
show()

def map_feature(x1, x2, degree=6):
    """Map two feature columns to all polynomial terms up to ``degree``.

    Produces the columns 1, x1, x2, x1**2, x1*x2, x2**2, ...,
    x2**degree — 28 columns for the default degree of 6.

    Improvements vs. the original: ``degree`` is a parameter (default
    unchanged); the inputs are no longer reshaped in place (the
    original mutated the callers' arrays via ``x1.shape = ...``);
    NumPy is referenced explicitly instead of via a star-import.
    """
    c1 = np.reshape(x1, (np.size(x1), 1))
    c2 = np.reshape(x2, (np.size(x2), 1))
    cols = [np.ones((c1.shape[0], 1))]          # bias column first
    for total in range(1, degree + 1):
        for j in range(total + 1):
            # Every monomial of combined degree `total`.
            cols.append((c1 ** (total - j)) * (c2 ** j))
    return np.hstack(cols)


if __name__ == '__main__':
    # Demo: expand the two raw features loaded by the preceding script.
    mapped_fea = map_feature(X[:, 0], X[:, 1])

def cost_function_reg(theta, X, y, l):
    """Regularized logistic-regression cost and gradient.

    theta : (n, 1) parameters; theta[0] (intercept) is not regularized.
    X     : (m, n) design matrix whose first column is all ones.
    y     : (m, 1) 0/1 labels.
    l     : regularization strength lambda.
    Returns (J.flatten(), flattened gradient), as the original did.

    Fixes vs. the original: the intercept gradient used ``X[:, 1]``
    instead of the ones column ``X[:, 0]`` and referenced the
    undefined name ``sumdelta``; ``m`` is now derived from X rather
    than read from a global; the sigmoid is computed inline so the
    function stands alone.
    """
    m = X.shape[0]
    h = 1.0 / (1.0 + np.exp(-X.dot(theta)))     # hypothesis, shape (m, 1)
    thetaR = theta[1:, 0]                       # exclude intercept from penalty
    J = (1.0 / m) * ((-y.T.dot(np.log(h))) - ((1 - y.T).dot(np.log(1.0 - h)))) \
        + (l / (2.0 * m)) * (thetaR.T.dot(thetaR))
    delta = h - y
    # Intercept gradient: (1/m) * sum(delta), via the ones column.
    grad1 = (1.0 / m) * delta.T.dot(X[:, 0])
    # Remaining components carry the regularization term.
    XR = X[:, 1:X.shape[1]]
    sum_delta = delta.T.dot(XR)
    grad = (1.0 / m) * (sum_delta + l * thetaR)
    out = np.zeros(shape=(grad.shape[0], grad.shape[1] + 1))
    out[:, 0] = grad1
    out[:, 1:] = grad
    return J.flatten(), out.T.flatten()


if __name__ == '__main__':
    m, n = X.shape
    y.shape = (m, 1)
    it = map_feature(X[:, 0], X[:, 1])
    # Initialize parameters; regularization strength lambda = 1.
    initial_theta = np.zeros(shape=(it.shape[1], 1))
    l = 1
    cost, grad = cost_function_reg(initial_theta, it, y, l)

    def decorated_cost(theta):
        # NOTE(review): fmin_bfgs expects a scalar objective; this
        # returns a (cost, grad) tuple as in the original — confirm
        # against scipy's fmin_bfgs signature before relying on it.
        return cost_function_reg(theta, it, y, l)

    print(fmin_bfgs(decorated_cost, initial_theta, maxfun=500))

• 私有
• 公开
• 删除