@hanxiaoyang 2016-05-07T08:58:52.000000Z

七月算法 (Julyedu) Regression Examples

Linear Regression and Logistic Regression


Gradient Descent
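
The linear regression code below fits the parameters by batch gradient descent. As a reference for the update rule the loop implements (my summary of the standard rule, not text from the original post), each iteration moves every parameter against the gradient of the squared-error cost:

$$\theta_j := \theta_j - \alpha \frac{1}{m} \sum_{i=1}^{m} \left( h_\theta(x^{(i)}) - y^{(i)} \right) x_j^{(i)}, \qquad h_\theta(x) = \theta^T x$$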

Linear Regression Code

from __future__ import division
import numpy as np
import matplotlib.pyplot as plt

# Read the raw data into an array, using the numpy scientific computing package
data = np.genfromtxt('ex1data1.txt', delimiter=',')
x = data[:, 0]
y = data[:, 1]
m = y.size
# Design matrix: a column of ones (intercept term) next to the input feature
X = np.column_stack((np.ones(m), x))

def cost(X, y, theta=np.zeros((2, 1))):
    """
    Compute and return the value of the cost function.
    theta: linear regression parameters
    X: input (design matrix)
    y: target
    """
    m = len(y)
    J = 1 / (2 * m) * sum((X.dot(theta).flatten() - y) ** 2)
    return J

def gradientDesc(X, y, theta=np.zeros((2, 1)), alpha=.01, iterations=1500):
    """
    Gradient descent for univariate linear regression.
    """
    m = y.size
    J = []
    for numbers in range(iterations):
        a = theta[0][0] - alpha * (1 / m) * sum((X.dot(theta).flatten() - y) * X[:, 0])
        b = theta[1][0] - alpha * (1 / m) * sum((X.dot(theta).flatten() - y) * X[:, 1])
        theta[0][0], theta[1][0] = a, b
        print(theta[0][0])
        print(theta[1][0])
        J.append(cost(X, y, theta))
        print('Cost: ' + str(J[-1]))
    return theta

# Scatter plot of the data, with the option to save the figure.
def scatterPlot(x, y, yp=None, savePng=False):
    plt.xlabel('Population of City in 10,000s')
    plt.ylabel('Profit in $10,000s')
    plt.scatter(x, y, marker='x')
    if yp is not None:
        plt.plot(x, yp)
    if not savePng:
        plt.show()
    else:
        name = input('Name Figure File: ')
        plt.savefig(name + '.png')
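
A minimal usage sketch of the functions above (my addition; it assumes the script has just run, so the design matrix X and the raw columns x and y are in scope):

theta = gradientDesc(X, y)       # fit by gradient descent
yp = X.dot(theta).flatten()      # predictions of the fitted line
scatterPlot(x, y, yp)            # overlay the fit on the raw data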

Data

6.1101,17.592
5.5277,9.1302
8.5186,13.662
7.0032,11.854
5.8598,6.8233
8.3829,11.886
7.4764,4.3483
8.5781,12
6.4862,6.5987
5.0546,3.8166
5.7107,3.2522
14.164,15.505
5.734,3.1551
8.4084,7.2258
5.6407,0.71618
5.3794,3.5129
6.3654,5.3048
5.1301,0.56077
6.4296,3.6518
7.0708,5.3893
6.1891,3.1386
20.27,21.767
5.4901,4.263
6.3261,5.1875
5.5649,3.0825
18.945,22.638
12.828,13.501
10.957,7.0467
13.176,14.692
22.203,24.147
5.2524,-1.22
6.5894,5.9966
9.2482,12.134
5.8918,1.8495
8.2111,6.5426
7.9334,4.5623
8.0959,4.1164
5.6063,3.3928
12.836,10.117
6.3534,5.4974
5.4069,0.55657
6.8825,3.9115
11.708,5.3854
5.7737,2.4406
7.8247,6.7318
7.0931,1.0463
5.0702,5.1337
5.8014,1.844
11.7,8.0043
5.5416,1.0179
7.5402,6.7504
5.3077,1.8396
7.4239,4.2885
7.6031,4.9981
6.3328,1.4233
6.3589,-1.4211
6.2742,2.4756
5.6397,4.6042
9.3102,3.9624
9.4536,5.4141
8.8254,5.1694
5.1793,-0.74279
21.279,17.929
14.908,12.054
18.959,17.054
7.2182,4.8852
8.2951,5.7442
10.236,7.7754
5.4994,1.0173
20.341,20.992
10.136,6.6799
7.3345,4.0259
6.0062,1.2784
7.2259,3.3411
5.0269,-2.6807
6.5479,0.29678
7.5386,3.8845
5.0365,5.7014
10.274,6.7526
5.1077,2.0576
5.7292,0.47953
5.1884,0.20421
6.3557,0.67861
9.7687,7.5435
6.5159,5.3436
8.5172,4.2415
9.1802,6.7981
6.002,0.92695
5.5204,0.152
5.0594,2.8214
5.7077,1.8451
7.6366,4.2959
5.8707,7.2029
5.3054,1.9869
8.2934,0.14454
13.394,9.0551
5.4369,0.61705

Logistic Regression
https://github.com/tarlen5/coursera_ml/tree/master/unit6

from numpy import loadtxt, where
from pylab import scatter, show, legend, xlabel, ylabel
# Read the data
data = loadtxt('/home/Julyedu/data/data1.txt', delimiter=',')
X = data[:, 0:2]
y = data[:, 2]
pos = where(y == 1)
neg = where(y == 0)
scatter(X[pos, 0], X[pos, 1], marker='o', c='b')
scatter(X[neg, 0], X[neg, 1], marker='x', c='r')
xlabel('Feature1/Exam 1 score')
ylabel('Feature2/Exam 2 score')
# Legend entries follow plotting order: positives (y == 1) first
legend(['Pass', 'Fail'])
show()


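Before the cost and gradient code, it helps to state the objective it implements (the standard logistic regression cost; my summary, not text from the original post):

$$J(\theta) = \frac{1}{m} \sum_{i=1}^{m} \left[ -y^{(i)} \log h_\theta(x^{(i)}) - \left(1 - y^{(i)}\right) \log\left(1 - h_\theta(x^{(i)})\right) \right], \qquad h_\theta(x) = \frac{1}{1 + e^{-\theta^T x}}$$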

from numpy import e, log, reshape, transpose, zeros

def sigmoid(X):
    '''Define the sigmoid function'''
    den = 1.0 + e ** (-1.0 * X)
    gz = 1.0 / den
    return gz

def compute_cost(theta, X, y):
    '''Compute the cost function value and its gradient; y is a column vector of shape (m, 1)'''
    m = X.shape[0]  # number of training examples
    theta = reshape(theta, (len(theta), 1))  # parameter theta as a column vector
    J = (1. / m) * (-transpose(y).dot(log(sigmoid(X.dot(theta))))
                    - transpose(1 - y).dot(log(1 - sigmoid(X.dot(theta)))))
    grad = transpose((1. / m) * transpose(sigmoid(X.dot(theta)) - y).dot(X))
    return J[0][0], grad

def compute_grad(theta, X, y):
    '''Compute the gradient; y is a flat vector of labels of shape (m,)'''
    m = X.shape[0]  # number of training examples
    theta.shape = (1, 3)
    grad = zeros(3)
    h = sigmoid(X.dot(theta.T)).flatten()
    delta = h - y
    l = grad.size
    for i in range(l):
        sumdelta = delta.T.dot(X[:, i])
        grad[i] = (1.0 / m) * sumdelta  # gradient of the cost is (1/m) * X^T (h - y)
    theta.shape = (3,)
    return grad
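
For completeness, a minimal sketch (my addition, not part of the course snippet) of how compute_cost can drive an off-the-shelf optimizer; it assumes X and y were loaded by the plotting snippet above:

import numpy as np
from scipy.optimize import fmin_bfgs

X_aug = np.column_stack((np.ones(X.shape[0]), X))  # prepend the intercept column
y_col = y.reshape(-1, 1)                           # column vector, as compute_cost expects

def f(theta):
    return compute_cost(theta, X_aug, y_col)[0]

def fprime(theta):
    return compute_cost(theta, X_aug, y_col)[1].flatten()

theta_opt = fmin_bfgs(f, np.zeros(X_aug.shape[1]), fprime=fprime)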



from numpy import loadtxt, where
from pylab import scatter, show, legend, xlabel, ylabel
# Read the data
data = loadtxt('/home/Julyedu/data/data2.txt', delimiter=',')
X = data[:, 0:2]
y = data[:, 2]
pos = where(y == 1)
neg = where(y == 0)
scatter(X[pos, 0], X[pos, 1], marker='o', c='b')
scatter(X[neg, 0], X[neg, 1], marker='x', c='r')
xlabel('Microchip Test 1')
ylabel('Microchip Test 2')
# Legend entries follow plotting order: positives (y == 1) first
legend(['1', '0'])
show()


from numpy import ones, append

def map_feature(x1, x2):
    '''
    Feature mapping to polynomial combination features:
    X1, X2, X1 ** 2, X2 ** 2, X1*X2, X1*X2 ** 2, and so on...
    '''
    x1.shape = (x1.size, 1)
    x2.shape = (x2.size, 1)
    degree = 6
    mapped_fea = ones(shape=(x1[:, 0].size, 1))
    m, n = mapped_fea.shape
    for i in range(1, degree + 1):
        for j in range(i + 1):
            r = (x1 ** (i - j)) * (x2 ** j)
            mapped_fea = append(mapped_fea, r, axis=1)
    return mapped_fea

mapped_fea = map_feature(X[:, 0], X[:, 1])
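
A quick sanity check (my addition): with degree = 6 the double loop appends 2 + 3 + ... + 7 = 27 polynomial columns after the bias column, so the mapped matrix should have 28 features.

print(mapped_fea.shape)  # expect (m, 28) for degree = 6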

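The regularized version below adds an L2 penalty on every parameter except the intercept theta_0 (again my summary of the standard objective; l in the code plays the role of lambda):

$$J(\theta) = \frac{1}{m} \sum_{i=1}^{m} \left[ -y^{(i)} \log h_\theta(x^{(i)}) - \left(1 - y^{(i)}\right) \log\left(1 - h_\theta(x^{(i)})\right) \right] + \frac{\lambda}{2m} \sum_{j=1}^{n} \theta_j^2$$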

from numpy import log, zeros
from scipy.optimize import fmin_bfgs

def cost_function_reg(theta, X, y, l):
    '''
    Compute the regularized cost and gradient
    '''
    m = X.shape[0]
    theta = theta.reshape(-1, 1)  # also accept the flat theta that fmin_bfgs passes
    h = sigmoid(X.dot(theta))
    thetaR = theta[1:, 0]  # theta_0 is not regularized
    J = (1.0 / m) * ((-y.T.dot(log(h))) - ((1 - y.T).dot(log(1.0 - h)))) \
        + (l / (2.0 * m)) * (thetaR.T.dot(thetaR))
    delta = h - y
    # gradient of the intercept term (column 0, no penalty)
    sum_delta = delta.T.dot(X[:, 0])
    grad1 = (1.0 / m) * sum_delta
    # gradient of the remaining terms, with the L2 penalty
    XR = X[:, 1:X.shape[1]]
    sum_delta = delta.T.dot(XR)
    grad = (1.0 / m) * (sum_delta + l * thetaR)
    out = zeros(shape=(grad.shape[0], grad.shape[1] + 1))
    out[:, 0] = grad1
    out[:, 1:] = grad
    return J.flatten()[0], out.T.flatten()

m, n = X.shape
y.shape = (m, 1)
it = map_feature(X[:, 0], X[:, 1])
# Initialize the parameters
initial_theta = zeros(shape=(it.shape[1], 1))
# Set the regularization coefficient to 1
l = 1
cost, grad = cost_function_reg(initial_theta, it, y, l)

def decorated_cost(theta):
    # fmin_bfgs minimizes a scalar-valued objective
    return cost_function_reg(theta, it, y, l)[0]

print(fmin_bfgs(decorated_cost, initial_theta, maxiter=500))
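
To visualize what the regularized fit learned, here is a hedged sketch (my addition, following the standard Coursera exercise rather than the original post): evaluate theta over a grid of points, map each point through the same polynomial features, and draw the zero contour as the decision boundary.

from numpy import array, linspace, zeros
from pylab import contour, show

theta_opt = fmin_bfgs(decorated_cost, initial_theta, maxiter=500)
u = linspace(-1, 1.5, 50)
v = linspace(-1, 1.5, 50)
z = zeros(shape=(len(u), len(v)))
for i in range(len(u)):
    for j in range(len(v)):
        # same polynomial feature mapping as the training data
        z[i, j] = map_feature(array([u[i]]), array([v[j]])).dot(theta_opt)[0]
contour(u, v, z.T, levels=[0])  # decision boundary: theta^T * phi(x) = 0
show()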

