[关闭]
@zsh-o 2018-07-19T13:23:30.000000Z 字数 5447 阅读 1154

7 - SVM-SMO —— 代码

《统计学习方法》


只有代码和执行可视化结果,原理请移步另一篇:SVM - 原理

  1. %matplotlib inline
  2. import numpy as np
  3. from matplotlib import pyplot as plt
  4. epsilon = 1e-8
  1. class SVM(object):
  2. def linear_kernel(self, x, y, b=1.):
  3. return x @ y.T + b
  4. def rbf_kernel(self, x, y, sigma=1.):
  5. if np.ndim(x) == 1 and np.ndim(y) == 1:
  6. return np.exp(- np.linalg.norm(x - y) / (2 * sigma **2))
  7. elif np.ndim(x) > 1 and np.ndim(y) > 1:
  8. return np.exp(- np.linalg.norm(x[:, np.newaxis] - y[np.newaxis, :], axis=2) / (2 * sigma **2))
  9. else:
  10. return np.exp(- np.linalg.norm(x - y, axis=1) / (2 * sigma ** 2))
  11. def __init__(self, C=1.0, tol=1e-3, eps=1e-3, kernel_func='rbf'):
  12. self.C = C
  13. self.tol = tol
  14. self.eps = eps
  15. self.b = 0.
  16. if kernel_func is 'linear':
  17. self.kernel_func = self.linear_kernel
  18. elif kernel_func is 'rbf':
  19. self.kernel_func = self.rbf_kernel
  20. else:
  21. if callable(kernel_func) is True:
  22. self.kernel_func = kernel_func
  23. else:
  24. print('Error kernel function')
  25. def objective_function(self, alphas):
  26. calpha = alphas[:, np.newaxis]
  27. cY = self.Y[:, np.newaxis]
  28. return 0.5 * np.sum(calpha * calpha.T * cY * cY.T * self.K) - np.sum(alphas)
  29. def decision_function(self, x_test):
  30. k = self.kernel_func(self.X, x_test)
  31. return self.alpha * self.Y @ k + self.b
  32. def update_E(self):
  33. self.E = self.alpha * self.Y @ self.K + self.b - self.Y
  34. def step(self, i, j):
  35. if i == j:
  36. return 0
  37. alpha1, alpha2 = self.alpha[[i, j]] # old alpha1, old alpha2
  38. y1, y2 = self.Y[[i, j]]
  39. E1, E2 = self.E[[i, j]]
  40. s = y1 * y2
  41. K11, K12, K22 = self.K[i, i], self.K[i, j], self.K[j, j]
  42. eta = K11 + K22 - 2 * K12 ## 非正定核会出现eta < 0,K(x,y) = <phi(x),phi(y)>
  43. b = self.b # old b
  44. if y1 == y2:
  45. L = max(0, alpha2 + alpha1 - self.C)
  46. H = min(self.C, alpha2 + alpha1)
  47. else:
  48. L = max(0, alpha2 - alpha1)
  49. H = min(self.C, alpha2 - alpha1 + self.C)
  50. if L == H:
  51. return 0
  52. if eta > 0:
  53. a2 = alpha2 + y2 * (E1 - E2) / eta
  54. a2 = min(a2, H)
  55. a2 = max(a2, L)
  56. else:
  57. alpha_adj = self.alpha.copy()
  58. alpha_adj[j] = L
  59. Lobj = self.objective_function(alpha_adj)
  60. alpha_adj[j] = H
  61. Hobj = self.objective_function(alpha_adj)
  62. if Lobj < (Hobj - self.eps):
  63. a2 = L
  64. elif Lobj > (Hobj + self.eps):
  65. a2 = H
  66. else:
  67. a2 = alpha2
  68. if a2 < epsilon:
  69. a2 = 0.
  70. elif a2 > (self.C - epsilon):
  71. a2 = self.C
  72. if np.abs(a2 - alpha2) < self.eps * (a2 + alpha2 + self.eps): # ?
  73. return 0
  74. a1 = alpha1 + s * (alpha2 - a2)
  75. b1 = y1 * K11 * (alpha1 - a1) + y2 * K12 * (alpha2 - a2) - E1 + b
  76. b2 = y1 * K12 * (alpha1 - a1) + y2 * K22 * (alpha2 - a2) - E2 + b
  77. if a1 > 0 and a1 < self.C:
  78. b_new = b1
  79. elif a2 > 0 and a2 < self.C:
  80. b_new = b2
  81. else:
  82. b_new = (b1 + b2) * 0.5
  83. self.b = b_new
  84. self.alpha[i] = a1
  85. self.alpha[j] = a2
  86. self.update_E()
  87. return 1
  88. def examineExample(self, j):
  89. alpha2 = self.alpha[j]
  90. E2 = self.E[j]
  91. y2 = self.Y[j]
  92. r = E2 * y2 # y_i * f_i - 1
  93. if (r < -self.tol and alpha2 < self.C) or (r > self.tol and alpha2 > 0): ## 检验tol-KKT条件,tol指KKT条件的精度
  94. if len(self.alpha[(self.alpha!=0) & (self.alpha != self.C)]) > 1: ## 为什么要保证边界上的点大于1个?才选择最优的alpha2,一般来说更改边界上的点才会使得值最大,毕竟边界上的点对最终优化结果影响最大
  95. # delta_E = np.abs(E2 - self.E)
  96. delta_E = np.abs((E2 - self.E) / self.K[j, :]) ##
  97. i = np.argmax(delta_E)
  98. if self.step(i, j):
  99. return 1
  100. for i in np.roll(np.where((self.alpha != 0) & (self.alpha != self.C))[0],
  101. np.random.choice(np.arange(self.N))):
  102. if self.step(i, j):
  103. return 1
  104. for i in np.roll(np.arange(self.N), np.random.choice(np.arange(self.N))):
  105. if self.step(i, j):
  106. return 1
  107. return 0
  108. def fit(self, X,Y):
  109. self.X, self.Y = X, Y
  110. self.N, self.M = X.shape
  111. self.alpha = np.zeros(self.N)
  112. self.K = self.kernel_func(X, X)
  113. self.update_E()
  114. numChanged = 0
  115. examineAll = True
  116. while numChanged > 0 or examineAll:
  117. numChanged = 0
  118. if examineAll is True:
  119. for j in range(self.N):
  120. numChanged += self.examineExample(j)
  121. else:
  122. for j in np.where((self.alpha != 0) & (self.alpha != self.C))[0]:
  123. numChanged += self.examineExample(j)
  124. if examineAll is True:
  125. examineAll = False
  126. elif numChanged == 0:
  127. examineAll = True
  128. def predict(self, Xs):
  129. return np.sign(self.decision_function(Xs))
  1. from sklearn.datasets import make_moons
  2. from sklearn.datasets import make_circles
  3. from sklearn.preprocessing import StandardScaler
  1. X, Y = make_moons(n_samples=500, shuffle=True, noise=0.1)
  2. scaler = StandardScaler()
  3. X = scaler.fit_transform(X)
  4. Y[Y==0] = -1
  1. svm = SVM(C=1)
  2. svm.fit(X, Y)
  1. def plot_decision_boundary(model, resolution=100, colors=('b', 'k', 'r'), figsize=(14,6)):
  2. plt.figure(figsize=figsize)
  3. xrange = np.linspace(model.X[:,0].min(), model.X[:,0].max(), resolution)
  4. yrange = np.linspace(model.X[:,1].min(), model.X[:,1].max(), resolution)
  5. grid = [[model.decision_function(np.array([xr, yr])) for yr in yrange] for xr in xrange]
  6. grid = np.array(grid).reshape(len(xrange), len(yrange))
  7. # 左边
  8. plt.subplot(121)
  9. c_1_i = model.Y == -1
  10. plt.scatter(model.X[:,0][c_1_i], model.X[:,1][c_1_i], c='blueviolet', marker='.', alpha=0.8, s=20)
  11. c_2_i = np.logical_not(c_1_i)
  12. plt.scatter(model.X[:,0][c_2_i], model.X[:,1][c_2_i], c='teal', marker='.', alpha=0.8, s=20)
  13. plt.contour(xrange, yrange, grid.T, (0,), linewidths=(1,),
  14. linestyles=('-',), colors=colors[1])
  15. #右边
  16. plt.subplot(122)
  17. plt.contour(xrange, yrange, grid.T, (-1, 0, 1), linewidths=(1, 1, 1),
  18. linestyles=('--', '-', '--'), colors=colors)
  19. c_1_i = model.Y == -1
  20. plt.scatter(model.X[:,0][c_1_i], model.X[:,1][c_1_i], c='blueviolet', marker='.', alpha=0.6, s=20)
  21. c_2_i = np.logical_not(c_1_i)
  22. plt.scatter(model.X[:,0][c_2_i], model.X[:,1][c_2_i], c='teal', marker='.', alpha=0.6, s=20)
  23. mask1 = (model.alpha > epsilon) & (model.Y == -1)
  24. mask2 = (model.alpha > epsilon) & (model.Y == 1)
  25. plt.scatter(model.X[:,0][mask1], model.X[:,1][mask1],
  26. c='blueviolet', marker='v', alpha=1, s=20)
  27. plt.scatter(model.X[:,0][mask2], model.X[:,1][mask2],
  28. c='teal', marker='v', alpha=1, s=20)
  1. plot_decision_boundary(svm)

output_6_0.png-61.8kB

  1. X, Y = make_moons(n_samples=500, shuffle=True, noise=0.2)
  2. scaler = StandardScaler()
  3. X = scaler.fit_transform(X)
  4. Y[Y==0] = -1
  1. svm = SVM(C=1)
  2. svm.fit(X, Y)
  1. plot_decision_boundary(svm)

output_9_0.png-57.5kB

  1. X, Y = make_circles(n_samples=500, shuffle=True, factor=0.3, noise=0.1)
  2. scaler = StandardScaler()
  3. X = scaler.fit_transform(X)
  4. Y[Y==0] = -1
  1. svm = SVM(C=1)
  2. svm.fit(X, Y)
  1. plot_decision_boundary(svm)

output_12_0.png-55.4kB

  1. X, Y = make_circles(n_samples=500, shuffle=True, factor=0.3, noise=0.25)
  2. scaler = StandardScaler()
  3. X = scaler.fit_transform(X)
  4. Y[Y==0] = -1
  1. svm = SVM(C=1)
  2. svm.fit(X, Y)
  1. plot_decision_boundary(svm)

output_15_0.png-55.3kB


SMO的原始论文写得非常好,现有的SMO的代码基本上都是按照论文中给出的代码结构来写的

添加新批注
在作者公开此批注前,只有你和作者可见。
回复批注