@hainingwyx 2016-11-28T06:20:32.000000Z 字数 13869 阅读 1974

Numpy基础

Python Numpy

NumPy Basics: Arrays and Vectorized Computation

%matplotlib inline

from __future__ import division
from numpy.random import randn
import numpy as np
np.set_printoptions(precision=4, suppress=True)

The NumPy ndarray: a multidimensional array object

data = randn(2, 3)

print data
print data * 10
print data + data

[[ 2.5656  1.3439  1.0321]
 [-1.1227  1.4023 -0.9263]]
[[ 25.6562  13.439   10.3213]
 [-11.2269  14.0228  -9.2631]]
[[ 5.1312  2.6878  2.0643]
 [-2.2454  2.8046 -1.8526]]

print data.shape
print data.dtype

(2L, 3L)
float64

Creating ndarrays/创建数组

data1 = [6, 7.5, 8, 0, 1]
arr1 = np.array(data1)#接受一切序列型的对象
arr1

array([ 6. ,  7.5,  8. ,  0. ,  1. ])

data2 = [[1, 2, 3, 4], [5, 6, 7, 8]]
arr2 = np.array(data2)
print arr2
print arr2.ndim
print arr2.shape

[[1 2 3 4]
 [5 6 7 8]]
2
(2L, 4L)

print arr1.dtype
print arr2.dtype

float64
int32

print np.zeros(10)
print np.zeros((3, 6))
print np.empty((2, 3, 2))#多数情况下返回未初始化的垃圾值

[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
[[ 0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.]]
[[[  4.9811e-316   2.8162e-322]
  [  0.0000e+000   0.0000e+000]
  [  0.0000e+000   1.6159e+184]]

 [[  5.5817e-091   1.0405e-042]
  [  8.2635e-072   6.5303e-042]
  [  6.4822e+170   4.9343e+257]]]

np.arange(15)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

数组创建函数
array：将输入数据（列表、元组、数组或其他序列类型）转换为ndarray
asarray：将输入转换为ndarray，如果输入本身就是一个ndarray就不进行复制
arange：类似内置的range,返回ndarray而不是列表
ones/ones_like:根据指定的形状和dtype创建一个全1数组。后者以另一个数组为参数，根据其形状和dtype创建全1数组
zeros/zeros_like:同上，产生全0数组
empty/empty_like:创建新数组，只分配内存空间但不填充任何值
eye/identity:创建一个正方的N*N的单位矩阵

Data Types for ndarrays

arr1 = np.array([1, 2, 3], dtype=np.float64)
arr2 = np.array([1, 2, 3], dtype=np.int32)
print arr1.dtype
arr2.dtype

float64

dtype('int32')

arr = np.array([1, 2, 3, 4, 5])
print arr.dtype
float_arr = arr.astype(np.float64)#显式转换dtype，并创建新数组
float_arr.dtype

int32

dtype('float64')

arr = np.array([3.7, -1.2, -2.6, 0.5, 12.9, 10.1])
print arr
arr.astype(np.int32)

[  3.7  -1.2  -2.6   0.5  12.9  10.1]

array([ 3, -1, -2,  0, 12, 10])

numeric_strings = np.array(['1.25', '-9.6', '42'], dtype=np.string_)#字符串也可以转换为数值
numeric_strings.astype(float)

array([  1.25,  -9.6 ,  42.  ])

int_array = np.arange(10)
calibers = np.array([.22, .270, .357, .380, .44, .50], dtype=np.float64)
int_array.astype(calibers.dtype)

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.])

empty_uint32 = np.empty(8, dtype='u4')
empty_uint32

array([1, 2, 3, 4, 5, 6, 7, 8], dtype=uint32)

Operations between arrays and scalars

arr = np.array([[1., 2., 3.], [4., 5., 6.]])
print arr
print arr * arr
print arr - arr

[[ 1.  2.  3.]
 [ 4.  5.  6.]]
[[  1.   4.   9.]
 [ 16.  25.  36.]]
[[ 0.  0.  0.]
 [ 0.  0.  0.]]

print 1 / arr
print arr ** 0.5

[[ 1.      0.5     0.3333]
 [ 0.25    0.2     0.1667]]
[[ 1.      1.4142  1.7321]
 [ 2.      2.2361  2.4495]]

Basic indexing and slicing

arr = np.arange(10)
print arr
print arr[5]
print arr[5:8]
arr[5:8] = 12#数组的广播
arr

[0 1 2 3 4 5 6 7 8 9]
5
[5 6 7]





array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9])

arr_slice = arr[5:8]
arr_slice[1] = 12345
print arr
arr_slice[:] = 64
print arr

[    0     1     2     3     4    12 12345    12     8     9]
[ 0  1  2  3  4 64 64 64  8  9]

arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
arr2d[2]

array([7, 8, 9])

print arr2d[0][2]#两者等价
print arr2d[0, 2]

3
3

arr3d = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

arr3d[0]

array([[1, 2, 3],
       [4, 5, 6]])

old_values = arr3d[0].copy()#副本
arr3d[0] = 42
print arr3d
arr3d[0] = old_values
print arr3d

[[[42 42 42]
  [42 42 42]]

 [[ 7  8  9]
  [10 11 12]]]
[[[ 1  2  3]
  [ 4  5  6]]

 [[ 7  8  9]
  [10 11 12]]]

arr3d[1, 0]

array([7, 8, 9])

Indexing with slices

arr[1:6]

array([ 1,  2,  3,  4, 64])

print arr2d
arr2d[:2]

[[1 2 3]
 [4 5 6]
 [7 8 9]]





array([[1, 2, 3],
       [4, 5, 6]])

arr2d[:2, 1:]

array([[2, 3],
       [5, 6]])

print arr2d[1, :2]
arr2d[2, :1]

[4 5]

array([7])

arr2d[:, :1]

array([[1],
       [4],
       [7]])

arr2d[:2, 1:] = 0

Boolean indexing

names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
data = randn(7, 4)
print names
data

['Bob' 'Joe' 'Will' 'Bob' 'Will' 'Joe' 'Joe']

array([[ 1.0384,  1.4991, -2.0842, -0.7308],
       [ 1.6778,  1.1454,  0.501 , -0.8916],
       [-0.3157,  1.4617,  0.5456,  1.4463],
       [ 0.554 ,  1.6453,  1.8337,  0.3457],
       [-2.7568,  0.5697,  0.7377,  0.0827],
       [ 1.0465,  0.0075, -0.7373,  0.954 ],
       [-0.2963,  1.4198,  2.3366, -0.4853]])

names == 'Bob'

array([ True, False, False,  True, False, False, False], dtype=bool)

data[names == 'Bob']

array([[ 1.0384,  1.4991, -2.0842, -0.7308],
       [ 0.554 ,  1.6453,  1.8337,  0.3457]])

print data[names == 'Bob', 2:]
data[names == 'Bob', 3]

[[-2.0842 -0.7308]
 [ 1.8337  0.3457]]





array([-0.7308,  0.3457])

names != 'Bob'
data[-(names == 'Bob')]#-和!=都可以；但对布尔数组使用-已被弃用（见下方警告），应改用~或np.logical_not

C:\Users\WangYixin\Anaconda2\lib\site-packages\ipykernel\__main__.py:2: DeprecationWarning: numpy boolean negative, the `-` operator, is deprecated, use the `~` operator or the logical_not function instead.
  from ipykernel import kernelapp as app





array([[ 1.6778,  1.1454,  0.501 , -0.8916],
       [-0.3157,  1.4617,  0.5456,  1.4463],
       [-2.7568,  0.5697,  0.7377,  0.0827],
       [ 1.0465,  0.0075, -0.7373,  0.954 ],
       [-0.2963,  1.4198,  2.3366, -0.4853]])

mask = (names == 'Bob') | (names == 'Will')
print mask
print data[mask]

[ True False  True  True  True False False]
[[ 1.0384  1.4991 -2.0842 -0.7308]
 [-0.3157  1.4617  0.5456  1.4463]
 [ 0.554   1.6453  1.8337  0.3457]
 [-2.7568  0.5697  0.7377  0.0827]]

data[data < 0] = 0
data

array([[ 1.0384,  1.4991,  0.    ,  0.    ],
       [ 1.6778,  1.1454,  0.501 ,  0.    ],
       [ 0.    ,  1.4617,  0.5456,  1.4463],
       [ 0.554 ,  1.6453,  1.8337,  0.3457],
       [ 0.    ,  0.5697,  0.7377,  0.0827],
       [ 1.0465,  0.0075,  0.    ,  0.954 ],
       [ 0.    ,  1.4198,  2.3366,  0.    ]])

data[names != 'Joe'] = 7
data

array([[ 7.    ,  7.    ,  7.    ,  7.    ],
       [ 1.6778,  1.1454,  0.501 ,  0.    ],
       [ 7.    ,  7.    ,  7.    ,  7.    ],
       [ 7.    ,  7.    ,  7.    ,  7.    ],
       [ 7.    ,  7.    ,  7.    ,  7.    ],
       [ 1.0465,  0.0075,  0.    ,  0.954 ],
       [ 0.    ,  1.4198,  2.3366,  0.    ]])

Fancy indexing/花式索引

arr = np.empty((8, 4))
for i in range(8):
    arr[i] = i
arr

array([[ 0.,  0.,  0.,  0.],
       [ 1.,  1.,  1.,  1.],
       [ 2.,  2.,  2.,  2.],
       [ 3.,  3.,  3.,  3.],
       [ 4.,  4.,  4.,  4.],
       [ 5.,  5.,  5.,  5.],
       [ 6.,  6.,  6.,  6.],
       [ 7.,  7.,  7.,  7.]])

arr[[4, 3, 0, 6]]#选取行子集

array([[ 4.,  4.,  4.,  4.],
       [ 3.,  3.,  3.,  3.],
       [ 0.,  0.,  0.,  0.],
       [ 6.,  6.,  6.,  6.]])

arr[[-3, -5, -7]]#从末尾开始选行

array([[ 5.,  5.,  5.,  5.],
       [ 3.,  3.,  3.,  3.],
       [ 1.,  1.,  1.,  1.]])

# more on reshape in Chapter 12
arr = np.arange(32).reshape((8, 4))
print arr
arr[[1, 5, 7, 2], [0, 3, 1, 2]]

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]
 [24 25 26 27]
 [28 29 30 31]]





array([ 4, 23, 29, 10])

arr[[1, 5, 7, 2]][:, [0, 3, 1, 2]]

array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])

arr[np.ix_([1, 5, 7, 2], [0, 3, 1, 2])]#前面序列选取行，后面序列实现列的变换

array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])

Transposing arrays and swapping axes

arr = np.arange(15).reshape((3, 5))
print arr
arr.T

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]]





array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

arr = np.random.randn(6, 3)
np.dot(arr.T, arr)#矩阵内积
# print arr.T * arr 每个元素对应相乘

array([[ 10.1151,  -0.9756,   1.1168],
       [ -0.9756,   1.4627,   1.0949],
       [  1.1168,   1.0949,   1.6275]])

arr = np.arange(16).reshape((2, 2, 4))
print arr
arr.transpose((2, 1 ,0))#括号内为3个维度

[[[ 0  1  2  3]
  [ 4  5  6  7]]

 [[ 8  9 10 11]
  [12 13 14 15]]]





array([[[ 0,  8],
        [ 4, 12]],

       [[ 1,  9],
        [ 5, 13]],

       [[ 2, 10],
        [ 6, 14]],

       [[ 3, 11],
        [ 7, 15]]])

print arr
arr.swapaxes(1, 2)

[[[ 0  1  2  3]
  [ 4  5  6  7]]

 [[ 8  9 10 11]
  [12 13 14 15]]]





array([[[ 0,  4],
        [ 1,  5],
        [ 2,  6],
        [ 3,  7]],

       [[ 8, 12],
        [ 9, 13],
        [10, 14],
        [11, 15]]])

Universal Functions: Fast element-wise array functions

arr = np.arange(10)
print np.sqrt(arr)
print np.exp(arr)

[ 0.      1.      1.4142  1.7321  2.      2.2361  2.4495  2.6458  2.8284
  3.    ]
[    1.         2.7183     7.3891    20.0855    54.5982   148.4132
   403.4288  1096.6332  2980.958   8103.0839]

x = randn(8)
y = randn(8)
print x
print y
np.maximum(x, y) # element-wise maximum

[ 0.3919 -0.971  -1.4745 -0.5803  1.792  -0.7137 -0.07    0.6102]
[-0.3633 -0.4719  0.8457 -0.7584  0.873  -0.1103  0.4284 -0.043 ]





array([ 0.3919, -0.4719,  0.8457, -0.5803,  1.792 , -0.1103,  0.4284,
        0.6102])

arr = randn(7) * 5
np.modf(arr)#返回两个数组，第一个是小数部分数组，第二个是整数部分数组

(array([-0.4014,  0.982 , -0.8753,  0.2723, -0.6351,  0.6072, -0.4787]),
 array([ -4.,  10.,  -0.,   3.,  -8.,   2.,  -0.]))

一元函数

abs/fabs：计算整数、浮点数、复数的绝对值。对于非复数值，可以使用速度更快的fabs
sqrt：计算平方根
square：计算平方
exp：计算指数
log/log10/log2/log1p：对数，最后为log(1+x)
sign：计算元素的正负号
ceil：计算ceiling
floor：计算floor

rint：四舍五入，保留dtype
modf：将数组的小数部分和整数部分以两个独立数组的形式返回
isnan：表示哪些值是NaN的布尔型数组
isfinite/isinf：哪些有穷，哪些无穷的布尔型数组
cos/cosh/sin/sinh/tan/tanh：三角函数
arccos/arccosh/arcsin/arcsinh/arctan/arctanh：反三角
logical_not：计算各元素not x的真值，相当于~arr
二元函数
add：对应相加
subtract：对应相减
multiply：元素相乘
divide/floor_divide：除法或向下整除法
power：对应数组元素的指数
maximum/fmax：最大值计算，后者忽略NaN
minimum/fmin：最小值计算，后者忽略NaN
mod：求模计算
copysign：将第二个数组中的值的符号复制给第一个数组中的值
greater/greater_equal：>/>=
less/less_equal：< / <=
equal/not_equal：==/!=
logical_and/logical_or/logical_xor：& | ^

Data processing using arrays

points = np.arange(-5, 5, 0.01) # 1000 equally spaced points
xs, ys = np.meshgrid(points, points)
ys

array([[-5.  , -5.  , -5.  , ..., -5.  , -5.  , -5.  ],
       [-4.99, -4.99, -4.99, ..., -4.99, -4.99, -4.99],
       [-4.98, -4.98, -4.98, ..., -4.98, -4.98, -4.98],
       ..., 
       [ 4.97,  4.97,  4.97, ...,  4.97,  4.97,  4.97],
       [ 4.98,  4.98,  4.98, ...,  4.98,  4.98,  4.98],
       [ 4.99,  4.99,  4.99, ...,  4.99,  4.99,  4.99]])

from matplotlib.pyplot import imshow, title

import matplotlib.pyplot as plt
z = np.sqrt(xs ** 2 + ys ** 2)
z
plt.imshow(z, cmap=plt.cm.gray); plt.colorbar()
plt.title("Image plot of $\sqrt{x^2 + y^2}$ for a grid of values")

<matplotlib.text.Text at 0x69050b8>

plt.draw()

<matplotlib.figure.Figure at 0x673bb00>

Expressing conditional logic as array operations

xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])
cond = np.array([True, False, True, True, False])

result = [(x if c else y)
          for x, y, c in zip(xarr, yarr, cond)]#大数据处理不快，无法用于多维数组
result

[1.1000000000000001, 2.2000000000000002, 1.3, 1.3999999999999999, 2.5]

result = np.where(cond, xarr, yarr)
result

array([ 1.1,  2.2,  1.3,  1.4,  2.5])

arr = randn(4, 4)
print arr
print np.where(arr > 0, 2, -2)
print np.where(arr > 0, 2, arr) # set only positive values to 2

[[-1.0354 -0.8886  0.5213 -0.4209]
 [-2.13    0.259  -3.2087  0.5629]
 [-0.5451  1.7162  0.507  -0.3764]
 [ 0.1984 -0.9892  2.0953 -1.7482]]
[[-2 -2  2 -2]
 [-2  2 -2  2]
 [-2  2  2 -2]
 [ 2 -2  2 -2]]
[[-1.0354 -0.8886  2.     -0.4209]
 [-2.13    2.     -3.2087  2.    ]
 [-0.5451  2.      2.     -0.3764]
 [ 2.     -0.9892  2.     -1.7482]]

# Not to be executed
np.where(cond1 & cond2, 0,
         np.where(cond1, 1,
                  np.where(cond2, 2, 3)))#第二层相当于else

# Not to be executed
result = 1 * cond1 + 2 * cond2 + 3 * -(cond1 | cond2)

Mathematical and statistical methods

arr = np.random.randn(5, 4) # normally-distributed data
print arr.mean()
print np.mean(arr)
print arr.sum()

0.0302370467869
0.0302370467869
0.604740935738

arr

array([[-1.4707,  1.1963, -1.02  , -0.1075],
       [-0.2141, -1.2172,  0.4418,  1.5962],
       [-1.6373,  0.4263,  1.459 ,  1.1363],
       [-1.5198,  1.6038,  0.4484, -1.211 ],
       [-0.0563, -0.4578,  0.456 ,  0.7522]])

print arr.mean(axis=1)
print arr.sum(0)

[ 1.  4.  7.]
[ 9 12 15]

arr = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
print arr.cumsum(0)#沿轴0累加（自上而下逐行累加）
print arr.cumprod(1)#沿轴1累积（每行内从左到右累乘）

[[ 0  1  2]
 [ 3  5  7]
 [ 9 12 15]]
[[  0   0   0]
 [  3  12  60]
 [  6  42 336]]

基本数组统计方法

sum：对数组中全部或轴向元素求和。
mean：算数平均数。
std/var：标准差、方差
min/max：最小值和最大值
argmin/argmax：最小值和最大值的索引
cumsum：所有元素的累计和
cumprod：所有元素的累计积

Methods for boolean arrays

arr = randn(100)
(arr > 0).sum() # Number of positive values

bools = np.array([False, False, True, False])
print bools.any()#是否存在True
print bools.all()#是否所有为True

True
False

Sorting

arr = randn(8)
print arr
arr.sort()
arr

[ 2.0659 -0.7674  0.6874  0.243  -1.1497  0.5156  1.7158 -1.0473]

array([-1.1497, -1.0473, -0.7674,  0.243 ,  0.5156,  0.6874,  1.7158,
        2.0659])

arr = randn(5, 3)
print arr
arr.sort(1)#沿轴1排序（每一行内部从小到大），就地改变原数组
print arr

[[-0.9779 -1.6321  0.5236]
 [ 0.2077  0.9451  0.4431]
 [ 1.6238  1.1947 -0.2424]
 [-0.8365 -0.2088 -1.4988]
 [-0.0167  0.3315 -2.5885]]
[[-1.6321 -0.9779  0.5236]
 [ 0.2077  0.4431  0.9451]
 [-0.2424  1.1947  1.6238]
 [-1.4988 -0.8365 -0.2088]
 [-2.5885 -0.0167  0.3315]]

large_arr = randn(1000)
large_arr.sort()
large_arr[int(0.05 * len(large_arr))] # 5% quantile分位数

-1.5464926345098284

Unique and other set logic

names = np.array(['Bob',  'Will', 'Joe','Bob', 'Will', 'Joe', 'Joe'])#去重，排序
print np.unique(names)
ints = np.array([3, 3, 3, 2, 2, 1, 1, 4, 4])
np.unique(ints)

['Bob' 'Joe' 'Will']

array([1, 2, 3, 4])

sorted(set(names))#纯Python

['Bob', 'Joe', 'Will']

values = np.array([6, 0, 0, 3, 2, 5, 6])
np.in1d(values, [2, 3, 6])

array([ True, False, False,  True,  True, False,  True], dtype=bool)

数组的集合运算

unique(x):计算x中的唯一元素，并返回有序结果
intersect1d(x, y)：计算x和y中的公共元素，并返回有序结果
union1d(x, y)：计算并集，返回有序结果
in1d(x, y)：x元素是否包含于y的布尔型数组
setdiff1d(x,y)：集合差，在x中不在y中
setxor1d(x, y)：对称差，只存在于一个集合中的元素

File input and output with arrays

Storing arrays on disk in binary format

arr = np.arange(10)
np.save('some_array', arr)

np.load('some_array.npy')

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

np.savez('array_archive.npz', a=arr, b=arr)#将多个数组保存到一个未压缩的.npz归档文件中（需要压缩时使用np.savez_compressed）

arch = np.load('array_archive.npz')
arch['b']

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

!rm some_array.npy
!rm array_archive.npz

'rm' 不是内部或外部命令，也不是可运行的程序
或批处理文件。
'rm' 不是内部或外部命令，也不是可运行的程序
或批处理文件。

Saving and loading text files

!cat array_ex.txt

'cat' 不是内部或外部命令，也不是可运行的程序
或批处理文件。

arr = np.loadtxt('array_ex.txt', delimiter=',')#将文件加载到二维数组中
arr

C:\Users\WangYixin\Anaconda2\lib\site-packages\numpy\lib\npyio.py:891: UserWarning: loadtxt: Empty input file: "array_ex.txt"
  warnings.warn('loadtxt: Empty input file: "%s"' % fname)





array([], dtype=float64)

Linear algebra

x = np.array([[1., 2., 3.], [4., 5., 6.]])
y = np.array([[6., 23.], [-1, 7], [8, 9]])
print x
print y
x.dot(y)  # equivalently np.dot(x, y)

[[ 1.  2.  3.]
 [ 4.  5.  6.]]
[[  6.  23.]
 [ -1.   7.]
 [  8.   9.]]





array([[  28.,   64.],
       [  67.,  181.]])

np.dot(x, np.ones(3))

array([  6.,  15.])

np.random.seed(12345)

from numpy.linalg import inv, qr
X = randn(5, 5)
mat = X.T.dot(X)
print inv(mat)
print mat.dot(inv(mat))
q, r = qr(mat)
r

[[ 0.7815  1.2761  0.0347 -0.9711 -0.4199]
 [ 1.2761  4.8959  1.0086 -4.0922 -0.7354]
 [ 0.0347  1.0086  0.6297 -1.1609 -0.0934]
 [-0.9711 -4.0922 -1.1609  3.9143  0.5037]
 [-0.4199 -0.7354 -0.0934  0.5037  0.4535]]
[[ 1. -0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.]
 [ 0. -0.  1.  0.  0.]
 [ 0.  0. -0.  1.  0.]
 [ 0.  0. -0. -0.  1.]]





array([[ -9.9631,  -3.1683, -11.5922,  -8.1902,  -8.6384],
       [  0.    ,  -3.6452,  -3.3099,  -4.7777,  -1.323 ],
       [  0.    ,   0.    ,  -3.0731,  -1.1121,   1.6327],
       [  0.    ,   0.    ,   0.    ,  -0.3792,   1.8321],
       [  0.    ,   0.    ,   0.    ,   0.    ,   0.9186]])

常用numpy.linalg函数

diag：以一维数组的形式返回方阵的对角线元素
dot：矩阵乘法
trace：计算对角线元素的和
det：矩阵行列式
eig：方阵的特征值和特征向量
inv：方阵的逆
pinv：矩阵的Moore-Penrose逆
qr：计算QR分解
svd：奇异值分解
solve：解线性方程组Ax=b,A是一个方阵
lstsq：Ax=b的最小二乘解

Random number generation

samples = np.random.normal(size=(4, 4))#标准正态分布
samples

array([[ 0.0699,  0.2467, -0.0119,  1.0048],
       [ 1.3272, -0.9193, -1.5491,  0.0222],
       [ 0.7584, -0.6605,  0.8626, -0.01  ],
       [ 0.05  ,  0.6702,  0.853 , -0.9559]])

from random import normalvariate
N = 1000000
%timeit samples = [normalvariate(0, 1) for _ in xrange(N)]
%timeit np.random.normal(size=N)

1 loop, best of 3: 1.7 s per loop
10 loops, best of 3: 72.7 ms per loop

numpy.random函数

seed：确定随机数生成器的种子
permutation：返回一个序列的随机排列或者返回一个随机排列的范围
shuffle：对一个序列随机排序
rand：产生均匀分布的样本值
randint：从给定的上下限范围内随机选取整数
randn：产生正态分布的样本值
binomial：产生二项分布的样本值
normal：产生高斯分布的样本值
beta：产生Beta分布的样本值
chisquare：产生卡方分布的样本值
gamma：产生Gamma分布的样本值
uniform：产生在[0,1)中均匀分布的样本值

Example: Random Walks

import random
position = 0
walk = [position]
steps = 1000
for i in xrange(steps):
    step = 1 if random.randint(0, 1) else -1
    position += step
    walk.append(position)

np.random.seed(12345)

nsteps = 1000
draws = np.random.randint(0, 2, size=nsteps)
steps = np.where(draws > 0, 1, -1)
walk = steps.cumsum()

walk.min()
walk.max()

(np.abs(walk) >= 10).argmax()#返回第一个最大值的索引

Simulating many random walks at once

nwalks = 5000
nsteps = 1000
draws = np.random.randint(0, 2, size=(nwalks, nsteps)) # 0 or 1
steps = np.where(draws > 0, 1, -1)
walks = steps.cumsum(1)
walks

array([[  1,   0,   1, ...,   8,   7,   8],
       [  1,   0,  -1, ...,  34,  33,  32],
       [  1,   0,  -1, ...,   4,   5,   4],
       ..., 
       [  1,   2,   1, ...,  24,  25,  26],
       [  1,   2,   3, ...,  14,  13,  14],
       [ -1,  -2,  -3, ..., -24, -23, -22]])

walks.max()
walks.min()

-133

hits30 = (np.abs(walks) >= 30).any(1)
print hits30
hits30.sum() # Number that hit 30 or -30

[False  True False ..., False  True False]

crossing_times = (np.abs(walks[hits30]) >= 30).argmax(1)
crossing_times.mean()

498.88973607038122

steps = np.random.normal(loc=0, scale=0.25,
                         size=(nwalks, nsteps))