02_numpy
numpy get started导入numpy库,并查看numpy版本 import numpy as np np.__version__ '1.13.0' import matplotlib.pyplot as plt cat = plt.imread('cat.jpg') print(cat) [[[231 186 131] [232 187 132] [233 188 133] ...,[100 54 54] [ 92 48 47] [ 85 43 44]] [[232 187 132] [232 187 132] [233 188 133] ...,[100 54 54] [ 92 48 47] [ 84 42 43]] [[232 187 132] [233 188 133] [233 188 133] ...,[ 99 53 53] [ 91 47 46] [ 83 41 42]] ...,[[199 119 82] [199 119 82] [200 120 83] ...,[189 99 65] [187 97 63] [187 97 63]] [[199 119 82] [199 119 82] [199 119 82] ...,[188 98 64] [186 96 62] [188 95 62]] [[199 119 82] [199 119 82] [199 119 82] ...,[188 98 64] [188 95 62] [188 95 62]]] type(cat) numpy.ndarray cat.shape (456,730,3) plt.imshow(cat) plt.show() #请问电影是什么,nd.array 四维 #(x,456,760,3) 一、创建ndarray1. 使用np.array()由python list创建参数为列表: 注意:
l = [3,1,9,6] n = np.array(l) display(n,l) array([3,6]) [3,6] display(n.shape,l.shape) -------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input-15-5eeacc6c47ae> in <module>() ----> 1 display(n.shape,l.shape) AttributeError: 'list' object has no attribute 'shape' n2 = np.array([[3,7,1],[3,8],[2,6,8]]) display(n2.shape) (3,4) n3 = np.array(['0',9.18,20]) n3 array(['0','9.18','20'],dtype='<U4') n4 = np.array([1,3.14]) n4 array([ 1.,2.,3.14]) 2. 使用np的routines函数创建包含以下常见创建方法: 1) np.ones(shape,dtype=None,order=‘C‘) n = np.ones((4,5)) n array([[1.,1.,1.],[1.,1.]]) n2 = np.ones((4,6),dtype=int) n2 array([[[1,[1,1]],[[1,1]]]) 2) np.zeros(shape,dtype=float,order=‘C‘) n3 = np.zeros((4,5)) n3 array([[0.,0.,0.],[0.,0.]]) 3) np.full(shape,fill_value,order=‘C‘) n = np.full((4,5),dtype=int,fill_value=8) n array([[8,8,[8,8]]) 4) np.eye(N,M=None,k=0,dtype=float) n = np.eye(4,5) n # 满秩矩阵 # x + y = 10 # x - y = 5 # 1 1 # 1 -1 # 第二行减去第一行 # 1 1 # 0 -2 # 1/2乘于第二行 # 1 1 # 0 -1 # 第二行加上第一行 # 1 0 # 0 -1 # 第二行乘与-1 # 1 0 # 0 1 # x + y # 2x + 2Y # 无解 # 1 1 # 2 2 array([[1.,0.]]) 5) np.linspace(start,stop,num=50,endpoint=True,retstep=False,dtype=None) n = np.linspace(0,100,retstep=True,endpoint=False) n (array([ 0,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98]),2.0) n = np.linspace(0,150,dtype=np.int8) n # line # 2^(n-1) -1 # lin = linear algebra array([ 0,3,15,21,27,33,39,45,55,61,67,73,79,85,91,97,101,104,107,110,113,116,119,122,125,-128,-125,-122,-119,-116,-113,-110,-106],dtype=int8) 6) np.arange([start,]stop,[step,]dtype=None) n = np.arange(10) n array([0,9]) n = np.arange(1,11,step=2) n array([1,9]) 7) np.random.randint(low,high=None,size=None,dtype=‘l‘) n = np.random.randint(10) n 8 n = np.random.randint(0,255,size=(3,5)) n array([[[ 89,202,49],[118,159,190,227],[177,232,158,64],[112,216]],[[ 2,180,152,244],[ 
46,185,155,253],[180,135,86],[ 64,218,69,128,90]],[[163,12],[ 15,181,87,62],[218,166,217],[137,49,194]]]) image = np.random.randint(0,size=(456,3)) image.shape (456,3) plt.imshow(image) plt.show(image) --------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-97-a28aaec0347e> in <module>() 1 plt.imshow(image) ----> 2 plt.show(image) C:ProgramDataAnaconda3libsite-packagesmatplotlibpyplot.py in show(*args,**kw) 251 """ 252 global _show --> 253 return _show(*args,**kw) 254 255 C:ProgramDataAnaconda3libsite-packagesipykernelpylabbackend_inline.py in show(close,block) 39 # only call close('all') if any to close 40 # close triggers gc.collect,which can be slow ---> 41 if close and Gcf.get_all_fig_managers(): 42 matplotlib.pyplot.close('all') 43 ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all() 8) np.random.randn(d0,d1,...,dn) 标准正太分布 n = np.random.randn(10) n array([-0.4173303,-0.41736696,-0.11888109,-0.51925789,1.24985884,1.52967696,0.05327912,0.84738899,1.03118302,-0.64532473]) 9)np.random.normal(loc=0.0,scale=1.0,size=None) n = np.random.normal(175,scale=5.0,size=50) n array([177.62703208,176.50746247,173.26956915,162.29355083,172.05271936,177.61948035,172.52243162,175.43294252,181.14225673,175.21450574,179.56055092,170.883815,170.91435313,176.25008762,176.3347509,183.90347049,178.91856559,168.84725605,176.32881783,172.77973728,173.12257339,174.75054378,166.60349541,171.68263799,168.83419713,174.25085091,175.66113435,174.12039025,177.22772738,169.01523024,175.57587527,172.89083838,179.52153939,173.70318334,179.06473552,176.50099117,175.83008746,174.78059027,175.58909128,178.11274357,183.45771692,172.43399789,179.56800892,182.14239994,176.43701867,177.37866513,179.55215095,174.5389049,175.48698667,168.73145269]) 10) np.random.random(size=None) 生成0到1的随机数,左闭右开 n = np.random.random(10) n 
array([0.22608606,0.62764532,0.62219649,0.05348086,0.94994404,0.29048963,0.49340728,0.04651386,0.59005488,0.59901244]) 二、ndarray的属性4个必记参数: dtype:元素类型 cat.ndim 3 cat.shape (456,3) cat.size 998640 cat.dtype dtype('uint8') 三、ndarray的基本操作1. 索引一维与列表完全一致 l = [1,5] l[2:4] [3,4] n = np.array(l) n[2] 3 # 找一个二维ndarray中的某个数 n2 = np.random.randint(0,size=(4,4)) n2 array([[ 8,117,209,[156,192,180],[ 33,53,179],[ 56,236,45]]) # 查找53 n2[2][2] 53 n2[2,2] 53 n3 = np.random.randint(0,6)) n3 array([[[128,108,112,60],[234,111,237,95],[127,226,85],[239,233,210,165,186,57],[ 27,17,208,120]],[[199,169,153,75],[179,205,239,228],[154,204,138,231,97],[ 55,193,245,105,210],[157,227,230,242,185]],[[ 67,189,206],[220,241,141,146,59],206,240,105],[176,252,212,127],[165,130,77,56]],[[194,237],143,191,37,236],[194,65,223,125],[ 92,162,93,3],[ 39,179,213,23,141]]]) n3[1,3] 5 np.random.seed(100) np.random.seed(100) np.random.randn(10) array([-1.74976547,0.3426804,1.1530358,-0.25243604,0.98132079,0.51421884,0.22117967,-1.07004333,-0.18949583,0.25500144]) n = np.array([1,np.nan]) np.sum(n) np.nansum(n) 6.0 根据索引修改数据 n3[1,3] = 8 n3 array([[[128,141]]]) 2. 切片一维与列表完全一致 l = [1,5] l[::-1] [5,1] l[::-2] l [1,5] 将数据反转,例如[1,3]---->[3,1] n = np.random.randint(0,5)) n array([[211,6],[ 86,139],[185,247,99,31],[221,137,162]]) 两个::进行切片 n[::-1] n array([[211,162]]) 3. 变形使用reshape函数,注意参数是一个tuple! n = np.arange(6) n array([0,5]) n2 = np.reshape(n,(3,2)) n2 array([[0,[4,5]]) cat.shape (456,3) n = np.reshape(cat,(8322,120)) n array([[231,131,235,135],[237,137],138],[203,89,201,121,[200,120,197,82],[197,188,95,62]],dtype=uint8) 4. 级联
n1 = np.random.randint(0,size=(5,6)) n2 = np.random.randint(0,6)) display(n1,n2) array([[ 67,115,248,248],[ 66,156,250,195],[248,172,19,200,[139,25,116]]) array([[182,251,[169,168],[231,59,84],[ 6,51,136,89],[ 67,147,90]]) np.concatenate((n1,n2),axis=1) array([[ 67,182,195,90]])
# hstack horizontal new_image = np.hstack((cat,image)) plt.imshow(new_image) plt.show() # vstack vertical new_image = np.vstack((cat,image)) plt.imshow(new_image) plt.show() 5. 切分 与级联类似,三个函数完成切分工作: np.split、np.vsplit、np.hsplit
n = np.random.randint(0,size = (4,6)) n array([[92,53],[42,18],[25,35,13],57,59]]) np.vsplit(n,(1,2)) [array([[92,53]]),array([[42,18]]),array([[25,59]])] n = np.random.randint(0,size=(6,6)) n array([[48,83,20],[80,37],[52,[ 5,[27,48],[30,75,90]]) np.vsplit(n,(2,5)) [array([[48,37]]),array([[52,48]]),array([[30,90]])] np.split(n,axis=1) [array([[48,77],92],99],73],54]]),array([[69,24],[21,[ 2,33],[34,[95,[49,75]]),array([[83,[16,[28,[ 0,[ 8,[44,90]])] np.vsplit(n,3) [array([[48,95]]),array([[27,90]])] np.hsplit(n,4)) [array([[33,46],[98,40],[47,91]]),array([[53,7],[12,55],[69,50],[32,52]]),array([[56,43],[18,[83,38]])] cat.shape (456,3) 456 730 result = np.split(cat,axis = 0) plt.imshow(result[0]) plt.show() s_result = np.split(cat,axis = 1) len(s_result) 2 plt.imshow(s_result[0]) plt.show() 6. 副本所有赋值运算不会为ndarray的任何元素创建副本。对赋值后的对象的操作也对原来的对象生效。 l = [1,4] l2 = l l2[2] = 5 l [1,4] n1 = np.arange(10) n2 = n1 n2[3] = 100 n1 array([ 0,9]) n3 = n1.copy() n3[5] = 200 n1 array([ 0,9]) 可使用copy()函数创建副本 四、ndarray的聚合操作1. 求和np.sumn = np.arange(11) n array([ 0,10]) np.sum(n) 55 n = np.random.randint(0,6)) n array([[80,9],[70,[40,13,45],22]]) np.sum(n,axis=1) array([294,363,279]) 2. 最大最小值:np.max/ np.min同理 n = np.arange(11) n array([ 0,10]) np.median(n) 5.0 np.mean(n) 5.0 n = np.random.randint(0,size=10) n array([42,59]) np.mean(n) 51.3 np.median(n) 50.5 np.max(n) 10 np.min(n) 0 n = np.random.randint(0,6)) n array([[82,[66,78],[ 3,69],[62,[41,43,7]]) np.max(n,axis=0) array([82,99]) 3. 
其他聚合操作Function Name NaN-safe Version Description np.sum np.nansum Compute sum of elements np.prod np.nanprod Compute product of elements np.mean np.nanmean Compute mean of elements np.std np.nanstd Compute standard deviation np.var np.nanvar Compute variance np.min np.nanmin Find minimum value np.max np.nanmax Find maximum value np.argmin np.nanargmin Find index of minimum value np.argmax np.nanargmax Find index of maximum value np.median np.nanmedian Compute median of elements np.percentile np.nanpercentile Compute rank-based statistics of elements np.any N/A Evaluate whether any elements are true np.all N/A Evaluate whether all elements are true np.power 幂运算 np.argmin(n,axis=0) array([2,4],dtype=int64) cat.shape (456,3) cat2 = cat.reshape((-1,3)) cat2.shape (332880,3) n = np.random.randint(0,5)) n array([[8,5],[7,[6,9]]) np.reshape(n,(-1,)) array([8,9]) cat3 = cat.reshape((456*730,3)) cat3.shape (332880,3) cat3.max(axis = 0) array([255,219],dtype=uint8) max_cat = cat.max(axis = (0,1)) max_cat array([255,dtype=uint8) max_cat.shape (3,) cat.min() 0 np.sum 和 np.nansum 的区别 a = np.array([1,np.nan]) a array([ 1.,nan]) np.nansum(a) 3.0 操作文件使用pandas打开文件president_heights.csv import pandas as pd data = pd.read_csv('president_heights.csv') type(data) data
heights = data['height(cm)'] heights type(heights) pandas.core.series.Series np.max(heights) 193 np.mean(heights) 179.73809523809524 np.std(heights) 6.931843442745893 np.min(heights) 163 五、ndarray的矩阵操作1. 基本矩阵操作1) 算术运算符:
n = np.random.randint(0,5)) n array([[2,[0,0],2],[5,5]]) # 加 n + 1 array([[ 3,[ 1,[ 7,[ 6,6]]) # 减 n - 1 array([[ 1,-1,[-1,-1],[ 4,4]]) # 两个矩阵相加 n2 = np.random.randint(0,5)) n2 array([[2,[9,9]]) n + n2 array([[ 4,15],[15,[ 9,14]]) n3 = np.random.randint(0,size=(2,5)) n3 array([[8,7]]) n2 + n3 --------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-97-5f0861827bc6> in <module>() ----> 1 n2 + n3 ValueError: operands could not be broadcast together with shapes (4,5) (2,5) 2) 矩阵积np.dot() n1 = np.random.randint(0,3)) n1 array([[8,5]]) n2 = np.random.randint(0,4)) n2 array([[4,7]]) np.dot(n1,n2) array([[ 72,111],[ 37,47,63]]) 2. 广播机制 【重要】ndarray广播机制的两条规则: 规则一:维度较少的数组在形状前面补1,使两个数组的维数相同; 规则二:长度为1的维度会沿该轴复制(拉伸)以匹配另一个数组,其余维度的长度必须相等,否则报错(例如上面形状为(4,5)与(2,5)的两个数组无法广播)
例1: m = np.ones((2,3),dtype=int) m array([[1,1]]) n = np.arange(3) n array([0,2]) m + n array([[1,3]]) 例2: a = np.arange(3).reshape((3,1)) a array([[0],[1],[2]]) b = np.arange(3) b array([0,2]) a + b array([[0,4]]) 习题 六、ndarray的排序 小测验: def Sortn(x): 代码越短越好 n = [5,9] def bubble(n): for i in range(len(n) -1): for j in range(i+1,len(n)): if n[i] > n[j]: n[i],n[j] = n[j],n[i] bubble(n) n [2,9] # 选择排序 def select(n): for i in range(len(n)): # 选出最小值的索引 index = np.argmin(n[i:]) + i # 把最小值和当前值的位置换一下 n[i],n[index] = n[index],n[i] n = [4,3] select(n) n [0,6] 1. 快速排序 np.sort()与ndarray.sort()都可以,但有区别: np.sort()不改变输入,返回排序后的副本; ndarray.sort()在原数组上就地排序,不额外占用空间,但会改变输入
n = np.random.randint(0,size=6) n array([6,3]) np.sort(n) array([1,8]) np.sort(n) n array([6,3]) n.sort() n array([1,8]) (编辑:李大同) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时联系站长删除相关内容! |