Python 大数据挖掘与实战 基础+NumPy包的使用
python 基础
基本的数据类型
数值、字符串、列表、元组、集合、字典
科学计算包NumPy
1.NumPy简介
用于科学计算的基础包,数据分析与挖掘包的基础
2.NumPy的使用
利用 array() 函数创建数组(注意函数名为小写的 np.array)
import numpy as np

# Build a 2x2 ndarray from a nested Python list: each inner list is one row.
L = [
    [1, 2],
    [3, 4],
]
array = np.array(L)
print(array)
---------------------
[[1 2]
[3 4]]
利用内置函数创建数组
import numpy as np

# np.ones(shape)     -> array of the given shape filled with 1.0
# np.zeros(shape)    -> array of the given shape filled with 0.0
# np.arange(a, b, c) -> 1-D array starting at a, advancing by c, stopping
#                       before b (a defaults to 0, c defaults to 1)
shape = (3, 4)
a1 = np.ones(shape)
a2 = np.zeros(shape)
a3 = np.arange(0, 10, 2)
a4 = np.arange(10)
# np.array on a flat tuple yields the 1-D array [10 3], not a 10x3 array.
a5 = np.array((10, 3))

# One array per line, same as print(..., sep='\n').
for arr in (a1, a2, a3, a4, a5):
    print(arr)
-------------------------------------
D:\Anaconda3\python.exe D:/PycharmProjects/2021/numPy.py
[[1. 1. 1. 1.]
[1. 1. 1. 1.]
[1. 1. 1. 1.]]
[[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]]
[0 2 4 6 8]
[0 1 2 3 4 5 6 7 8 9]
[10 3]
数组尺寸
.shape
返回数组的尺寸,返回值为元组
.shape
.reshape()
将数组进行重排
.reshape()
import numpy as np

# A 1-D array of six values and a 2-D array of two rows by four columns,
# built directly from literals so no temporary lists are left behind.
d11 = np.array([1, 2, 3, 4, 0.1, 7])
d22 = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])

# .shape reports the size along every axis as a tuple.
s11, s22 = d11.shape, d22.shape
print(s11)
print(s22)
------------------------
D:\Anaconda3\python.exe D:/PycharmProjects/2021/numPy.py
(6,)
(2, 4)
import numpy as np

# Nine consecutive integers, then the same values laid out as a 3x3 grid.
r = np.array(range(9))
r1 = np.reshape(r, (3, 3))

print(r)
print(r1)
-------------------------
D:\Anaconda3\python.exe D:/PycharmProjects/2021/numPy.py
[0 1 2 3 4 5 6 7 8]
[[0 1 2]
[3 4 5]
[6 7 8]]
数组运算
数组之间的加减乘除、乘方运算、数组的数学函数运算
import numpy as np

print('*' * 40)
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])

# Arithmetic operators act element by element on arrays of equal shape.
print(A + B)
print(A - B)
print(A * B)  # element-wise product, not the matrix product
print(A / B)
print(1 / A)
print(A ** 2)

# Min-max normalization: rescale C1 linearly so that its smallest value maps
# to 0 and its largest to 1. The subtraction needs its own parentheses;
# without them only min(C1) is divided by the range, which merely shifts C1
# by a constant (1/6 for this data) instead of normalizing it.
C1 = np.array([1, 2, 3.4, 5, 7, 5.4, 3, 3.6])
C2 = (C1 - C1.min()) / (C1.max() - C1.min())
# NOTE: any recorded output for C2 that does not span [0, 1] was produced by
# the mis-parenthesized expression.
print(C1, C2)

# Mathematical functions are likewise applied element by element.
D = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]])
print(np.sqrt(D))
print(np.abs([1, -2, -100]))
print(np.cos([1, 2, 3]))
print(np.sin(D))
print(np.exp(D))
D:\Anaconda3\python.exe D:/PycharmProjects/2021/numPy.py
****************************************
[[ 6 8]
[10 12]]
[[-4 -4]
[-4 -4]]
[[ 5 12]
[21 32]]
[[0.2 0.33333333]
[0.42857143 0.5 ]]
[[1. 0.5 ]
[0.33333333 0.25 ]]
[[ 1 4]
[ 9 16]]
[1. 2. 3.4 5. 7. 5.4 3. 3.6] [0.83333333 1.83333333 3.23333333 4.83333333 6.83333333 5.23333333
2.83333333 3.43333333]
[[1. 1.41421356 1.73205081 2. ]
[2.23606798 2.44948974 2.64575131 2.82842712]
[3. 3.16227766 3.31662479 3.46410162]
[3.60555128 3.74165739 3.87298335 4. ]]
[ 1 2 100]
[ 0.54030231 -0.41614684 -0.9899925 ]
[[ 0.84147098 0.90929743 0.14112001 -0.7568025 ]
[-0.95892427 -0.2794155 0.6569866 0.98935825]
[ 0.41211849 -0.54402111 -0.99999021 -0.53657292]
[ 0.42016704 0.99060736 0.65028784 -0.28790332]]
[[2.71828183e+00 7.38905610e+00 2.00855369e+01 5.45981500e+01]
[1.48413159e+02 4.03428793e+02 1.09663316e+03 2.98095799e+03]
[8.10308393e+03 2.20264658e+04 5.98741417e+04 1.62754791e+05]
[4.42413392e+05 1.20260428e+06 3.26901737e+06 8.88611052e+06]]
数组切片
抽取数组中的部分元素构成新的数组
利用数组本身的索引机制切片
import numpy as np

print('*' * 40)
D = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]])

# A single element: row 1, column 2.
print(D[1, 2])
# Every row, columns 1 and 3 only.
print(D[:, [1, 3]])
# Rows 1 and 3, every column.
print(D[[1, 3], :])

# Boolean mask over rows: keep the rows whose column-0 value exceeds 5.
first_col_gt5 = D[:, 0] > 5
print(D[first_col_gt5, :])
# A row mask combined with a column index list pairs them up one-to-one
# (selected row k with listed column k), giving a 1-D result.
print(D[first_col_gt5, [2, 3]])

# A plain list of booleans works as a row mask as well.
TF = [True, False, False, True]
print(D[TF, :])
print(D[TF, [2, 3]])

# Every element greater than 4, returned as a flat 1-D array.
print(D[D > 4])
D:\Anaconda3\python.exe D:/PycharmProjects/2021/numPy.py
****************************************
7
[[ 2 4]
[ 6 8]
[10 12]
[14 16]]
[[ 5 6 7 8]
[13 14 15 16]]
[[ 9 10 11 12]
[13 14 15 16]]
[11 16]
[[ 1 2 3 4]
[13 14 15 16]]
[ 3 16]
[ 5 6 7 8 9 10 11 12 13 14 15 16]
ix_()
函数进行数组切片
ix_()
构造行、列下标索引器
import numpy as np

print('*' * 40)
D = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]])

# np.ix_ turns a row selector and a column selector into an open mesh, so the
# result is the full sub-grid (every chosen row crossed with every chosen
# column) rather than a one-to-one pairing.
print(D[np.ix_([1, 2], [1, 3])])
print(D[np.ix_(np.arange(3), [1, 3])])

# Row selector given as a boolean mask: rows whose column-1 value is below 11.
second_col_lt11 = D[:, 1] < 11
print(D[np.ix_(second_col_lt11, [1, 2])])
print(D[np.ix_(second_col_lt11, [2])])

# Row selector given as a plain list of booleans.
TF = [True, False, False, True]
print(D[np.ix_(TF, [2])])
print(D[np.ix_(TF, [2, 3])])
D:\Anaconda3\python.exe D:/PycharmProjects/2021/numPy.py
****************************************
[[ 6 8]
[10 12]]
[[ 2 4]
[ 6 8]
[10 12]]
[[ 2 3]
[ 6 7]
[10 11]]
[[ 3]
[ 7]
[11]]
[[ 3]
[15]]
[[ 3 4]
[15 16]]
数组连接
数组的水平连接——
hstack()
数组的垂直连接——
vstack()
import numpy as np

print('*' * 40)
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])

# hstack places B to the right of A (row counts must match);
# vstack places B underneath A (column counts must match).
side_by_side = np.hstack([A, B])
stacked = np.vstack([A, B])
print(side_by_side)
print(stacked)
D:\Anaconda3\python.exe D:/PycharmProjects/2021/numPy.py
****************************************
[[1 2 5 6]
[3 4 7 8]]
[[1 2]
[3 4]
[5 6]
[7 8]]
数据存取
save()
——将数据集保存为二进制数据文件,扩展名为 .npy(保存时可省略扩展名,NumPy 会自动补上)
import numpy as np

# --- Part 1: write the data set to disk -----------------------------------
print('*' * 40)
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])
C_s = np.hstack([A, B])
# np.save appends the ".npy" suffix itself, so this writes data.npy
# (binary format) into the current working directory.
np.save('data', C_s)

# --- Part 2: read the data set back ---------------------------------------
# The import is repeated so this half can also be run as a separate script.
import numpy as np

print('*' * 40)
C_s = np.load('data.npy')
print(C_s)
D:\Anaconda3\python.exe D:/PycharmProjects/2021/numPy.py
****************************************
[[1 2 5 6]
[3 4 7 8]]
数组形态变换
reshape()
——改变原始数据的形状,不改变原始数据的值
import numpy as np

print('*' * 40)
# Twelve consecutive integers rearranged into 3 rows by 4 columns; the values
# themselves are untouched, only the layout changes.
A = np.arange(12)
B = np.reshape(A, (3, 4))
print(B)
D:\Anaconda3\python.exe D:/PycharmProjects/2021/numPy.py
****************************************
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
数组的排序与搜索
sort()
——数据从小到大排序
argmax()
argmin()
分别返回待搜索数组中最大值、最小值元素的索引值(若存在多个相同的最值,返回第一个的索引)
对于二维数组,可设置 axis=0 或 axis=1 返回各列和各行的最大值、最小值的索引值
import numpy as np
import random

print('*' * 40)
# Ten random integers drawn from the closed interval [0, 20].
A = [random.randint(0, 20) for _ in range(10)]
print(A)

# np.sort returns a new array in ascending order; arr itself is not modified.
arr = np.array(A)
print(np.sort(arr))
D:\Anaconda3\python.exe D:/PycharmProjects/2021/numPy.py
****************************************
[7, 3, 15, 3, 16, 7, 1, 10, 9, 8]
[ 1 3 3 7 7 8 9 10 15 16]
import numpy as np
import random

print('*' * 40)
arr = np.arange(1, 13)
print(arr)

arr1 = np.reshape(arr, (3, 4))
# Without an axis the search runs over the flattened array, so the result is
# a single flat index.
print(arr1.argmax())
print(arr1.argmin())
# axis=1 searches within each row and returns one column index per row.
print(arr1.argmin(axis=1))
D:\Anaconda3\python.exe D:/PycharmProjects/2021/numPy.py
****************************************
[ 1 2 3 4 5 6 7 8 9 10 11 12]
11
0
[0 0 0]
矩阵与线性代数运算
创建NumPy矩阵
利用 mat()、matrix()、bmat() 等函数创建矩阵(注:NumPy 2.0 已移除 np.mat 这一别名,新版本中请改用等价的 np.asmatrix)
import numpy as np

print('*' * 40)
# np.asmatrix is used instead of np.mat: the np.mat alias was removed in
# NumPy 2.0, while np.asmatrix is available in both 1.x and 2.x. It accepts
# the same string notation: spaces separate columns, semicolons separate rows.
mat1 = np.asmatrix("1 2 3;4 5 6;7 8 9")
# np.matrix builds the same matrix from a nested list.
mat2 = np.matrix([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(mat1, mat2, sep='\n')
D:\Anaconda3\python.exe D:/PycharmProjects/2021/numPy.py
****************************************
[[1 2 3]
[4 5 6]
[7 8 9]]
[[1 2 3]
[4 5 6]
[7 8 9]]
bmat()
将小矩阵转换成大矩阵
import numpy as np

print('*' * 40)
arr1 = np.eye(3)   # 3x3 identity
arr2 = arr1 * 3    # 3x3 diagonal matrix with 3 on the diagonal

# Assemble a 6x6 block matrix from a 2x2 grid of the small blocks.
mat = np.bmat([[arr1, arr2],
               [arr1, arr2]])

for item in (arr1, arr2, mat):
    print(item)
D:\Anaconda3\python.exe D:/PycharmProjects/2021/numPy.py
****************************************
[[1. 0. 0.]
[0. 1. 0.]
[0. 0. 1.]]
[[3. 0. 0.]
[0. 3. 0.]
[0. 0. 3.]]
[[1. 0. 0. 3. 0. 0.]
[0. 1. 0. 0. 3. 0.]
[0. 0. 1. 0. 0. 3.]
[1. 0. 0. 3. 0. 0.]
[0. 1. 0. 0. 3. 0.]
[0. 0. 1. 0. 0. 3.]]
矩阵的属性和基本运算
特有属性 | 说明 |
---|---|
T | 返回自身的转置 |
H | 自身的共轭转置 |
I | 返回自身的逆矩阵 |
import numpy as np

print('*' * 40)
mat = np.matrix(np.arange(4).reshape((2, 2)))

# Matrix-specific attributes, printed in order:
#   T - transpose
#   H - conjugate transpose (same as T here because the entries are real)
#   I - inverse
for attribute in ('T', 'H', 'I'):
    print(getattr(mat, attribute))
D:\Anaconda3\python.exe D:/PycharmProjects/2021/numPy.py
****************************************
[[0 2]
[1 3]]
[[0 2]
[1 3]]
[[-1.5 0.5]
[ 1. 0. ]]
线性代数运算
线性代数运算的相关函数位于 numpy.linalg 模块中,常用函数如下:
函数 | 说明 |
---|---|
inv | 计算逆矩阵 |
solve | 求解线性方程组Ax = b |
eig | 求解特征值和特征向量 |
eigvals | 求解特征值 |
svd | 奇异值分解 |
det | 计算矩阵行列式的值 |
import numpy as np

print('*' * 40)
mat = np.matrix(np.arange(4).reshape((2, 2)))

# Invert the matrix with the linear-algebra module.
inverse = np.linalg.inv(mat)
print(inverse)
D:\Anaconda3\python.exe D:/PycharmProjects/2021/numPy.py
****************************************
[[-1.5 0.5]
[ 1. 0. ]]
后期需要学习或复习线性代数:
求解线性方程组
求解特征值和特征向量
奇异值分解
计算矩阵行列式的值
版权声明:本文为weixin_55768452原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。