Python 大数据挖掘与实战 基础+NumPy包的使用

  • Post author:
  • Post category:python




Python 大数据挖掘与实战 基础+NumPy包的使用



python 基础



基本的数据类型

数值、字符串、列表、元组、集合、字典



科学计算包NumPy



1. NumPy 简介

用于科学计算的基础包,数据分析与挖掘包的基础



2. NumPy 的使用




array()

函数创建数组
import numpy as np
# Build a 2x2 array from a nested Python list with np.array().
L = [
    [1, 2],
    [3, 4],
]
array = np.array(L)
print(array)
---------------------
[[1 2]
 [3 4]]


利用内置函数创建数组
import numpy as np

# np.ones((n, m))    -> n-by-m array filled with 1.0 (the shape is one tuple argument)
# np.zeros((n, m))   -> n-by-m array filled with 0.0
# np.arange(a, b, c) -> 1-D array that starts at a, advances by c and stops
#                       before reaching b (defaults: a=0, c=1)
shape = (3, 4)
a1 = np.ones(shape)
a2 = np.zeros(shape)
a3 = np.arange(0, 10, 2)
a4 = np.arange(10)
# np.array() takes the data itself, so this is the 1-D array [10, 3].
a5 = np.array([10, 3])
for item in (a1, a2, a3, a4, a5):
    print(item)
-------------------------------------
D:\Anaconda3\python.exe D:/PycharmProjects/2021/numPy.py
[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
[0 2 4 6 8]
[0 1 2 3 4 5 6 7 8 9]
[10  3]


数组尺寸



.shape

返回数组的尺寸,返回值为元组



.reshape()

将数组进行重排
import numpy as np

# .shape reports the size along every axis as a tuple.
d11 = np.array([1, 2, 3, 4, 0.1, 7])            # 1-D, six elements
d22 = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])    # 2-D, two rows by four columns
s11, s22 = d11.shape, d22.shape
print(s11)
print(s22)
------------------------
D:\Anaconda3\python.exe D:/PycharmProjects/2021/numPy.py
(6,)
(2, 4)
import numpy as np

# reshape() arranges the nine values 0..8 as 3 rows by 3 columns.
r = np.array(range(9))
r1 = r.reshape(3, 3)
print(r)
print(r1)
-------------------------
D:\Anaconda3\python.exe D:/PycharmProjects/2021/numPy.py
[0 1 2 3 4 5 6 7 8]
[[0 1 2]
 [3 4 5]
 [6 7 8]]


数组运算

数组之间的加减乘除、乘方运算、数组的数学函数运算

import numpy as np

print('*' * 40)
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])
# Element-wise arithmetic: +, -, *, / and ** act on matching positions;
# a scalar operand is applied to every element.
print(A + B)
print(A - B)
print(A * B)
print(A / B)
print(1 / A)
print(A ** 2)

# Min-max normalization: rescale C1 so that its smallest value becomes 0 and
# its largest becomes 1. The subtraction has to be parenthesized before the
# division; otherwise "/" binds tighter than "-" and only the constant
# min / (max - min) is subtracted from C1.
C1 = np.array([1, 2, 3.4, 5, 7, 5.4, 3, 3.6])
C1_min, C1_max = C1.min(), C1.max()
C2 = (C1 - C1_min) / (C1_max - C1_min)
print(C1, C2)
# Element-wise math functions
D = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]])
print(np.sqrt(D))
print(np.abs([1, -2, -100]))
print(np.cos([1, 2, 3]))
print(np.sin(D))
print(np.exp(D))
D:\Anaconda3\python.exe D:/PycharmProjects/2021/numPy.py
****************************************
[[ 6  8]
 [10 12]]
[[-4 -4]
 [-4 -4]]
[[ 5 12]
 [21 32]]
[[0.2        0.33333333]
 [0.42857143 0.5       ]]
[[1.         0.5       ]
 [0.33333333 0.25      ]]
[[ 1  4]
 [ 9 16]]
[1.  2.  3.4 5.  7.  5.4 3.  3.6] [0.83333333 1.83333333 3.23333333 4.83333333 6.83333333 5.23333333
 2.83333333 3.43333333]
[[1.         1.41421356 1.73205081 2.        ]
 [2.23606798 2.44948974 2.64575131 2.82842712]
 [3.         3.16227766 3.31662479 3.46410162]
 [3.60555128 3.74165739 3.87298335 4.        ]]
[  1   2 100]
[ 0.54030231 -0.41614684 -0.9899925 ]
[[ 0.84147098  0.90929743  0.14112001 -0.7568025 ]
 [-0.95892427 -0.2794155   0.6569866   0.98935825]
 [ 0.41211849 -0.54402111 -0.99999021 -0.53657292]
 [ 0.42016704  0.99060736  0.65028784 -0.28790332]]
[[2.71828183e+00 7.38905610e+00 2.00855369e+01 5.45981500e+01]
 [1.48413159e+02 4.03428793e+02 1.09663316e+03 2.98095799e+03]
 [8.10308393e+03 2.20264658e+04 5.98741417e+04 1.62754791e+05]
 [4.42413392e+05 1.20260428e+06 3.26901737e+06 8.88611052e+06]]


数组切片

抽取数组中的部分元素构成新的数组



利用数组本身的索引机制切片
import numpy as np

print('*' * 40)
# 4x4 grid holding 1..16 row by row.
D = np.arange(1, 17).reshape(4, 4)

# Single element: row 1, column 2.
print(D[1, 2])
# Columns 1 and 3 of every row, then rows 1 and 3 with all their columns.
print(D[:, [1, 3]])
print(D[[1, 3], :])

# Boolean row mask: rows whose column-0 value is greater than 5, all columns.
first_col_gt5 = D[:, 0] > 5
print(D[first_col_gt5, :])
# A mask combined with an index list pairs the selected rows with the listed
# columns one-to-one, so this prints D[2, 2] and D[3, 3], not a 2x2 block.
print(D[first_col_gt5, [2, 3]])

# A plain list of booleans works as a row mask too.
TF = [True, False, False, True]
print(D[TF, :])
print(D[TF, [2, 3]])  # pairs (row 0, col 2) and (row 3, col 3)

# Every element greater than 4, returned as a 1-D array.
print(D[D > 4])
D:\Anaconda3\python.exe D:/PycharmProjects/2021/numPy.py
****************************************
7
[[ 2  4]
 [ 6  8]
 [10 12]
 [14 16]]
[[ 5  6  7  8]
 [13 14 15 16]]
[[ 9 10 11 12]
 [13 14 15 16]]
[11 16]
[[ 1  2  3  4]
 [13 14 15 16]]
[ 3 16]
[ 5  6  7  8  9 10 11 12 13 14 15 16]



ix_()

函数进行数组切片

构造行、列下标索引器

import numpy as np

print('*' * 40)
D = np.arange(1, 17).reshape(4, 4)

# np.ix_ combines a row selector and a column selector so that the result is
# the full rows-by-columns sub-block.
print(D[np.ix_([1, 2], [1, 3])])
print(D[np.ix_(np.arange(3), [1, 3])])

# Boolean row selector: rows whose column-1 value is below 11.
small_rows = D[:, 1] < 11
print(D[np.ix_(small_rows, [1, 2])])
print(D[np.ix_(small_rows, [2])])

# A list of booleans is accepted as a row selector as well.
TF = [True, False, False, True]
for cols in ([2], [2, 3]):
    print(D[np.ix_(TF, cols)])
D:\Anaconda3\python.exe D:/PycharmProjects/2021/numPy.py
****************************************
[[ 6  8]
 [10 12]]
[[ 2  4]
 [ 6  8]
 [10 12]]
[[ 2  3]
 [ 6  7]
 [10 11]]
[[ 3]
 [ 7]
 [11]]
[[ 3]
 [15]]
[[ 3  4]
 [15 16]]


数组连接

数组的水平连接——

hstack()

数组的垂直连接——

vstack()

import numpy as np

print('*' * 40)
A = np.arange(1, 5).reshape(2, 2)   # [[1, 2], [3, 4]]
B = A + 4                           # [[5, 6], [7, 8]]
# hstack joins the arrays side by side; vstack places one on top of the other.
side_by_side = np.hstack((A, B))
stacked = np.vstack((A, B))
print(side_by_side)
print(stacked)
D:\Anaconda3\python.exe D:/PycharmProjects/2021/numPy.py
****************************************
[[1 2 5 6]
 [3 4 7 8]]
[[1 2]
 [3 4]
 [5 6]
 [7 8]]


数据存取


save()

——将数据集保存为二进制数据文件,拓展名为

npy

import numpy as np

print('*' * 40)
A = np.arange(1, 5).reshape(2, 2)   # [[1, 2], [3, 4]]
B = A + 4                           # [[5, 6], [7, 8]]
C_s = np.hstack((A, B))
# Store the array as a binary file; np.save appends ".npy", giving data.npy.
np.save('data', C_s)

# Second script: read the saved array back from disk.
import numpy as np

print('*' * 40)
C_s = np.load('data.npy')
print(C_s)
D:\Anaconda3\python.exe D:/PycharmProjects/2021/numPy.py
****************************************
[[1 2 5 6]
 [3 4 7 8]]


数组形态变换


reshape()

——改变原始数据的形状,不改变原始数据的值

import numpy as np

print('*' * 40)
# reshape() hands back the 12 values arranged as 3 rows by 4 columns;
# A itself stays one-dimensional.
A = np.arange(12)
B = np.reshape(A, (3, 4))
print(B)
D:\Anaconda3\python.exe D:/PycharmProjects/2021/numPy.py
****************************************
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


数组的排序与搜索


sort()

——数据从小到大排序


argmax()


argmin()

返回待搜索数组最大值最小值元素的

索引值

(存在多个,返回第一个)

对于二维数组,可设置 axis=0 或 axis=1 返回各列和各行的最大值、最小值的索引值

import numpy as np
import random

print('*' * 40)
# Ten random integers drawn from 0..20 (both ends included).
A = [random.randint(0, 20) for _ in range(10)]
print(A)
arr = np.array(A)
# np.sort returns an ascending copy and leaves arr unchanged.
print(np.sort(arr))
D:\Anaconda3\python.exe D:/PycharmProjects/2021/numPy.py
****************************************
[7, 3, 15, 3, 16, 7, 1, 10, 9, 8]
[ 1  3  3  7  7  8  9 10 15 16]
import numpy as np
import random

print('*' * 40)
arr = np.arange(1, 13)
print(arr)
arr1 = arr.reshape(3, 4)
# Without an axis the returned index refers to the flattened array.
print(arr1.argmax())
print(arr1.argmin())
# axis=1 yields one index per row: the position of that row's minimum.
print(arr1.argmin(axis=1))
D:\Anaconda3\python.exe D:/PycharmProjects/2021/numPy.py
****************************************
[ 1  2  3  4  5  6  7  8  9 10 11 12]
11
0
[0 0 0]


矩阵与线性代数运算


创建NumPy矩阵


mat()(NumPy 2.0 起已移除,可改用等价的 asmatrix())、matrix()、bmat()

等函数创建矩阵

import numpy as np

print('*' * 40)
# Two ways to build the same 3x3 matrix.
# np.asmatrix accepts a MATLAB-style string: spaces separate columns and ";"
# separates rows. It replaces np.mat, which was an alias of np.asmatrix and
# was removed in NumPy 2.0 (calling np.mat there raises AttributeError).
mat1 = np.asmatrix("1 2 3;4 5 6;7 8 9")
# np.matrix builds the matrix from a nested list.
mat2 = np.matrix([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(mat1, mat2, sep='\n')
D:\Anaconda3\python.exe D:/PycharmProjects/2021/numPy.py
****************************************
[[1 2 3]
 [4 5 6]
 [7 8 9]]
[[1 2 3]
 [4 5 6]
 [7 8 9]]


bmat()

将小矩阵转换成大矩阵

import numpy as np

print('*' * 40)
arr1 = np.eye(3)    # 3x3 identity
arr2 = arr1 * 3
# bmat() assembles a block matrix; the nested list of blocks below builds the
# same layout as the string form 'arr1 arr2;arr1 arr2'.
mat = np.bmat([[arr1, arr2], [arr1, arr2]])
for item in (arr1, arr2, mat):
    print(item)
D:\Anaconda3\python.exe D:/PycharmProjects/2021/numPy.py
****************************************
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]
[[3. 0. 0.]
 [0. 3. 0.]
 [0. 0. 3.]]
[[1. 0. 0. 3. 0. 0.]
 [0. 1. 0. 0. 3. 0.]
 [0. 0. 1. 0. 0. 3.]
 [1. 0. 0. 3. 0. 0.]
 [0. 1. 0. 0. 3. 0.]
 [0. 0. 1. 0. 0. 3.]]


矩阵的属性和基本运算
特有属性 说明
T 返回自身的转置
H 自身的共轭转置
I 返回自身的逆矩阵
import numpy as np

print('*' * 40)
base = np.arange(4).reshape(2, 2)   # [[0, 1], [2, 3]]
mat = np.matrix(base)
# In order: transpose (.T), conjugate transpose (.H, equal to .T for real
# entries) and inverse (.I).
for view in (mat.T, mat.H, mat.I):
    print(view)
D:\Anaconda3\python.exe D:/PycharmProjects/2021/numPy.py
****************************************
[[0 2]
 [1 3]]
[[0 2]
 [1 3]]
[[-1.5  0.5]
 [ 1.   0. ]]


线性代数运算



numpy.linalg

模块中

函数 说明
inv 计算逆矩阵
solve 求解线性方程组Ax = b
eig 求解特征值和特征向量
eigvals 求解特征值
svd 奇异值分解
det 计算矩阵行列式的值
import numpy as np

print('*' * 40)
base = np.arange(4).reshape(2, 2)   # [[0, 1], [2, 3]]
mat = np.matrix(base)
# Inverse computed by numpy.linalg.
inverse = np.linalg.inv(mat)
print(inverse)
D:\Anaconda3\python.exe D:/PycharmProjects/2021/numPy.py
****************************************
[[-1.5  0.5]
 [ 1.   0. ]]

后期需要学习或复习线性代数:



求解线性方程组


求解特征值和特征向量


奇异值分解


计算矩阵行列式的值



版权声明:本文为weixin_55768452原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。