26.极区图(南丁格尔玫瑰图)
    
    27.维恩图 (Venn diagram)
    
    28.面状图(Area chart)
    
    29.树地图
   
    
    
    26.极区图(南丁格尔玫瑰图)
   
极区图(又名南丁格尔玫瑰图)呈放射延伸状,每块会占一定的角度。其半径大小表示它代表的某一类数据的大小。其角度大小表示它所占总类别的比例。
南丁格尔玫瑰图由南丁格尔发明,她是英国护士和统计学家。她在英国军营工作时,收集了克里米亚战争时期士兵在不同月份的死亡率和死因分布,并通过玫瑰图的方式有效地打动了当时的高层管理人员,医疗改良的提案因此得到了大力支持,士兵的死亡率从42%降低至2%。后来人们便将这种图形称为南丁格尔玫瑰图。
那南丁格尔玫瑰图一般用在什么场景呢?其实南丁格尔玫瑰图和饼图类似,算是饼图的一种变形,用法也一样,主要用在需要查看占比的场景中。两者唯一的区别是:饼图是通过角度判别占比大小,而玫瑰图可以通过半径大小或者扇形面积大小来判别。
import numpy as np
import matplotlib.pyplot as plt
# Seed the global NumPy RNG so every run draws the same bars.
np.random.seed(19680801)

# One bar per slice: evenly spaced angles, random heights, random widths.
# The two rand() calls stay in this order (heights first, then widths) so
# the seeded sequence is consumed exactly as before.
slice_count = 10
angles = np.linspace(0.0, 2 * np.pi, slice_count, endpoint=False)
heights = 10 * np.random.rand(slice_count)
bar_widths = np.pi / 4 * np.random.rand(slice_count)

# Colour each bar by its height, scaled by the maximum possible height (10).
bar_colors = plt.cm.viridis(heights / 10.)

polar_ax = plt.subplot(1, 1, 1, projection='polar')
polar_ax.bar(angles, heights, width=bar_widths, bottom=0.0,
             color=bar_colors, alpha=0.5)
plt.show()
     
   
import matplotlib.pyplot as plt
import numpy as np
# Radius (value) of each sector; the sector count follows from the data so
# the two can never get out of step.
radii = np.array([7, 4, 5, 3, 2, 4, 6])
N = len(radii)
# Centre angle of each sector. linspace(..., endpoint=False) always yields
# exactly N angles, whereas arange with a fractional float step can return
# one element more or fewer because of rounding.
theta = np.linspace(0., 2 * np.pi, N, endpoint=False)
# Create polar axes at [left, bottom, width, height] in figure fractions.
plt.axes([0.025, 0.025, 0.95, 0.95], polar=True)
# One colour per sector, given as hex RGB values (colour names work as well).
colors = np.array(['#4bb2c5', '#c5b47f', '#EAA228', '#579575',
                   '#839557', '#958c12', '#953579'])
# bar() takes the angles as x positions and the radii as heights; every
# sector gets the same angular width of 2*pi/N.
bars = plt.bar(theta, radii, width=(2 * np.pi / N), bottom=0.0, color=colors)
plt.show()
     
   
    
    
    27.维恩图 (Venn diagram)
   
维恩图是显示集合之间逻辑关系(交集,差集,并集)的图。
使用时要安装matplotlib-venn:pip install matplotlib-venn
import matplotlib.pyplot as plt
from matplotlib_venn import venn2
# First way to build a two-group Venn diagram: hand venn2 a 3-tuple of
# sizes together with the two set labels.
region_sizes = (10, 5, 2)
group_names = ('Group A', 'Group B')
venn2(subsets=region_sizes, set_labels=group_names)
plt.show()
     
   
import matplotlib.pyplot as plt
from matplotlib_venn import venn2
# Second way: pass the two sets themselves; 'D' is the only shared element.
left_group = {'A', 'B', 'C', 'D'}
right_group = {'D', 'E', 'F'}
venn2([left_group, right_group])
plt.show()
     
   
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib_venn import venn2
# Survey results: number of clients per preference category.
df = pd.DataFrame(
    {
        'Product': ['Only cheese', 'Only red wine', 'Both'],
        'NbClient': [900, 1200, 400],
    },
    columns=['Product', 'NbClient'],
)
print(df)
# Printed result:
#          Product  NbClient
# 0    Only cheese       900
# 1  Only red wine      1200
# 2           Both       400

# First way: region sizes are passed as a dict keyed by region id.
plt.figure(figsize=(8, 11))

# Region id -> (row of df holding its data, fill colour).
regions = {'10': (0, 'yellow'), '01': (1, 'red'), '11': (2, 'orange')}
total_clients = df.NbClient.sum()

v2 = venn2(
    subsets={rid: df.loc[row, 'NbClient'] for rid, (row, _) in regions.items()},
    set_labels=('', ''),
)

for rid, (row, fill) in regions.items():
    # Fill the region and drop its outline.
    patch = v2.get_patch_by_id(rid)
    patch.set_color(fill)
    patch.set_edgecolor('none')

    # Label: product name, absolute count, share of all respondents.
    product = df.loc[row, 'Product']
    count = df.loc[row, 'NbClient']
    share = np.divide(count, total_clients) * 100
    v2.get_label_by_id(rid).set_text('%s\n%d\n(%.0f%%)' % (product, count, share))

for label in v2.subset_labels:
    label.set_fontsize(12)
plt.show()
     
   
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib_venn import venn2
# Second way: pass the two sets themselves instead of pre-computed sizes.
# The sets are assembled from three disjoint parts holding 9, 12 and 4
# items (36% / 48% / 16% of 25), so the drawn regions agree with the
# percentages printed on them.
only_cheese = {'cheese-a', 'cheese-b', 'cheese-c', 'cheese-d',
               'cheese-e', 'cheese-f', 'cheese-g', 'cheese-h',
               'cheese-i'}
only_red_wine = {'red wine-a', 'red wine-b', 'red wine-c', 'red wine-d',
                 'red wine-e', 'red wine-f', 'red wine-g', 'red wine-h',
                 'red wine-i', 'red wine-j', 'red wine-k', 'red wine-l'}
both = {'cheese', 'red wine', 'cheese & red wine-a', 'cheese & red wine-b'}
grp1 = only_cheese | both
grp2 = only_red_wine | both
total = len(grp1 | grp2)

v2 = venn2([grp1, grp2], set_labels=('', ''))

# (region id, fill colour, caption, members of that region)
regions = (
    ('10', 'yellow', 'Only cheese', grp1 - grp2),
    ('01', 'red', 'Only red wine', grp2 - grp1),
    ('11', 'orange', 'Both', grp1 & grp2),
)
for region_id, fill, caption, members in regions:
    patch = v2.get_patch_by_id(region_id)
    patch.set_color(fill)
    patch.set_edgecolor('none')
    # Derive the percentage from the data so label and area cannot drift apart.
    share = 100.0 * len(members) / total
    v2.get_label_by_id(region_id).set_text('%s\n(%.0f%%)' % (caption, share))
plt.show()
     
   
这种维恩图通常可用于零售交易的分析。 假设需要研究奶酪和红酒的受欢迎程度,并且有2500位客户回答了问卷。 根据上图,我们发现在2500位客户中,有900位客户(36%)只喜欢奶酪,有1200位客户(48%)只喜欢红酒,而400位客户(16%)两种产品都喜欢。
import matplotlib.pyplot as plt
from matplotlib_venn import venn3
plt.figure(figsize=(12, 12))

# Region id -> (size, fill colour, caption shown inside the region).
regions = {
    '100': (30, 'red', 'Math'),
    '010': (30, 'yellow', 'Computer science'),
    '001': (30, 'blue', 'Domain expertise'),
    '110': (17, 'orange', 'Machine learning'),
    '101': (17, 'purple', 'Statistical research'),
    '011': (17, 'green', 'Data processing'),
    '111': (5, 'grey', 'Data science'),
}

v3 = venn3(
    subsets={rid: size for rid, (size, _, _) in regions.items()},
    set_labels=('', '', ''),
)

# Colour every region and replace its numeric label with the caption.
for rid, (_, fill, caption) in regions.items():
    v3.get_patch_by_id(rid).set_color(fill)
    v3.get_label_by_id(rid).set_text(caption)

for label in v3.subset_labels:
    label.set_fontsize(13)
plt.show()
     
   
    
    
    28.面状图(Area chart)
   
    面状图或面积图以图形方式显示定量数据。 它基于折线图。 通常用颜色、纹理和阴影线强调轴和线之间的区域。
    
    可用于显示或比较随时间变化的定量进度。
   
import numpy as np
import matplotlib.pyplot as plt
plt.figure(figsize=(6, 4))

# Monthly ice-cream turnover (K dollars), January through December.
turnover = [2, 7, 14, 17, 20, 27, 30, 38, 25, 18, 6, 1]
month_pos = np.arange(len(turnover))

# Shade the area under the curve, then trace the curve itself on top.
plt.fill_between(month_pos, turnover, color="skyblue", alpha=0.4)
plt.plot(month_pos, turnover, color="Slateblue", alpha=0.6, linewidth=2)

# Axis cosmetics: tick positions 0..11 are labelled 1..12.
plt.tick_params(labelsize=12)
plt.xticks(month_pos, month_pos + 1)
plt.xlabel('Month', size=12)
plt.ylabel('Turnover (K dollars) of ice-cream', size=12)
plt.ylim(bottom=0)
plt.show()
     
   
假设上面的图描述了一年内冰淇淋销售的营业额。 根据该图,可以清楚地发现,销售在夏天达到顶峰,然后从秋天到冬天下降。
例子2:
import numpy as np
import matplotlib.pyplot as plt
plt.figure(figsize=(9, 6))

# Monthly ice-cream turnover (K dollars) for two consecutive years.
year_n_1 = [1.5, 3, 10, 13, 22, 36, 30, 33, 24.5, 15, 6.5, 1.2]
year_n = [2, 7, 14, 17, 20, 27, 30, 38, 25, 18, 6, 1]
month_pos = np.arange(12)

# Draw both years as translucent filled areas so the overlap stays visible.
for series, fill, legend_text in (
        (year_n_1, "lightpink", 'year N-1'),
        (year_n, "skyblue", 'year N')):
    plt.fill_between(month_pos, series, color=fill, alpha=0.5, label=legend_text)

# Axis cosmetics: tick positions 0..11 are labelled 1..12.
plt.tick_params(labelsize=12)
plt.xticks(month_pos, month_pos + 1)
plt.xlabel('Month', size=12)
plt.ylabel('Turnover (K dollars) of ice-cream', size=12)
plt.ylim(bottom=0)
plt.legend()
plt.show()
     
   
    
    
    29.树地图
   
    树地图将层次结构(树状结构)数据显示为一组嵌套矩形。 树的每个分支都有一个矩形,然后用代表子分支的较小矩形平铺。 叶节点矩形的面积与数据的指定维度成比例。 通常,叶节点被着色以显示数据的另一个维度。
    
    树地图的思想就是通过方块的面积来表示,面积越大,其代表的值就越大,反之亦然。
    
    使用时安装squarify:pip install squarify
   
(base) C:\Users\toto>pip install squarify
Collecting squarify
  Downloading squarify-0.4.3-py3-none-any.whl (4.3 kB)
Installing collected packages: squarify
Successfully installed squarify-0.4.3
(base) C:\Users\toto>
函数语法及参数:
squarify.plot(sizes, 
            norm_x=100, 
            norm_y=100, 
            color=None, 
            label=None, 
            value=None, 
            alpha,
            **kwargs)
    sizes:指定离散变量各水平对应的数值,即反映树地图子块的面积大小;
    
    norm_x:默认将x轴的范围限定在0-100之内;
    
    norm_y:默认将y轴的范围限定在0-100之内;
    
    color:自定义设置树地图子块的填充色;
    
    label:为每个子块指定标签;
    
    value:为每个子块添加数值大小的标签;
    
    alpha:设置填充色的透明度;
    
    **kwargs:关键字参数,与条形图的关键字参数类似,如设置边框色、边框粗细等。
   
import matplotlib.pyplot as plt
import squarify
# Select a font for the Chinese labels and title used below.
# If minus signs fail to render with this font, additionally set
# plt.rcParams['axes.unicode_minus'] = False.
plt.rcParams['font.sans-serif'] = 'Microsoft YaHei'
# Label font size. rc settings only apply to text created afterwards, so
# this has to run before squarify.plot() creates the labels.
plt.rc('font', size=10)

# Data: city names and their GDP figures, in matching order.
name = ['上海GDP', '北京GDP', '深圳GDP', '广州GDP',
        '重庆GDP', '苏州GDP', '成都GDP', '武汉GDP',
        '杭州GDP', '天津GDP', '南京GDP', '长沙GDP',
        '宁波GDP', '无锡GDP', '青岛GDP', '郑州GDP',
        '佛山GDP', '泉州GDP', '东莞GDP', '济南GDP']
income = [38155, 35371, 26927, 23628, 23605, 19235, 17012, 16900, 15373, 14104,
          14030, 12580, 11985, 11852, 11741, 11380, 10751, 9946, 9482, 9443]

# One fill colour per rectangle.
colors = ['steelblue', '#9999ff', 'red', 'indianred', 'deepskyblue', 'lime', 'magenta', 'violet', 'peru', 'green',
          'yellow', 'orange', 'tomato', 'lawngreen', 'cyan', 'darkcyan', 'dodgerblue', 'teal', 'tan', 'royalblue']

plot = squarify.plot(sizes=income,  # rectangle areas
                     label=name,  # text label of each rectangle
                     color=colors,  # custom fill colours
                     alpha=0.6,  # fill transparency
                     value=income,  # numeric label of each rectangle
                     edgecolor='white',  # white borders between rectangles
                     linewidth=3  # border width
                     )
# Title and its font size.
plot.set_title('2019年城市GDP排名前20(亿元)', fontdict={'fontsize': 15})
# Hide the axes.
plt.axis('off')
# Turn off the top and right ticks; these must be booleans, because the
# string 'off' is non-empty and is not interpreted as False.
plt.tick_params(top=False, right=False)
# Show the figure.
plt.show()
     
   
 
