Python-seaborn 数据可视化
import numpy as npimport pandas as pdimport matplotlib.pyplot as pltimport seaborn as sns
In [28]:
# Load seaborn's "car_crashes" example dataset into a DataFrame.
crash_df = sns.load_dataset(name="car_crashes")
In [20]:
# Preview the first five rows of the crash data.
crash_df.head(n=5)
Out[20]:
total | speeding | alcohol | not_distracted | no_previous | ins_premium | ins_losses | abbrev | |
---|---|---|---|---|---|---|---|---|
0 | 18.8 | 7.332 | 5.640 | 18.048 | 15.040 | 784.55 | 145.08 | AL |
1 | 18.1 | 7.421 | 4.525 | 16.290 | 17.014 | 1053.48 | 133.93 | AK |
2 | 18.6 | 6.510 | 5.208 | 15.624 | 17.856 | 899.47 | 110.35 | AZ |
3 | 22.4 | 4.032 | 5.824 | 21.056 | 21.280 | 827.34 | 142.39 | AR |
4 | 12.0 | 4.200 | 3.360 | 10.920 | 10.680 | 878.41 | 165.63 | CA |
distribution plot
In [28]:
# Histogram of the 'not_distracted' column.
# kde: defaults to False; when True a kernel density estimate curve is drawn
#      on top of the bars.
# bins: number of bins to split the data into (histplot's own default is
#       'auto', so 10 is requested explicitly here).
# NOTE: the frame loaded above is named `crash_df`; the previous `crash`
# reference raised a NameError on a fresh kernel.
sns.histplot(crash_df['not_distracted'], kde=True, bins=10)
Out[28]:
<AxesSubplot:xlabel='not_distracted', ylabel='Count'>
joint plot
In [35]:
# Joint distribution of speeding vs. alcohol with a regression fit.
# kind: { "scatter" | "kde" | "hist" | "hex" | "reg" | "resid" }
sns.jointplot(
    data=crash_df,
    x='speeding',
    y='alcohol',
    kind='reg',
)
Out[35]:
<seaborn.axisgrid.JointGrid at 0x7fe8a3d8cf90>
KDE plot
In [36]:
# Kernel density estimate of the 'alcohol' column.
alcohol = crash_df['alcohol']
sns.kdeplot(alcohol)
Out[36]:
<AxesSubplot:xlabel='alcohol', ylabel='Density'>
pair plot
In [2]:
# Load the "tips" example dataset and plot every pairwise relationship
# between its numeric columns, coloured by smoker status.
tips_df = sns.load_dataset(name='tips')
sns.pairplot(data=tips_df, hue='smoker')
Out[2]:
<seaborn.axisgrid.PairGrid at 0x7ff6c2684b90>
In [62]:
# Preview the first five rows of the tips data.
tips_df.head(n=5)
Out[62]:
total_bill | tip | sex | smoker | day | time | size | |
---|---|---|---|---|---|---|---|
0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
styling
In [67]:
# Available styles: {darkgrid, whitegrid, dark, white, ticks}
sns.set_style('ticks')

# Optional element scaling; contexts: {paper, notebook, talk, poster}
# sns.set_context('paper', font_scale=1.6)

# jointplot is a figure-level function that always builds its own figure, so
# calling plt.figure(figsize=(8, 4)) beforehand only left an empty
# "<Figure size 576x288 with 0 Axes>" behind. Resize the figure that
# jointplot created (it is the current figure right after the call) instead.
sns.jointplot(x='speeding', y='alcohol', data=crash_df, kind='reg')
plt.gcf().set_size_inches(8, 4)

# Remove axis spines (left=True additionally drops the left one).
sns.despine(left=True)
<Figure size 576x288 with 0 Axes>
bar plot
In [3]:
# Bar height is the standard deviation of total_bill per sex
# (np.std is used as the estimator).
sns.barplot(
    data=tips_df,
    x='sex',
    y='total_bill',
    estimator=np.std,
)
Out[3]:
<AxesSubplot:xlabel='sex', ylabel='total_bill'>
box plot
In [6]:
# Box plot of total_bill per day, split by sex.
sns.boxplot(data=tips_df, x='day', y='total_bill', hue='sex')
# Move the legend; location code 1 is matplotlib's 'upper right'.
plt.legend(loc='upper right')
Out[6]:
<matplotlib.legend.Legend at 0x7ff6bc7e7710>
violin plot
In [9]:
# Violin plot of total_bill per day, split by sex.
# The violin outline is essentially a KDE curve of each group.
sns.violinplot(
    data=tips_df,
    x='day',
    y='total_bill',
    hue='sex',
)
Out[9]:
<AxesSubplot:xlabel='day', ylabel='total_bill'>
In [10]:
# Same violin plot, with split=True so both hue levels share one violin.
sns.violinplot(
    data=tips_df,
    x='day',
    y='total_bill',
    hue='sex',
    split=True,
)
Out[10]:
<AxesSubplot:xlabel='day', ylabel='total_bill'>
strip plot
In [15]:
# Strip plot (a scatter-based categorical plot) of total_bill per day.
# jitter spreads the points; dodge separates the hue levels.
sns.stripplot(
    data=tips_df,
    x='day',
    y='total_bill',
    hue='sex',
    jitter=True,
    dodge=True,
)
Out[15]:
<AxesSubplot:xlabel='day', ylabel='total_bill'>
swarm plot (violin + swarm overlay)
In [26]:
# Draw split violins first, then overlay the individual observations as a
# white swarm plot on the same axes.
sns.violinplot(data=tips_df, x='day', y='total_bill', hue='sex', split=True)
sns.swarmplot(data=tips_df, x='day', y='total_bill', color='white')
Out[26]:
<AxesSubplot:xlabel='day', ylabel='total_bill'>
In [27]:
# Swarm plot of total_bill per day on its own.
sns.swarmplot(
    data=tips_df,
    x='day',
    y='total_bill',
)
Out[27]:
<AxesSubplot:xlabel='day', ylabel='total_bill'>
heatmap
In [29]:
crash_mx=crash_df.corr()crash_mx
Out[29]:
total | speeding | alcohol | not_distracted | no_previous | ins_premium | ins_losses | |
---|---|---|---|---|---|---|---|
total | 1.000000 | 0.611548 | 0.852613 | 0.827560 | 0.956179 | -0.199702 | -0.036011 |
speeding | 0.611548 | 1.000000 | 0.669719 | 0.588010 | 0.571976 | -0.077675 | -0.065928 |
alcohol | 0.852613 | 0.669719 | 1.000000 | 0.732816 | 0.783520 | -0.170612 | -0.112547 |
not_distracted | 0.827560 | 0.588010 | 0.732816 | 1.000000 | 0.747307 | -0.174856 | -0.075970 |
no_previous | 0.956179 | 0.571976 | 0.783520 | 0.747307 | 1.000000 | -0.156895 | -0.006359 |
ins_premium | -0.199702 | -0.077675 | -0.170612 | -0.174856 | -0.156895 | 1.000000 | 0.623116 |
ins_losses | -0.036011 | -0.065928 | -0.112547 | -0.075970 | -0.006359 | 0.623116 | 1.000000 |
In [32]:
# Heatmap of the correlation matrix; annot=True writes each value in its cell.
sns.heatmap(
    data=crash_mx,
    cmap='Blues',
    annot=True,
)
Out[32]:
<AxesSubplot:>
In [34]:
# Load the "flights" example dataset and reshape it into a month x year
# table of passenger counts.
flights = sns.load_dataset('flights').pivot_table(
    values='passengers',
    index='month',
    columns='year',
)
flights
Out[34]:
year | 1949 | 1950 | 1951 | 1952 | 1953 | 1954 | 1955 | 1956 | 1957 | 1958 | 1959 | 1960 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
month | ||||||||||||
Jan | 112 | 115 | 145 | 171 | 196 | 204 | 242 | 284 | 315 | 340 | 360 | 417 |
Feb | 118 | 126 | 150 | 180 | 196 | 188 | 233 | 277 | 301 | 318 | 342 | 391 |
Mar | 132 | 141 | 178 | 193 | 236 | 235 | 267 | 317 | 356 | 362 | 406 | 419 |
Apr | 129 | 135 | 163 | 181 | 235 | 227 | 269 | 313 | 348 | 348 | 396 | 461 |
May | 121 | 125 | 172 | 183 | 229 | 234 | 270 | 318 | 355 | 363 | 420 | 472 |
Jun | 135 | 149 | 178 | 218 | 243 | 264 | 315 | 374 | 422 | 435 | 472 | 535 |
Jul | 148 | 170 | 199 | 230 | 264 | 302 | 364 | 413 | 465 | 491 | 548 | 622 |
Aug | 148 | 170 | 199 | 242 | 272 | 293 | 347 | 405 | 467 | 505 | 559 | 606 |
Sep | 136 | 158 | 184 | 209 | 237 | 259 | 312 | 355 | 404 | 404 | 463 | 508 |
Oct | 119 | 133 | 162 | 191 | 211 | 229 | 274 | 306 | 347 | 359 | 407 | 461 |
Nov | 104 | 114 | 146 | 172 | 180 | 203 | 237 | 271 | 305 | 310 | 362 | 390 |
Dec | 118 | 140 | 166 | 194 | 201 | 229 | 278 | 306 | 336 | 337 | 405 | 432 |
In [39]:
# Heatmap of passengers per month/year with white lines between the cells.
# `linewidths` (plural) is heatmap's documented parameter; the previous
# `linewidth` spelling was only forwarded to matplotlib as an undocumented
# alias and can clash with the `linewidths` value seaborn passes itself.
sns.heatmap(flights, cmap='Blues', linecolor='white', linewidths=1)
Out[39]:
<AxesSubplot:xlabel='year', ylabel='month'>
cluster map
In [40]:
# Load the "iris" example dataset and preview its first five rows.
iris = sns.load_dataset(name='iris')
iris.head(n=5)
Out[40]:
sepal_length | sepal_width | petal_length | petal_width | species | |
---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
1 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
2 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
3 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
4 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
In [41]:
# Remove the categorical 'species' column from `iris` (in place) and keep it
# in `species`, leaving only numeric columns for the cluster map.
# pop() mutates `iris`, so an unguarded second run of this cell would raise
# KeyError; the membership check keeps the cell safe to re-run.
if 'species' in iris.columns:
    species = iris.pop('species')
In [43]:
# Clustered heatmap of the iris measurements.
sns.clustermap(
    data=iris,
    cmap='Blues',
)
Out[43]:
<seaborn.matrix.ClusterGrid at 0x7ff6af112050>
pairgrid
In [51]:
# Reload iris (with its 'species' column) and build a PairGrid by hand:
# histograms on the diagonal, scatter plots on every off-diagonal panel,
# and KDE plots additionally drawn on the lower triangle.
iris = sns.load_dataset(name='iris')
iris_g = sns.PairGrid(data=iris, hue='species')
(
    iris_g
    .map_diag(plt.hist)
    .map_offdiag(plt.scatter)
    .map_lower(sns.kdeplot)
)
Out[51]:
<seaborn.axisgrid.PairGrid at 0x7ff6ad5a08d0>
facet grid
In [52]:
# One scatter panel of tip vs. total_bill per (smoker, time) combination:
# columns are 'time', rows are 'smoker'.
tips_fg = sns.FacetGrid(data=tips_df, row='smoker', col='time')
tips_fg.map(plt.scatter, 'total_bill', 'tip')
Out[52]:
<seaborn.axisgrid.FacetGrid at 0x7ff6a9b2e710>
In [54]:
# One panel per 'time' (Dinner first, then Lunch), points coloured by smoker
# status using the 'Set1' palette; height/aspect control each panel's size.
tips_fg = sns.FacetGrid(
    data=tips_df,
    col='time',
    col_order=['Dinner', 'Lunch'],
    hue='smoker',
    palette='Set1',
    height=4,
    aspect=1.3,
)
tips_fg.map(plt.scatter, 'total_bill', 'tip', edgecolor='w')
Out[54]:
<seaborn.axisgrid.FacetGrid at 0x7ff6a994df50>
In [55]:
# Shared scatter keyword arguments: marker size, edge width and edge colour.
kws = {
    's': 50,
    'linewidth': 0.5,
    'edgecolor': 'w',
}
In [56]:
# One panel per 'sex'; smoker status selects both colour and marker shape
# ('^' for Yes, 'v' for No, following hue_order). The scatter styling comes
# from the `kws` dict defined in the previous cell.
tips_fg = sns.FacetGrid(
    data=tips_df,
    col='sex',
    hue='smoker',
    hue_order=['Yes', 'No'],
    hue_kws={'marker': ['^', 'v']},
    height=4,
    aspect=1.3,
)
tips_fg.map(plt.scatter, 'total_bill', 'tip', **kws)
Out[56]:
<seaborn.axisgrid.FacetGrid at 0x7ff6a98a8f50>