Python-seaborn 数据可视化


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
In [28]:
crash_df=sns.load_dataset("car_crashes")
In [20]:
crash_df.head()
Out[20]:

total
speeding
alcohol
not_distracted
no_previous
ins_premium
ins_losses
abbrev
0
18.8
7.332
5.640
18.048
15.040
784.55
145.08
AL
1
18.1
7.421
4.525
16.290
17.014
1053.48
133.93
AK
2
18.6
6.510
5.208
15.624
17.856
899.47
110.35
AZ
3
22.4
4.032
5.824
21.056
21.280
827.34
142.39
AR
4
12.0
4.200
3.360
10.920
10.680
878.41
165.63
CA

distribution plot

In [28]:
sns.histplot(crash['not_distracted'],kde=True,bins=10)## kde默认值为False, 表示趋势线## bins默认值为10, 表示分为几份
Out[28]:
<AxesSubplot:xlabel='not_distracted', ylabel='Count'>

joint plot

In [35]:
sns.jointplot(x='speeding',y='alcohol',data=crash_df,kind='reg')## kind : { "scatter" | "kde" | "hist" | "hex" | "reg" | "resid" }
Out[35]:
<seaborn.axisgrid.JointGrid at 0x7fe8a3d8cf90>

KDE plot

In [36]:
sns.kdeplot(crash_df['alcohol'])
Out[36]:
<AxesSubplot:xlabel='alcohol', ylabel='Density'>

pair plot

In [2]:
tips_df=sns.load_dataset('tips')sns.pairplot(tips_df,hue='smoker')
Out[2]:
<seaborn.axisgrid.PairGrid at 0x7ff6c2684b90>

In [62]:
tips_df.head()
Out[62]:

total_bill
tip
sex
smoker
day
time
size
0
16.99
1.01
Female
No
Sun
Dinner
2
1
10.34
1.66
Male
No
Sun
Dinner
3
2
21.01
3.50
Male
No
Sun
Dinner
3
3
23.68
3.31
Male
No
Sun
Dinner
2
4
24.59
3.61
Female
No
Sun
Dinner
4

styling

In [67]:
sns.set_style('ticks')## {darkgrid, whitegrid, dark, white, ticks}
plt.figure(figsize=(8,4))##
sns.set_context('paper',font_scale=1.6)##{paper, notebook, talk, poster}
sns.jointplot(x='speeding',y='alcohol',data=crash_df,kind='reg')
sns.despine(left=True)##去掉坐标轴线

<Figure size 576x288 with 0 Axes>

bar plot

In [3]:
sns.barplot(x='sex',y='total_bill',data=tips_df,estimator=np.std)##
Out[3]:
<AxesSubplot:xlabel='sex', ylabel='total_bill'>

box plot

In [6]:
sns.boxplot(x='day',y='total_bill',data=tips_df,hue='sex')
plt.legend(loc=1)## 改变legend位置
Out[6]:
<matplotlib.legend.Legend at 0x7ff6bc7e7710>

violin plot

In [9]:
sns.violinplot(x='day',y='total_bill',data=tips_df,hue='sex')##本质上基于kde曲线绘制
Out[9]:
<AxesSubplot:xlabel='day', ylabel='total_bill'>

In [10]:
sns.violinplot(x='day',y='total_bill',data=tips_df,hue='sex',split=True)
Out[10]:
<AxesSubplot:xlabel='day', ylabel='total_bill'>

strip plot

In [15]:
sns.stripplot(x='day',y='total_bill',data=tips_df,jitter=True,hue='sex',dodge=True)##基于scatter
Out[15]:
<AxesSubplot:xlabel='day', ylabel='total_bill'>

swarm plot( violin + strip )

In [26]:
sns.violinplot(x='day',y='total_bill',data=tips_df,hue='sex',split=True)
sns.swarmplot(x='day',y='total_bill',data=tips_df,color='white')
Out[26]:
<AxesSubplot:xlabel='day', ylabel='total_bill'>

In [27]:
sns.swarmplot(x='day',y='total_bill',data=tips_df)
Out[27]:
<AxesSubplot:xlabel='day', ylabel='total_bill'>

heatmap

In [29]:
crash_mx=crash_df.corr()
crash_mx
Out[29]:

total
speeding
alcohol
not_distracted
no_previous
ins_premium
ins_losses
total
1.000000
0.611548
0.852613
0.827560
0.956179
-0.199702
-0.036011
speeding
0.611548
1.000000
0.669719
0.588010
0.571976
-0.077675
-0.065928
alcohol
0.852613
0.669719
1.000000
0.732816
0.783520
-0.170612
-0.112547
not_distracted
0.827560
0.588010
0.732816
1.000000
0.747307
-0.174856
-0.075970
no_previous
0.956179
0.571976
0.783520
0.747307
1.000000
-0.156895
-0.006359
ins_premium
-0.199702
-0.077675
-0.170612
-0.174856
-0.156895
1.000000
0.623116
ins_losses
-0.036011
-0.065928
-0.112547
-0.075970
-0.006359
0.623116
1.000000
In [32]:
sns.heatmap(crash_mx,annot=True,cmap='Blues')
Out[32]:
<AxesSubplot:>

In [34]:
flights=sns.load_dataset('flights')flights=flights.pivot_table(index='month',columns='year',values='passengers')
flights
Out[34]:
year
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
month












Jan
112
115
145
171
196
204
242
284
315
340
360
417
Feb
118
126
150
180
196
188
233
277
301
318
342
391
Mar
132
141
178
193
236
235
267
317
356
362
406
419
Apr
129
135
163
181
235
227
269
313
348
348
396
461
May
121
125
172
183
229
234
270
318
355
363
420
472
Jun
135
149
178
218
243
264
315
374
422
435
472
535
Jul
148
170
199
230
264
302
364
413
465
491
548
622
Aug
148
170
199
242
272
293
347
405
467
505
559
606
Sep
136
158
184
209
237
259
312
355
404
404
463
508
Oct
119
133
162
191
211
229
274
306
347
359
407
461
Nov
104
114
146
172
180
203
237
271
305
310
362
390
Dec
118
140
166
194
201
229
278
306
336
337
405
432
In [39]:
sns.heatmap(flights,cmap='Blues',linecolor='white',linewidth=1)
Out[39]:
<AxesSubplot:xlabel='year', ylabel='month'>

cluster map

In [40]:
iris=sns.load_dataset('iris')iris.head()
Out[40]:

sepal_length
sepal_width
petal_length
petal_width
species
0
5.1
3.5
1.4
0.2
setosa
1
4.9
3.0
1.4
0.2
setosa
2
4.7
3.2
1.3
0.2
setosa
3
4.6
3.1
1.5
0.2
setosa
4
5.0
3.6
1.4
0.2
setosa
In [41]:
species=iris.pop('species')
In [43]:
sns.clustermap(iris,cmap='Blues')
Out[43]:
<seaborn.matrix.ClusterGrid at 0x7ff6af112050>

pairgrid

In [51]:
iris=sns.load_dataset('iris')
iris_g=sns.PairGrid(iris,hue='species')
iris_g.map_diag(plt.hist)
iris_g.map_offdiag(plt.scatter)
iris_g.map_lower(sns.kdeplot)
Out[51]:
<seaborn.axisgrid.PairGrid at 0x7ff6ad5a08d0>

facet grid

In [52]:
tips_fg=sns.FacetGrid(tips_df,col='time',row='smoker')
tips_fg.map(plt.scatter,'total_bill','tip')
Out[52]:
<seaborn.axisgrid.FacetGrid at 0x7ff6a9b2e710>

In [54]:
tips_fg=sns.FacetGrid(tips_df,col='time',hue='smoker',height=4,aspect=1.3,col_order=['Dinner','Lunch'],palette='Set1')
tips_fg.map(plt.scatter,'total_bill','tip',edgecolor='w')
Out[54]:
<seaborn.axisgrid.FacetGrid at 0x7ff6a994df50>

In [55]:
kws=dict(s=50,linewidth=.5,edgecolor='w')
In [56]:
tips_fg=sns.FacetGrid(tips_df,col='sex',hue='smoker',height=4,aspect=1.3,hue_order=['Yes','No'],hue_kws=dict(marker=['^','v']))
tips_fg.map(plt.scatter,'total_bill','tip',**kws)
Out[56]:
<seaborn.axisgrid.FacetGrid at 0x7ff6a98a8f50>



Tags: