using Gadfly, RDatasets
set_default_plot_size(21cm, 8cm)
p1 = plot(dataset("datasets", "iris"), x="SepalLength", y="SepalWidth",
Geom.point)
p2 = plot(dataset("datasets", "iris"), x="SepalLength", y="SepalWidth",
Stat.binmean, Geom.point)
hstack(p1,p2)
SepalLength
-1
0
1
2
3
4
5
6
7
8
9
10
11
12
13
0.0
0.5
1.0
1.5
2.0
2.5
3.0
3.5
4.0
4.5
5.0
5.5
6.0
6.5
7.0
7.5
8.0
8.5
9.0
9.5
10.0
10.5
11.0
11.5
12.0
0
5
10
15
0.0
0.2
0.4
0.6
0.8
1.0
1.2
1.4
1.6
1.8
2.0
2.2
2.4
2.6
2.8
3.0
3.2
3.4
3.6
3.8
4.0
4.2
4.4
4.6
4.8
5.0
5.2
5.4
5.6
5.8
6.0
6.2
6.4
6.6
6.8
7.0
7.2
7.4
7.6
7.8
8.0
8.2
8.4
8.6
8.8
9.0
9.2
9.4
9.6
9.8
10.0
10.2
10.4
10.6
10.8
11.0
11.2
11.4
11.6
11.8
12.0
h,j,k,l,arrows,drag to pan
i,o,+,-,scroll,shift-drag to zoom
r,dbl-click to reset
c for coordinates
? for help
?
1.75
2.00
2.25
2.50
2.75
3.00
3.25
3.50
3.75
4.00
4.25
4.50
2.00
2.05
2.10
2.15
2.20
2.25
2.30
2.35
2.40
2.45
2.50
2.55
2.60
2.65
2.70
2.75
2.80
2.85
2.90
2.95
3.00
3.05
3.10
3.15
3.20
3.25
3.30
3.35
3.40
3.45
3.50
3.55
3.60
3.65
3.70
3.75
3.80
3.85
3.90
3.95
4.00
4.05
4.10
4.15
4.20
4.25
2
3
4
5
2.00
2.05
2.10
2.15
2.20
2.25
2.30
2.35
2.40
2.45
2.50
2.55
2.60
2.65
2.70
2.75
2.80
2.85
2.90
2.95
3.00
3.05
3.10
3.15
3.20
3.25
3.30
3.35
3.40
3.45
3.50
3.55
3.60
3.65
3.70
3.75
3.80
3.85
3.90
3.95
4.00
4.05
4.10
4.15
4.20
4.25
SepalWidth
SepalLength
-1
0
1
2
3
4
5
6
7
8
9
10
11
12
13
0.0
0.5
1.0
1.5
2.0
2.5
3.0
3.5
4.0
4.5
5.0
5.5
6.0
6.5
7.0
7.5
8.0
8.5
9.0
9.5
10.0
10.5
11.0
11.5
12.0
0
5
10
15
0.0
0.2
0.4
0.6
0.8
1.0
1.2
1.4
1.6
1.8
2.0
2.2
2.4
2.6
2.8
3.0
3.2
3.4
3.6
3.8
4.0
4.2
4.4
4.6
4.8
5.0
5.2
5.4
5.6
5.8
6.0
6.2
6.4
6.6
6.8
7.0
7.2
7.4
7.6
7.8
8.0
8.2
8.4
8.6
8.8
9.0
9.2
9.4
9.6
9.8
10.0
10.2
10.4
10.6
10.8
11.0
11.2
11.4
11.6
11.8
12.0
h,j,k,l,arrows,drag to pan
i,o,+,-,scroll,shift-drag to zoom
r,dbl-click to reset
c for coordinates
? for help
?
-1.0
-0.5
0.0
0.5
1.0
1.5
2.0
2.5
3.0
3.5
4.0
4.5
5.0
5.5
6.0
6.5
7.0
7.5
-0.6
-0.4
-0.2
0.0
0.2
0.4
0.6
0.8
1.0
1.2
1.4
1.6
1.8
2.0
2.2
2.4
2.6
2.8
3.0
3.2
3.4
3.6
3.8
4.0
4.2
4.4
4.6
4.8
5.0
5.2
5.4
5.6
5.8
6.0
6.2
6.4
6.6
6.8
7.0
-2.5
0.0
2.5
5.0
7.5
-0.5
-0.4
-0.3
-0.2
-0.1
0.0
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
1.0
1.1
1.2
1.3
1.4
1.5
1.6
1.7
1.8
1.9
2.0
2.1
2.2
2.3
2.4
2.5
2.6
2.7
2.8
2.9
3.0
3.1
3.2
3.3
3.4
3.5
3.6
3.7
3.8
3.9
4.0
4.1
4.2
4.3
4.4
4.5
4.6
4.7
4.8
4.9
5.0
5.1
5.2
5.3
5.4
5.5
5.6
5.7
5.8
5.9
6.0
6.1
6.2
6.3
6.4
6.5
6.6
6.7
6.8
6.9
7.0
SepalWidth
using DataFrames, Gadfly, Distributions
set_default_plot_size(21cm, 8cm)
x = -4:0.1:4
Da = [DataFrame(x=x, ymax=pdf.(Normal(μ),x), ymin=0.0, u="μ=$μ") for μ in [-1,1]]
Db = [DataFrame(x=randn(200).+μ, u="μ=$μ") for μ in [-1,1]]
p1 = plot(vcat(Da...), x=:x, y=:ymax, ymin=:ymin, ymax=:ymax, color=:u,
Geom.line, Geom.ribbon, Guide.ylabel("Density"), Theme(alphas=[0.6]),
Guide.colorkey(title="", pos=[2.5,0.6]), Guide.title("Parametric PDF")
)
p2 = plot(vcat(Db...), x=:x, color=:u, Theme(alphas=[0.6]),
Stat.density(bandwidth=0.5), Geom.polygon(fill=true, preserve_order=true),
Coord.cartesian(xmin=-4, xmax=4, ymin=0, ymax=0.4),
Guide.colorkey(title="", pos=[2.5,0.6]), Guide.title("Kernel PDF")
)
hstack(p1,p2)
x
-14
-12
-10
-8
-6
-4
-2
0
2
4
6
8
10
12
14
-12.0
-11.5
-11.0
-10.5
-10.0
-9.5
-9.0
-8.5
-8.0
-7.5
-7.0
-6.5
-6.0
-5.5
-5.0
-4.5
-4.0
-3.5
-3.0
-2.5
-2.0
-1.5
-1.0
-0.5
0.0
0.5
1.0
1.5
2.0
2.5
3.0
3.5
4.0
4.5
5.0
5.5
6.0
6.5
7.0
7.5
8.0
8.5
9.0
9.5
10.0
10.5
11.0
11.5
12.0
-20
-10
0
10
20
-12.0
-11.5
-11.0
-10.5
-10.0
-9.5
-9.0
-8.5
-8.0
-7.5
-7.0
-6.5
-6.0
-5.5
-5.0
-4.5
-4.0
-3.5
-3.0
-2.5
-2.0
-1.5
-1.0
-0.5
0.0
0.5
1.0
1.5
2.0
2.5
3.0
3.5
4.0
4.5
5.0
5.5
6.0
6.5
7.0
7.5
8.0
8.5
9.0
9.5
10.0
10.5
11.0
11.5
12.0
h,j,k,l,arrows,drag to pan
i,o,+,-,scroll,shift-drag to zoom
r,dbl-click to reset
c for coordinates
? for help
?
μ=-1
μ=1
-0.5
-0.4
-0.3
-0.2
-0.1
0.0
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
-0.4
-0.3
-0.2
-0.1
0.0
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
-0.5
0.0
0.5
1.0
-0.40
-0.38
-0.36
-0.34
-0.32
-0.30
-0.28
-0.26
-0.24
-0.22
-0.20
-0.18
-0.16
-0.14
-0.12
-0.10
-0.08
-0.06
-0.04
-0.02
0.00
0.02
0.04
0.06
0.08
0.10
0.12
0.14
0.16
0.18
0.20
0.22
0.24
0.26
0.28
0.30
0.32
0.34
0.36
0.38
0.40
0.42
0.44
0.46
0.48
0.50
0.52
0.54
0.56
0.58
0.60
0.62
0.64
0.66
0.68
0.70
0.72
0.74
0.76
0.78
0.80
0.82
Kernel PDF
x
-14
-12
-10
-8
-6
-4
-2
0
2
4
6
8
10
12
14
-12.0
-11.5
-11.0
-10.5
-10.0
-9.5
-9.0
-8.5
-8.0
-7.5
-7.0
-6.5
-6.0
-5.5
-5.0
-4.5
-4.0
-3.5
-3.0
-2.5
-2.0
-1.5
-1.0
-0.5
0.0
0.5
1.0
1.5
2.0
2.5
3.0
3.5
4.0
4.5
5.0
5.5
6.0
6.5
7.0
7.5
8.0
8.5
9.0
9.5
10.0
10.5
11.0
11.5
12.0
-20
-10
0
10
20
-12.0
-11.5
-11.0
-10.5
-10.0
-9.5
-9.0
-8.5
-8.0
-7.5
-7.0
-6.5
-6.0
-5.5
-5.0
-4.5
-4.0
-3.5
-3.0
-2.5
-2.0
-1.5
-1.0
-0.5
0.0
0.5
1.0
1.5
2.0
2.5
3.0
3.5
4.0
4.5
5.0
5.5
6.0
6.5
7.0
7.5
8.0
8.5
9.0
9.5
10.0
10.5
11.0
11.5
12.0
h,j,k,l,arrows,drag to pan
i,o,+,-,scroll,shift-drag to zoom
r,dbl-click to reset
c for coordinates
? for help
?
μ=-1
μ=1
-0.5
-0.4
-0.3
-0.2
-0.1
0.0
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
-0.4
-0.3
-0.2
-0.1
0.0
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
-0.5
0.0
0.5
1.0
-0.40
-0.38
-0.36
-0.34
-0.32
-0.30
-0.28
-0.26
-0.24
-0.22
-0.20
-0.18
-0.16
-0.14
-0.12
-0.10
-0.08
-0.06
-0.04
-0.02
0.00
0.02
0.04
0.06
0.08
0.10
0.12
0.14
0.16
0.18
0.20
0.22
0.24
0.26
0.28
0.30
0.32
0.34
0.36
0.38
0.40
0.42
0.44
0.46
0.48
0.50
0.52
0.54
0.56
0.58
0.60
0.62
0.64
0.66
0.68
0.70
0.72
0.74
0.76
0.78
0.80
0.82
Density
Parametric PDF
using CategoricalArrays
using Gadfly
set_default_plot_size(14cm, 8cm)
n = 400
group = repeat([-1, 1], inner=200)
x = randn(n) .+ group
plot(x=x, color=categorical(group), Guide.colorkey(title="", pos=[3.6,0.7]),
layer(Stat.density, Geom.line, Geom.polygon(fill=true, preserve_order=true), alpha=[0.4]),
layer(Stat.quantile_bars(quantiles=[0.05, 0.95]), Geom.segment),
Guide.title("Density with bars showing the central 90% CI"),
Guide.ylabel("Density"), Coord.cartesian(xmin=-4, xmax=4)
)
x
-14
-12
-10
-8
-6
-4
-2
0
2
4
6
8
10
12
14
-12.0
-11.5
-11.0
-10.5
-10.0
-9.5
-9.0
-8.5
-8.0
-7.5
-7.0
-6.5
-6.0
-5.5
-5.0
-4.5
-4.0
-3.5
-3.0
-2.5
-2.0
-1.5
-1.0
-0.5
0.0
0.5
1.0
1.5
2.0
2.5
3.0
3.5
4.0
4.5
5.0
5.5
6.0
6.5
7.0
7.5
8.0
8.5
9.0
9.5
10.0
10.5
11.0
11.5
12.0
-20
-10
0
10
20
-12.0
-11.5
-11.0
-10.5
-10.0
-9.5
-9.0
-8.5
-8.0
-7.5
-7.0
-6.5
-6.0
-5.5
-5.0
-4.5
-4.0
-3.5
-3.0
-2.5
-2.0
-1.5
-1.0
-0.5
0.0
0.5
1.0
1.5
2.0
2.5
3.0
3.5
4.0
4.5
5.0
5.5
6.0
6.5
7.0
7.5
8.0
8.5
9.0
9.5
10.0
10.5
11.0
11.5
12.0
h,j,k,l,arrows,drag to pan
i,o,+,-,scroll,shift-drag to zoom
r,dbl-click to reset
c for coordinates
? for help
?
-1
1
-0.6
-0.5
-0.4
-0.3
-0.2
-0.1
0.0
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
1.0
1.1
-0.50
-0.45
-0.40
-0.35
-0.30
-0.25
-0.20
-0.15
-0.10
-0.05
0.00
0.05
0.10
0.15
0.20
0.25
0.30
0.35
0.40
0.45
0.50
0.55
0.60
0.65
0.70
0.75
0.80
0.85
0.90
0.95
1.00
-0.5
0.0
0.5
1.0
-0.50
-0.48
-0.46
-0.44
-0.42
-0.40
-0.38
-0.36
-0.34
-0.32
-0.30
-0.28
-0.26
-0.24
-0.22
-0.20
-0.18
-0.16
-0.14
-0.12
-0.10
-0.08
-0.06
-0.04
-0.02
0.00
0.02
0.04
0.06
0.08
0.10
0.12
0.14
0.16
0.18
0.20
0.22
0.24
0.26
0.28
0.30
0.32
0.34
0.36
0.38
0.40
0.42
0.44
0.46
0.48
0.50
0.52
0.54
0.56
0.58
0.60
0.62
0.64
0.66
0.68
0.70
0.72
0.74
0.76
0.78
0.80
0.82
0.84
0.86
0.88
0.90
0.92
0.94
0.96
0.98
1.00
Density
Density with bars showing the central 90% CI
using DataFrames, Gadfly, RDatasets, Statistics
set_default_plot_size(21cm, 8cm)
salaries = dataset("car","Salaries")
salaries.Salary /= 1000.0
salaries.Discipline = ["Discipline $(x)" for x in salaries.Discipline]
df = combine(groupby(salaries, [:Rank, :Discipline]), :Salary.=>mean)
df.label = string.(round.(Int, df.Salary_mean))
p1 = plot(df, x=:Discipline, y=:Salary_mean, color=:Rank,
Scale.x_discrete(levels=["Discipline A", "Discipline B"]),
label=:label, Geom.label(position=:centered), Stat.dodge(position=:stack),
Geom.bar(position=:stack)
)
p2 = plot(df, y=:Discipline, x=:Salary_mean, color=:Rank,
Coord.cartesian(yflip=true), Scale.y_discrete,
label=:label, Geom.label(position=:right), Stat.dodge(axis=:y),
Geom.bar(position=:dodge, orientation=:horizontal),
Scale.color_discrete(levels=["Prof", "AssocProf", "AsstProf"]),
Guide.yticks(orientation=:vertical), Guide.ylabel(nothing)
)
hstack(p1, p2)
Salary_mean
-200
-150
-100
-50
0
50
100
150
200
250
300
350
-150
-140
-130
-120
-110
-100
-90
-80
-70
-60
-50
-40
-30
-20
-10
0
10
20
30
40
50
60
70
80
90
100
110
120
130
140
150
160
170
180
190
200
210
220
230
240
250
260
270
280
290
300
-200
0
200
400
-150
-145
-140
-135
-130
-125
-120
-115
-110
-105
-100
-95
-90
-85
-80
-75
-70
-65
-60
-55
-50
-45
-40
-35
-30
-25
-20
-15
-10
-5
0
5
10
15
20
25
30
35
40
45
50
55
60
65
70
75
80
85
90
95
100
105
110
115
120
125
130
135
140
145
150
155
160
165
170
175
180
185
190
195
200
205
210
215
220
225
230
235
240
245
250
255
260
265
270
275
280
285
290
295
300
Prof
AssocProf
AsstProf
Rank
133
85
101
120
83
74
h,j,k,l,arrows,drag to pan
i,o,+,-,scroll,shift-drag to zoom
r,dbl-click to reset
c for coordinates
? for help
?
Discipline B
Discipline A
Discipline B
Discipline A
Discipline
Discipline A
Discipline B
Prof
AsstProf
AssocProf
Rank
133
85
101
120
83
74
h,j,k,l,arrows,drag to pan
i,o,+,-,scroll,shift-drag to zoom
r,dbl-click to reset
c for coordinates
? for help
?
-500
-400
-300
-200
-100
0
100
200
300
400
500
600
700
800
900
-400
-350
-300
-250
-200
-150
-100
-50
0
50
100
150
200
250
300
350
400
450
500
550
600
650
700
750
800
-500
0
500
1000
-400
-380
-360
-340
-320
-300
-280
-260
-240
-220
-200
-180
-160
-140
-120
-100
-80
-60
-40
-20
0
20
40
60
80
100
120
140
160
180
200
220
240
260
280
300
320
340
360
380
400
420
440
460
480
500
520
540
560
580
600
620
640
660
680
700
720
740
760
780
800
Salary_mean