Statistics

Stat.binmean

using Gadfly, RDatasets
# Wide canvas so the two panels fit side by side.
set_default_plot_size(21cm, 8cm)

# Load the iris table once and share it between both plots instead of
# fetching the same dataset twice.
iris = dataset("datasets", "iris")

# Left panel: the raw observations drawn as points.
p1 = plot(iris, x="SepalLength", y="SepalWidth", Geom.point)

# Right panel: identical aesthetics, with Stat.binmean applied before the
# points are drawn.
p2 = plot(iris, x="SepalLength", y="SepalWidth", Stat.binmean, Geom.point)

hstack(p1, p2)
SepalLength -1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0 5.5 6.0 6.5 7.0 7.5 8.0 8.5 9.0 9.5 10.0 10.5 11.0 11.5 12.0 0 5 10 15 0.0 0.2 0.4 0.6 0.8 1.0 1.2 1.4 1.6 1.8 2.0 2.2 2.4 2.6 2.8 3.0 3.2 3.4 3.6 3.8 4.0 4.2 4.4 4.6 4.8 5.0 5.2 5.4 5.6 5.8 6.0 6.2 6.4 6.6 6.8 7.0 7.2 7.4 7.6 7.8 8.0 8.2 8.4 8.6 8.8 9.0 9.2 9.4 9.6 9.8 10.0 10.2 10.4 10.6 10.8 11.0 11.2 11.4 11.6 11.8 12.0 7.6253.0875 7.1400000000000013.2 6.8571428571428563.0714285714285716 6.68000000000000153.0300000000000002 6.5,3.0 6.39999999999999952.9571428571428577 6.2999999999999992.8555555555555556 6.22.8249999999999997 6.052.7916666666666665 5.9000000000000013.0666666666666664 5.7999999999999992.8857142857142857 5.7000000000000013.1 5.60000000000000052.816666666666667 5.4428571428571433.207142857142857 5.2,3.425 5.10000000000000053.477777777777778 4.96253.05625 4.7714285714285713.185714285714286 4.4888888888888893.0777777777777775 h,j,k,l,arrows,drag to pan i,o,+,-,scroll,shift-drag to zoom r,dbl-click to reset c for coordinates ? for help ? 
1.75 2.00 2.25 2.50 2.75 3.00 3.25 3.50 3.75 4.00 4.25 4.50 2.00 2.05 2.10 2.15 2.20 2.25 2.30 2.35 2.40 2.45 2.50 2.55 2.60 2.65 2.70 2.75 2.80 2.85 2.90 2.95 3.00 3.05 3.10 3.15 3.20 3.25 3.30 3.35 3.40 3.45 3.50 3.55 3.60 3.65 3.70 3.75 3.80 3.85 3.90 3.95 4.00 4.05 4.10 4.15 4.20 4.25 2 3 4 5 2.00 2.05 2.10 2.15 2.20 2.25 2.30 2.35 2.40 2.45 2.50 2.55 2.60 2.65 2.70 2.75 2.80 2.85 2.90 2.95 3.00 3.05 3.10 3.15 3.20 3.25 3.30 3.35 3.40 3.45 3.50 3.55 3.60 3.65 3.70 3.75 3.80 3.85 3.90 3.95 4.00 4.05 4.10 4.15 4.20 4.25 SepalWidth SepalLength -1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0 5.5 6.0 6.5 7.0 7.5 8.0 8.5 9.0 9.5 10.0 10.5 11.0 11.5 12.0 0 5 10 15 0.0 0.2 0.4 0.6 0.8 1.0 1.2 1.4 1.6 1.8 2.0 2.2 2.4 2.6 2.8 3.0 3.2 3.4 3.6 3.8 4.0 4.2 4.4 4.6 4.8 5.0 5.2 5.4 5.6 5.8 6.0 6.2 6.4 6.6 6.8 7.0 7.2 7.4 7.6 7.8 8.0 8.2 8.4 8.6 8.8 9.0 9.2 9.4 9.6 9.8 10.0 10.2 10.4 10.6 10.8 11.0 11.2 11.4 11.6 11.8 12.0 5.9,3.0 6.2,3.4 6.5,3.0 6.3,2.5 6.7,3.0 6.7,3.3 6.8,3.2 5.8,2.7 6.9,3.1 6.7,3.1 6.9,3.1 6.0,3.0 6.4,3.1 6.3,3.4 7.7,3.0 6.1,2.6 6.3,2.8 6.4,2.8 7.9,3.8 7.4,2.8 7.2,3.0 6.4,2.8 6.1,3.0 6.2,2.8 7.2,3.2 6.7,3.3 6.3,2.7 7.7,2.8 5.6,2.8 6.9,3.2 6.0,2.2 7.7,2.6 7.7,3.8 6.5,3.0 6.4,3.2 5.8,2.8 5.7,2.5 6.8,3.0 6.4,2.7 6.5,3.2 7.2,3.6 6.7,2.5 7.3,2.9 4.9,2.5 7.6,3.0 6.5,3.0 6.3,2.9 7.1,3.0 5.8,2.7 6.3,3.3 5.7,2.8 5.1,2.5 6.2,2.9 5.7,2.9 5.7,3.0 5.6,2.7 5.0,2.3 5.8,2.6 6.1,3.0 5.5,2.6 5.5,2.5 5.6,3.0 6.3,2.3 6.7,3.1 6.0,3.4 5.4,3.0 6.0,2.7 5.8,2.7 5.5,2.4 5.5,2.4 5.7,2.6 6.0,2.9 6.7,3.0 6.8,2.8 6.6,3.0 6.4,2.9 6.1,2.8 6.3,2.5 6.1,2.8 5.9,3.2 5.6,2.5 6.2,2.2 5.8,2.7 5.6,3.0 6.7,3.1 5.6,2.9 6.1,2.9 6.0,2.2 5.9,3.0 5.0,2.0 5.2,2.7 6.6,2.9 4.9,2.4 6.3,3.3 5.7,2.8 6.5,2.8 5.5,2.3 6.9,3.1 6.4,3.2 7.0,3.2 5.0,3.3 5.3,3.7 4.6,3.2 5.1,3.8 4.8,3.0 5.1,3.8 5.0,3.5 4.4,3.2 4.5,2.3 5.0,3.5 5.1,3.4 4.4,3.0 4.9,3.6 5.5,3.5 5.0,3.2 4.9,3.1 5.5,4.2 5.2,4.1 5.4,3.4 4.8,3.1 4.7,3.2 5.2,3.4 5.2,3.5 5.0,3.4 5.0,3.0 4.8,3.4 5.1,3.3 4.6,3.6 5.1,3.7 5.4,3.4 
5.1,3.8 5.7,3.8 5.1,3.5 5.4,3.9 5.7,4.4 5.8,4.0 4.3,3.0 4.8,3.0 4.8,3.4 5.4,3.7 4.9,3.1 4.4,2.9 5.0,3.4 4.6,3.4 5.4,3.9 5.0,3.6 4.6,3.1 4.7,3.2 4.9,3.0 5.1,3.5 h,j,k,l,arrows,drag to pan i,o,+,-,scroll,shift-drag to zoom r,dbl-click to reset c for coordinates ? for help ? -1.0 -0.5 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0 5.5 6.0 6.5 7.0 7.5 -0.6 -0.4 -0.2 0.0 0.2 0.4 0.6 0.8 1.0 1.2 1.4 1.6 1.8 2.0 2.2 2.4 2.6 2.8 3.0 3.2 3.4 3.6 3.8 4.0 4.2 4.4 4.6 4.8 5.0 5.2 5.4 5.6 5.8 6.0 6.2 6.4 6.6 6.8 7.0 -2.5 0.0 2.5 5.0 7.5 -0.5 -0.4 -0.3 -0.2 -0.1 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 2.0 2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9 3.0 3.1 3.2 3.3 3.4 3.5 3.6 3.7 3.8 3.9 4.0 4.1 4.2 4.3 4.4 4.5 4.6 4.7 4.8 4.9 5.0 5.1 5.2 5.3 5.4 5.5 5.6 5.7 5.8 5.9 6.0 6.1 6.2 6.3 6.4 6.5 6.6 6.7 6.8 6.9 7.0 SepalWidth

Stat.density

using DataFrames, Gadfly, Distributions
set_default_plot_size(21cm, 8cm)
x = -4:0.1:4

# One table per mean μ ∈ {-1, 1}:
#   Da - pdf of Normal(μ) evaluated on the grid `x`, stored as a ribbon
#        running from ymin = 0 up to ymax = pdf
#   Db - 200 standard-normal draws shifted by μ
Da = map([-1, 1]) do μ
    DataFrame(x=x, ymax=pdf.(Normal(μ), x), ymin=0.0, u="μ=$μ")
end
Db = map([-1, 1]) do μ
    DataFrame(x=randn(200) .+ μ, u="μ=$μ")
end

# Stack the per-mean tables; the `u` column tells the groups apart.
parametric = reduce(vcat, Da)
sampled = reduce(vcat, Db)

# Left panel: line plus ribbon built from the evaluated pdf values.
p1 = plot(
    parametric,
    x=:x, y=:ymax, ymin=:ymin, ymax=:ymax, color=:u,
    Geom.line,
    Geom.ribbon,
    Guide.ylabel("Density"),
    Theme(alphas=[0.6]),
    Guide.colorkey(title="", pos=[2.5,0.6]),
    Guide.title("Parametric PDF"),
)

# Right panel: Stat.density (bandwidth 0.5) applied to the random draws and
# drawn as filled polygons, with the axes pinned to the same x-range as the grid.
p2 = plot(
    sampled,
    x=:x, color=:u,
    Theme(alphas=[0.6]),
    Stat.density(bandwidth=0.5),
    Geom.polygon(fill=true, preserve_order=true),
    Coord.cartesian(xmin=-4, xmax=4, ymin=0, ymax=0.4),
    Guide.colorkey(title="", pos=[2.5,0.6]),
    Guide.title("Kernel PDF"),
)

hstack(p1, p2)
x -14 -12 -10 -8 -6 -4 -2 0 2 4 6 8 10 12 14 -12.0 -11.5 -11.0 -10.5 -10.0 -9.5 -9.0 -8.5 -8.0 -7.5 -7.0 -6.5 -6.0 -5.5 -5.0 -4.5 -4.0 -3.5 -3.0 -2.5 -2.0 -1.5 -1.0 -0.5 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0 5.5 6.0 6.5 7.0 7.5 8.0 8.5 9.0 9.5 10.0 10.5 11.0 11.5 12.0 -20 -10 0 10 20 -12.0 -11.5 -11.0 -10.5 -10.0 -9.5 -9.0 -8.5 -8.0 -7.5 -7.0 -6.5 -6.0 -5.5 -5.0 -4.5 -4.0 -3.5 -3.0 -2.5 -2.0 -1.5 -1.0 -0.5 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0 5.5 6.0 6.5 7.0 7.5 8.0 8.5 9.0 9.5 10.0 10.5 11.0 11.5 12.0 h,j,k,l,arrows,drag to pan i,o,+,-,scroll,shift-drag to zoom r,dbl-click to reset c for coordinates ? for help ? μ=-1 μ=1 -0.5 -0.4 -0.3 -0.2 -0.1 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 -0.4 -0.3 -0.2 -0.1 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 -0.5 0.0 0.5 1.0 -0.40 -0.38 -0.36 -0.34 -0.32 -0.30 -0.28 -0.26 -0.24 -0.22 -0.20 -0.18 -0.16 -0.14 -0.12 -0.10 -0.08 -0.06 -0.04 -0.02 0.00 0.02 0.04 0.06 0.08 0.10 0.12 0.14 0.16 0.18 0.20 0.22 0.24 0.26 0.28 0.30 0.32 0.34 0.36 0.38 0.40 0.42 0.44 0.46 0.48 0.50 0.52 0.54 0.56 0.58 0.60 0.62 0.64 0.66 0.68 0.70 0.72 0.74 0.76 0.78 0.80 0.82 Kernel PDF x -14 -12 -10 -8 -6 -4 -2 0 2 4 6 8 10 12 14 -12.0 -11.5 -11.0 -10.5 -10.0 -9.5 -9.0 -8.5 -8.0 -7.5 -7.0 -6.5 -6.0 -5.5 -5.0 -4.5 -4.0 -3.5 -3.0 -2.5 -2.0 -1.5 -1.0 -0.5 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0 5.5 6.0 6.5 7.0 7.5 8.0 8.5 9.0 9.5 10.0 10.5 11.0 11.5 12.0 -20 -10 0 10 20 -12.0 -11.5 -11.0 -10.5 -10.0 -9.5 -9.0 -8.5 -8.0 -7.5 -7.0 -6.5 -6.0 -5.5 -5.0 -4.5 -4.0 -3.5 -3.0 -2.5 -2.0 -1.5 -1.0 -0.5 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0 5.5 6.0 6.5 7.0 7.5 8.0 8.5 9.0 9.5 10.0 10.5 11.0 11.5 12.0 h,j,k,l,arrows,drag to pan i,o,+,-,scroll,shift-drag to zoom r,dbl-click to reset c for coordinates ? for help ? 
μ=-1 μ=1 -0.5 -0.4 -0.3 -0.2 -0.1 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 -0.4 -0.3 -0.2 -0.1 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 -0.5 0.0 0.5 1.0 -0.40 -0.38 -0.36 -0.34 -0.32 -0.30 -0.28 -0.26 -0.24 -0.22 -0.20 -0.18 -0.16 -0.14 -0.12 -0.10 -0.08 -0.06 -0.04 -0.02 0.00 0.02 0.04 0.06 0.08 0.10 0.12 0.14 0.16 0.18 0.20 0.22 0.24 0.26 0.28 0.30 0.32 0.34 0.36 0.38 0.40 0.42 0.44 0.46 0.48 0.50 0.52 0.54 0.56 0.58 0.60 0.62 0.64 0.66 0.68 0.70 0.72 0.74 0.76 0.78 0.80 0.82 Density Parametric PDF

Stat.quantile_bars

using CategoricalArrays
using Gadfly
set_default_plot_size(14cm, 8cm)

# 400 draws in two equally sized groups, labelled -1 and 1; each draw is a
# standard-normal sample shifted by its group label.
n = 400
group = repeat([-1, 1], inner=200)
x = randn(n) .+ group

# Layer 1: density estimate drawn as an outline plus a translucent fill.
density_layer = layer(
    Stat.density,
    Geom.line,
    Geom.polygon(fill=true, preserve_order=true),
    alpha=[0.4],
)

# Layer 2: segments produced by Stat.quantile_bars for the 5% and 95% quantiles.
bars_layer = layer(
    Stat.quantile_bars(quantiles=[0.05, 0.95]),
    Geom.segment,
)

plot(
    x=x,
    color=categorical(group),
    Guide.colorkey(title="", pos=[3.6,0.7]),
    density_layer,
    bars_layer,
    Guide.title("Density with bars showing the central 90% CI"),
    Guide.ylabel("Density"),
    Coord.cartesian(xmin=-4, xmax=4),
)
x -14 -12 -10 -8 -6 -4 -2 0 2 4 6 8 10 12 14 -12.0 -11.5 -11.0 -10.5 -10.0 -9.5 -9.0 -8.5 -8.0 -7.5 -7.0 -6.5 -6.0 -5.5 -5.0 -4.5 -4.0 -3.5 -3.0 -2.5 -2.0 -1.5 -1.0 -0.5 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0 5.5 6.0 6.5 7.0 7.5 8.0 8.5 9.0 9.5 10.0 10.5 11.0 11.5 12.0 -20 -10 0 10 20 -12.0 -11.5 -11.0 -10.5 -10.0 -9.5 -9.0 -8.5 -8.0 -7.5 -7.0 -6.5 -6.0 -5.5 -5.0 -4.5 -4.0 -3.5 -3.0 -2.5 -2.0 -1.5 -1.0 -0.5 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0 5.5 6.0 6.5 7.0 7.5 8.0 8.5 9.0 9.5 10.0 10.5 11.0 11.5 12.0 h,j,k,l,arrows,drag to pan i,o,+,-,scroll,shift-drag to zoom r,dbl-click to reset c for coordinates ? for help ? -1 1 -0.6 -0.5 -0.4 -0.3 -0.2 -0.1 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 -0.50 -0.45 -0.40 -0.35 -0.30 -0.25 -0.20 -0.15 -0.10 -0.05 0.00 0.05 0.10 0.15 0.20 0.25 0.30 0.35 0.40 0.45 0.50 0.55 0.60 0.65 0.70 0.75 0.80 0.85 0.90 0.95 1.00 -0.5 0.0 0.5 1.0 -0.50 -0.48 -0.46 -0.44 -0.42 -0.40 -0.38 -0.36 -0.34 -0.32 -0.30 -0.28 -0.26 -0.24 -0.22 -0.20 -0.18 -0.16 -0.14 -0.12 -0.10 -0.08 -0.06 -0.04 -0.02 0.00 0.02 0.04 0.06 0.08 0.10 0.12 0.14 0.16 0.18 0.20 0.22 0.24 0.26 0.28 0.30 0.32 0.34 0.36 0.38 0.40 0.42 0.44 0.46 0.48 0.50 0.52 0.54 0.56 0.58 0.60 0.62 0.64 0.66 0.68 0.70 0.72 0.74 0.76 0.78 0.80 0.82 0.84 0.86 0.88 0.90 0.92 0.94 0.96 0.98 1.00 Density Density with bars showing the central 90% CI

Stat.dodge

using DataFrames, Gadfly, RDatasets, Statistics
set_default_plot_size(21cm, 8cm)

salaries = dataset("car","Salaries")
# Rescale the Salary column by 1/1000 and prefix every discipline code with
# "Discipline " so the axis labels are self-explanatory.
salaries.Salary = salaries.Salary / 1000.0
salaries.Discipline = ["Discipline $(d)" for d in salaries.Discipline]

# Mean salary per (Rank, Discipline) cell; `combine` names the result column
# `Salary_mean`. The rounded mean is kept as a string for use as a bar label.
by_rank_discipline = groupby(salaries, [:Rank, :Discipline])
df = combine(by_rank_discipline, :Salary => mean)
df.label = [string(round(Int, m)) for m in df.Salary_mean]

# Left panel: stacked vertical bars with centred labels; Stat.dodge is given
# the same `:stack` position as the bars.
p1 = plot(
    df,
    x=:Discipline, y=:Salary_mean, color=:Rank,
    Scale.x_discrete(levels=["Discipline A", "Discipline B"]),
    label=:label,
    Geom.label(position=:centered),
    Stat.dodge(position=:stack),
    Geom.bar(position=:stack),
)

# Right panel: dodged horizontal bars with labels to the right; Stat.dodge
# works along the y axis here, and the y axis is flipped.
p2 = plot(
    df,
    y=:Discipline, x=:Salary_mean, color=:Rank,
    Coord.cartesian(yflip=true),
    Scale.y_discrete,
    label=:label,
    Geom.label(position=:right),
    Stat.dodge(axis=:y),
    Geom.bar(position=:dodge, orientation=:horizontal),
    Scale.color_discrete(levels=["Prof", "AssocProf", "AsstProf"]),
    Guide.yticks(orientation=:vertical),
    Guide.ylabel(nothing),
)

hstack(p1, p2)
Salary_mean -200 -150 -100 -50 0 50 100 150 200 250 300 350 -150 -140 -130 -120 -110 -100 -90 -80 -70 -60 -50 -40 -30 -20 -10 0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170 180 190 200 210 220 230 240 250 260 270 280 290 300 -200 0 200 400 -150 -145 -140 -135 -130 -125 -120 -115 -110 -105 -100 -95 -90 -85 -80 -75 -70 -65 -60 -55 -50 -45 -40 -35 -30 -25 -20 -15 -10 -5 0 5 10 15 20 25 30 35 40 45 50 55 60 65 70 75 80 85 90 95 100 105 110 115 120 125 130 135 140 145 150 155 160 165 170 175 180 185 190 195 200 205 210 215 220 225 230 235 240 245 250 255 260 265 270 275 280 285 290 295 300 Prof AssocProf AsstProf Rank