r - How to visualise the difference between probability distribution functions? -


I am trying to visualise the difference between two histograms of distribution functions, such as the difference between the following two curves:

enter image description here

When the difference is big, you can just plot the two curves on top of each other and fill the difference as denoted above, though when the difference becomes small, this gets cumbersome. Another way to plot this is plotting the difference itself, as follows:

enter image description here

However, this seems hard to read for anyone seeing such a graph for the first time, so I was wondering: is there any other way you can visualise the difference between two distribution functions?

I thought that maybe it might be an option to combine your two propositions, while scaling up the differences to make them visible.

What follows is an attempt to do this with ggplot2. It was quite a bit more involved than I thought, and I'm not a hundred percent satisfied with the result; but maybe it helps nevertheless. Comments and improvements are welcome.

library(ggplot2)
library(dplyr)

## Replicate the default ggplot2 colour palette for `n` discrete groups.
## Taken from [1].
gg_color_hue <- function(n) {
  hues <- seq(15, 375, length.out = n + 1)
  hcl(h = hues, l = 65, c = 100)[seq_len(n)]
}

## Set up the sample data: two log-normal samples whose sdlog differs slightly.
set.seed(1)
n <- 2000
x1 <- rlnorm(n, 0, 1)
x2 <- rlnorm(n, 0, 1.1)
df <- bind_rows(
  data.frame(sample = 1, x = x1),
  data.frame(sample = 2, x = x2)
) %>%
  mutate(sample = as.factor(sample))

## Calculate the density estimates by letting ggplot2 build the plot.
g1 <- ggplot(df, aes(x = x, group = sample, colour = sample)) +
  geom_density() +
  xlim(0, 10)
gg1 <- ggplot_build(g1)

## Use these estimates (available at the same x coordinates!) for
## calculating the differences. Inspired by [2].
dens <- gg1$data[[1]]
x <- dens$x[dens$group == 1]
y1 <- dens$y[dens$group == 1]
y2 <- dens$y[dens$group == 2]
df2 <- data.frame(
  x = x,
  ymin = pmin(y1, y2),
  ymax = pmax(y1, y2),
  side = (y1 < y2),
  ydiff = y2 - y1
)

## Ribbon between the two curves plus the (5x magnified) absolute difference.
## Transparency is a constant, so it is set outside aes(); inside aes() it
## would be treated as data and rescaled by an alpha scale.
g2 <- ggplot(df2) +
  geom_ribbon(aes(x = x, ymin = ymin, ymax = ymax, fill = side), alpha = 0.5) +
  geom_line(aes(x = x, y = 5 * abs(ydiff), colour = side)) +
  geom_area(aes(x = x, y = 5 * abs(ydiff), fill = side), alpha = 0.4)

## Overlay the two density curves. The colour scale is shared between the
## sample factor ("1", "2") and the logical `side` ("FALSE", "TRUE"), so the
## two palette colours are repeated for both sets of values.
g3 <- g2 +
  geom_density(
    data = df, size = 1,
    aes(x = x, group = sample, colour = sample)
  ) +
  xlim(0, 10) +
  guides(colour = "none") +
  ylab("curves: density\n shaded area: 5 * difference of densities") +
  scale_fill_manual(
    name = "samples", labels = 1:2,
    values = gg_color_hue(2)
  ) +
  scale_colour_manual(
    limits = c("1", "2", "FALSE", "TRUE"),
    values = rep(gg_color_hue(2), 2)
  )

print(g3)

enter image description here

Sources: [1] SO answer 1, [2] SO answer 2


As suggested by @Gregor in the comments, here's a version that draws two separate plots below each other, sharing the same x-axis scaling. At the least, the legends should still be tweaked.

library(ggplot2)
library(dplyr)
library(grid)

## Replicate the default ggplot2 colour palette for `n` discrete groups.
## Taken from [1].
gg_color_hue <- function(n) {
  hues <- seq(15, 375, length.out = n + 1)
  hcl(h = hues, l = 65, c = 100)[seq_len(n)]
}

## Set up the sample data: two log-normal samples whose sdlog differs slightly.
set.seed(1)
n <- 2000
x1 <- rlnorm(n, 0, 1)
x2 <- rlnorm(n, 0, 1.1)
df <- bind_rows(
  data.frame(sample = 1, x = x1),
  data.frame(sample = 2, x = x2)
) %>%
  mutate(sample = as.factor(sample))

## Calculate the density estimates by letting ggplot2 build the plot.
g1 <- ggplot(df, aes(x = x, group = sample, colour = sample)) +
  geom_density() +
  xlim(0, 10)
gg1 <- ggplot_build(g1)

## Use these estimates (available at the same x coordinates!) for
## calculating the differences. Inspired by [2].
dens <- gg1$data[[1]]
x <- dens$x[dens$group == 1]
y1 <- dens$y[dens$group == 1]
y2 <- dens$y[dens$group == 2]
df2 <- data.frame(
  x = x,
  ymin = pmin(y1, y2),
  ymax = pmax(y1, y2),
  side = (y1 < y2),
  ydiff = y2 - y1
)

## Upper panel: the two density curves with the area between them shaded.
## Transparency is a constant, so it is set outside aes().
g2 <- ggplot(df2) +
  geom_ribbon(aes(x = x, ymin = ymin, ymax = ymax, fill = side), alpha = 0.5) +
  geom_density(
    data = df, size = 1,
    aes(x = x, group = sample, colour = sample)
  ) +
  xlim(0, 10) +
  guides(fill = "none")

## Lower panel: the absolute difference between the two densities.
g3 <- ggplot(df2) +
  geom_line(aes(x = x, y = abs(ydiff), colour = side)) +
  geom_area(aes(x = x, y = abs(ydiff), fill = side), alpha = 0.4) +
  guides(fill = "none")

## Stack both panels on one page; see [3].
grid.draw(rbind(ggplotGrob(g2), ggplotGrob(g3), size = "last"))

enter image description here

... or, with abs(ydiff) replaced by ydiff in the construction of the second plot: enter image description here

Source: [3] SO answer 3


Comments

Popular posts from this blog

cakephp - simple blog with croogo -

How to group boxplot outliers in gnuplot -

bash - Performing variable substitution in a string -