0%

Boxplot

Boxplot

设计思路

疫情数据找来找去都不太合适:样本差距比较大,数据不集中。后来我就改用了气温数据。

使用的是 2020 年青岛 6、7、8 月份的每日最高气温和最低气温 [1]。

先找一个模板[2],然后修改。

先是写了高温的数据,然后在旁边显示低温数据,用于对比。

效果图

Qingdao daily high and low temperature of Jun, Jul and Aug, 2020

HTML源代码

<!DOCTYPE html>
<html>
<head>
<!-- The charset declaration belongs in <head>, ahead of any text content. -->
<meta charset="utf-8">
<title>Qingdao daily high and low temperature of Jun, Jul and Aug, 2020</title>
</head>
<body>
<!-- Chart heading. Flow content such as <p> is only valid inside <body>;
     the inline script below styles it via d3.select("body").selectAll("p"). -->
<p>Qingdao daily high and low temperature of Jun, Jul and Aug, 2020</p>
<!-- D3 v4 must be loaded before the inline script that uses the `d3` global. -->
<script src="https://d3js.org/d3.v4.js"></script>
<!-- Mount point: the script appends the box plot <svg> into this element. -->
<div id="qdtempera"></div>
<script>
// Style every <p> inside <body> (the chart heading).
var p = d3.select("body").selectAll("p");
p.style("color", "#6d6875");
p.style("font-size", "24px");

// Margin convention: `width` and `height` describe the inner plotting area,
// i.e. the 460x400 outer SVG minus the margins reserved for the axes.
var margin = { top: 10, right: 30, bottom: 30, left: 40 };
var width = 460 - margin.left - margin.right;
var height = 400 - margin.top - margin.bottom;

// Create the outer <svg> at full size inside #qdtempera ...
var svg = d3.select("#qdtempera").append("svg");
svg.attr("width", width + margin.left + margin.right);
svg.attr("height", height + margin.top + margin.bottom);

// ... then rebind `svg` to an inner <g> shifted by the top-left margin, so
// everything drawn afterwards uses plot-area coordinates.
svg = svg.append("g");
svg.attr("transform", ["translate(", margin.left, ",", margin.top, ")"].join(""));

// Caption for the y axis, placed at the top-left corner of the plot area.
svg.append("text")
  .attr("x", 6)
  .attr("y", 0)
  .text("Temperature / ℃")
  .style("font-size", "10px")
  .attr("alignment-baseline", "middle");

/*
 * Load the Qingdao temperature CSV and draw, for every month, two box plots
 * side by side: daily highs (warm palette) and daily lows (cool palette,
 * shifted right by `s2offset`), each overlaid with the jittered raw points.
 *
 * Relies on `svg`, `width` and `height` defined earlier in this script.
 * Assumes each CSV row has `month`, `high` and `low` columns and that `month`
 * holds the labels "Jun" / "Jul" / "Aug" used as the x domain below -- TODO
 * confirm against the CSV file.
 */
d3.csv("https://raw.githubusercontent.com/Co10/d3_files/master/CSV_files/02/qingdao_temperature.csv", function(error, data) {
  // With a two-argument callback d3 v4 reports load failures through `error`
  // instead of silently handing over null rows.
  if (error) {
    console.error("Failed to load temperature data:", error);
    return;
  }

  /*
   * Box-plot statistics for one numeric column of a group of rows.
   * Returns {q1, median, q3, interQuantileRange, min, max}, where min / max
   * are the fences q1 - 1.5*IQR and q3 + 1.5*IQR (not clamped to the data).
   */
  function boxStats(rows, field) {
    // CSV cells are strings: convert before sorting, otherwise the order is
    // lexicographic (e.g. "9" would sort after "25"). Sort once, reuse thrice.
    var values = rows
      .map(function(row) { return +row[field]; })
      .sort(d3.ascending);
    var q1 = d3.quantile(values, 0.25);
    var median = d3.quantile(values, 0.5);
    var q3 = d3.quantile(values, 0.75);
    var interQuantileRange = q3 - q1;
    return {
      q1: q1,
      median: median,
      q3: q3,
      interQuantileRange: interQuantileRange,
      min: q1 - 1.5 * interQuantileRange,
      max: q3 + 1.5 * interQuantileRange
    };
  }

  // Group the rows by month and summarise one column per group.
  // Result shape: [{key: <month>, value: <boxStats result>}, ...].
  function summarize(field) {
    return d3.nest()
      .key(function(d) { return d.month; })
      .rollup(function(rows) { return boxStats(rows, field); })
      .entries(data);
  }

  var sumstat = summarize("high");
  var sumstat2 = summarize("low");

  // Palettes, indexed as: [0] box fill, [1] box / whisker stroke,
  // [2] median line, [3] individual points.
  var color1 = ["#e85d04", "#f48c06", "#faa307", "#ffba08"];
  var color2 = ["#00b4d8", "#48cae4", "#90e0ef", "#ade8f4"];

  // paddingInner(1) collapses every band to zero width, so x(month) yields a
  // single position per month that serves as the centre of its box.
  var x = d3.scaleBand()
    .range([0, width])
    .domain(["Jun", "Jul", "Aug"])
    .paddingInner(1)
    .paddingOuter(.5);
  svg.append("g")
    .attr("transform", "translate(0," + height + ")")
    .call(d3.axisBottom(x));

  // Fixed temperature range chosen for this data set; anything outside
  // [13, 33] is drawn beyond the axis because the scale is not clamped.
  var y = d3.scaleLinear()
    .domain([13, 33])
    .range([height, 0]);
  svg.append("g").call(d3.axisLeft(y));

  var fillopa = 0.8;
  var s2offset = 20;   // horizontal shift of the low-temperature series
  var boxWidth = 100;
  var jitterWidth = 60;

  // Both series share the same drawing code and differ only in these settings.
  var series = [
    { stats: sumstat, field: "high", colors: color1, offset: 0 },
    { stats: sumstat2, field: "low", colors: color2, offset: s2offset }
  ];

  // Each layer is drawn for both series before the next layer starts, so the
  // SVG stacking order stays: whiskers < boxes < median lines < points.
  // The selectAll() arguments ("vertLines", "boxes", ...) match no existing
  // element on purpose: the selection is empty, so enter() holds every datum.

  // Layer 1: dashed vertical whiskers spanning the min..max fences.
  series.forEach(function(s) {
    svg.selectAll("vertLines")
      .data(s.stats)
      .enter()
      .append("line")
      .attr("x1", function(d) { return x(d.key) + s.offset; })
      .attr("x2", function(d) { return x(d.key) + s.offset; })
      .attr("y1", function(d) { return y(d.value.min); })
      .attr("y2", function(d) { return y(d.value.max); })
      .attr("stroke", s.colors[1])
      .style("stroke-dasharray", "4, 4");
  });

  // Layer 2: boxes from Q3 (top edge) down to Q1 (bottom edge).
  series.forEach(function(s) {
    svg.selectAll("boxes")
      .data(s.stats)
      .enter()
      .append("rect")
      .attr("x", function(d) { return x(d.key) - boxWidth / 2 + s.offset; })
      .attr("y", function(d) { return y(d.value.q3); })
      .attr("height", function(d) { return y(d.value.q1) - y(d.value.q3); })
      .attr("width", boxWidth)
      .attr("stroke", s.colors[1])
      .attr("fill-opacity", fillopa)
      .style("fill", s.colors[0]);
  });

  // Layer 3: horizontal median line across the full box width.
  series.forEach(function(s) {
    svg.selectAll("medianLines")
      .data(s.stats)
      .enter()
      .append("line")
      .attr("x1", function(d) { return x(d.key) - boxWidth / 2 + s.offset; })
      .attr("x2", function(d) { return x(d.key) + boxWidth / 2 + s.offset; })
      .attr("y1", function(d) { return y(d.value.median); })
      .attr("y2", function(d) { return y(d.value.median); })
      .attr("stroke", s.colors[2]);
  });

  // Layer 4: one circle per CSV row, with random horizontal jitter so points
  // sharing a temperature do not pile up on a single vertical line.
  series.forEach(function(s) {
    svg.selectAll("indPoints")
      .data(data)
      .enter()
      .append("circle")
      .attr("cx", function(d) {
        return x(d.month) - jitterWidth / 2 + Math.random() * jitterWidth + s.offset;
      })
      .attr("cy", function(d) { return y(d[s.field]); })
      .attr("r", 4)
      .attr("fill-opacity", fillopa)
      .style("fill", s.colors[3]);
  });
});
</script>
</body>
</html>

实现方法

用 d3.nest() 按月份对数据分组,然后计算 Q1、Q2、Q3、四分位距 (Interquartile range, IQR) 以及 max、min [3]。

Q1 为 0.25 分位点,Q2 为中位数,Q3 为 0.75 分位点,IQR 为四分位距,即 $\mathrm{IQR} = Q_3 - Q_1$。

最大值区间:$\max = Q_3 + 1.5 \times \mathrm{IQR}$

最小值区间:$\min = Q_1 - 1.5 \times \mathrm{IQR}$

后面的数据点用Math.random()增加 x 方向的偏移量,避免集中在一条直线上。

低温的数据直接 copy 高温数据建立的代码就行,然后增加 x 方向的偏移量和透明度,再改个颜色,这样好看许多。

Summary

首先要了解 boxplot 是什么,几个点怎么计算的。

boxplot 的数据选择一定要选数据点集中的,不然做不起来。

boxplot 可以反映出数据集中在哪里,围绕着哪条线上下波动。

Reference

[1] http://www.weather.com.cn/weather40dn/101120201.shtml

[2] https://www.d3-graph-gallery.com/graph/boxplot_show_individual_points.html

[3] https://en.wikipedia.org/wiki/Box_plot