This page gives the code used to produce the diagrams in the report. Further comments will be added, and the code will be tidied, as time permits.
## **************************************************************************
## PATHS
## **************************************************************************
data <- "" ## data directory (including trailing '/')
figures <- "" ## directory for figures (including trailing '/')
setwd(data)
## **************************************************************************
## LIBRARIES
## **************************************************************************
require(plyr) ## tools that implement the split-apply-combine pattern in R
require(RColorBrewer) ## creates color palettes
require(memisc) ## imports SPSS .sav files
require(VGAM) ## functions for fitting vector generalized linear and additive models
require(scales) ## we use this for alpha (opacity) shading
## **************************************************************************
## CONSTANTS
## **************************************************************************
mm <- 0.0393700787 ## Graphics in R uses inches 1mm = 0.0393700787 inches
textwidth <- 175 * mm
ur8pal <- brewer.pal(8,"Paired")
ur8 <-
c(
"Large Urban Areas",
"Other Urban Areas",
"Accessible Small Towns",
"Remote Small Towns",
"Very Remote Small Towns",
"Accessible Rural",
"Remote Rural",
"Very Remote Rural"
)
ur6pal <- brewer.pal(6,"Paired")
ur6 <-
c(
"Large Urban Areas",
"Other Urban Areas",
"Accessible Small Towns",
"Remote Small Towns",
"Accessible Rural",
"Remote Rural"
)
## **************************************************************************
## OUTPUT produce three versions of a diagram
## **************************************************************************
mkall <-
function (name, path = figures){
mk <- function(myname=name, type, dpi=300){
print(paste(path, myname, ".", type, dpi, sep=""))
quartz.save(
file = paste(path, myname, ".", type, sep=""),
type = type,
width = textwidth,
pointsize = 11,
family = "Arial",
dpi = dpi)
}
mk(type = "pdf")
mk(type = "png", dpi = 288)
mk(myname = paste(name,"-thumb",sep=""), type = "png", dpi = 72)
}
## **************************************************************************
## DATA
## **************************************************************************
## get Ofcom data
## http://d2a9983j4okwzn.cloudfront.net/downloads/ofcom-uk-fixed-broadband-postcode-level-data-2013.zip
## ofcom uses postcodes with no spaces, so we
## process ofcom-part1-fixed-broadband-postcode-level-data-2013.csv with the following sed script
## to set headers, put postcodes in pc7 format, adjust anomalous values, and remove lines with no postcode:
## 1s/.*/postcode,status,slowlines,avespeed,medspeed,maxspeed,superfast,connections/
## /^[^ ]{6},/s/^([^ ]{3})/\1 /
## /^[^ ]{5},/s/^([^ ]{2})/\1 /
## s!N/A!!g
## s/<3/-1/g
## s/>=30/31/g
## /^,/d
ofcom <- read.csv("ofcom-uk-fixed-broadband-postcode-level-data-2013/ofcom-fixed-broadband-postcode-level-data-2013.csv")
## we use this data to give connection counts for almost all postcodes, and for most postcodes also
## median speed data, and postcodes with slowlines and superfast availability
## **************************************************************************
## get isd data from
## https://isdscotland.scot.nhs.uk/Products-and-Services/GPD-Support/Geography/Postcode-Reference-File/_docs/latestpcinfowithlinkpc.sav
isd <- as.data.set(spss.system.file('latestpcinfowithlinkpc.sav'))
isd$UrbRur8_2011_2012 <- as.ordered(isd$UrbRur8_2011_2012)
isd$UrbRur6_2011_2012 <- as.ordered(isd$UrbRur6_2011_2012)
isd <- subset(isd, is.na(Date_of_Deletion) & !is.na(Household_Count))
## we use this dataset to give household counts for each postcode
## and links to higher-level geographies, including datazones
## it includes 146,749 postcodes covering 2,545,790 households
## **************************************************************************
## keep only live postcodes, not those that have been deleted
## and only postcodes for which we have household count
## then join this with the ofcom data to get postcode-level data
pc <- merge(isd, subset(ofcom, connections >=0 & postcode!= ""), by.x="pc7",by.y="postcode")
## this dataset includes 145,968 postcodes, covering 2,538.280 households (99.7% of the isd total)
## we divide the households in each postcode into connected and offline
## if there are more connections than households we assume all households are connected
pc$connected <- pmin(pc$connections, pc$Household_Count)
pc$offline <- pc$Household_Count - pc$connected
## note that connected + offline == Household_Count
scothh <- sum(pc$Household_Count,na.rm=T)
ur6sizes <- ddply(pc,.(UrbRur6_2011_2012),summarise,hh=sum(Household_Count,na.rm=T))
CAsizes <- ddply(pc,.(CA),summarise,hh=sum(Household_Count,na.rm=T))
## compute offline and connected households by datazone
dz <- ddply(pc,.(Datazone),summarize,
households = sum(Household_Count, na.rm=T),
offline= sum(offline, na.rm=T),
connected= sum(connected, na.rm=T))
## check: with(dz,households == offline + connected)
## **************************************************************************
## get SIMD data for 6,505 datazones
## http://www.scotland.gov.uk/Resource/0041/00410767.xls
## export columns A-Y to SIMD/overall-domain.csv
simd <- read.csv("SIMD/overall-domain.csv")
dzsimd <- merge(simd,dz,by.x="datazone",by.y="Datazone")
## convert ranking to [-1,+1] spectrum by household count
byPop <- function (ranking, households) {
pop <- sum(households)
oo <- order(ranking)
rr <- order(oo)
hh <- cumsum(households[oo]) - households[oo]/2
2*hh[rr]/pop -1
}
mysimd <- with(
dzsimd,
data.frame(
Datazone = datazone,
isolation = ((geographic_domain_score - min(geographic_domain_score))
/diff(range(geographic_domain_score))),
isol = (byPop(geographic_domain_score,households) + 1)/2,
income = byPop(income_domain_rank,households),
housing = byPop(housing_domain_rank,households),
employment = byPop(employment_domain_rank,households),
health = byPop(health_domain_rank,households),
education = byPop(education_domain__rank,households),
crime = byPop(crime_2012_rank,households),
simd = byPop(simd_2012_rank,households)
))
pcsimd <- merge(pc, mysimd, by="Datazone")
pcsimd$decile <- cut(pcsimd$simd,breaks=10,include.lowest=T,labels=1:10)
## to fit a beta-binomial model we must restrict attention to postcodes with more than one household
model <- subset(pcsimd, Household_Count > 1)
## fit a beta binomial distribution to the postcode level data
pcglm <-
vglm(formula =
cbind(offline, connected)
~ 1,
family = betabinomial.ab,
data = model,
trace = TRUE
)
## fit a beta binomial distribution to the datazone level data
dzglm <-
vglm(formula =
cbind(offline, connected)
~ 1,
family = betabinomial.ab,
data = subset(dz,households > 1),
trace = TRUE
)
plotbeta <- function(a,b, h=0.32,col="red") {
unit<- seq(0,1,by=0.01)
q <- function (q) { return(qbeta(q, shape1=a,shape2=b))}
d <- function (x) { return(dbeta(x, shape1=a,shape2=b))}
lines(unit ,
dbeta(x=unit,
shape1=a,
shape2=b),
col= col,
lwd= 4)
if(h){ ## highlight P( p < 0.1 ) and P( p > 0.5 )
q50 <- q(0.5)
lines(c(q50,q50),c(0,d(q50)),lwd=2)
lines(c(0.5,0.5),c(0,d(0.5)),lwd=0.5)
p50 <- pbeta(0.5, shape1=a,shape2=b)
lines(c(0.1,0.1),c(0,d(0.1)),lwd=0.5)
p10 <- pbeta(0.1, shape1=a,shape2=b)
text(x= 0.51, y=h, labels=c(paste(round(100*(1-p50)),"%",sep="")),adj=0,cex=1.2)
text(x= 0.09, y=h, labels=c(paste(round(100*(p10)),"%",sep="")),adj=1,cex=1.2)
arrows(x0= c(0.5,0.1), y0=c(h,h)-0.12,x1= c(0.81,0.012), y1=c(h,h)-0.12,length=0.1,angle=15)
}
}
## III.1distribution-deprivation
graphics.off()
mai <- c(0.6,0.2,0.2,0.2)
sdz <- max(dbeta(x=seq(0,1,by=0.01) ,shape1=exp(coef(dzglm)[1]),shape2=exp(coef(dzglm)[2])))
spc <- max(dbeta(x=seq(0,1,by=0.01) ,shape1=exp(coef(pcglm)[1]),shape2=exp(coef(pcglm)[2])))
grDevices::quartz.options(width = 175 * mm,
height = 100 * mm + (mai[1]+mai[3]),
pointsize = 11)
par(mgp=c(2.1,1,0),mai=mai)
with (subset(dz,households > 1),
hist(offline/households,70,freq=F,
xlab="probability of a neighbour being offline", xlim=c(0,1),
ylab="relative frequency",
ylim=c(0,sdz), ## axis is extended to cover sdz * 1.08
main="",
yaxs = "i",xaxs = "i",xaxt="n",yaxt="n",bty='n')
)
axis(side=1,at=seq(0,1,by=0.2))
legend(x=0.6,y=1.8,legend=c("By Datazone","By Postcode"),fill=c("#A6CEE3","red"),bty="n",cex=1.3)
plotbeta(a=exp(coef(dzglm)[1]),b=exp(coef(dzglm)[2]),h=0,col=alpha("#A6CEE3",0.7))
plotbeta(a=exp(coef(pcglm)[1]),b=exp(coef(pcglm)[2]),h=0,col="red")
mkall("III.1distribution-deprivation")
## III.2uptake-dist
graphics.off()
## calculation to get the same distance between axis and plot for both figures
x <- 0.04*(sdz-spc)/1.08
grDevices::quartz.options(width = 175 * mm,
height = 100 * mm * (spc+ 0.08*sdz)/(sdz * 1.08) + (mai[1]+mai[3]),
pointsize = 11)
par(mgp=c(2.1,1,0),mai=mai)
plot(0, 0, type="n",
xlim=c(0,1),
ylim=c(-x,spc+x),
xlab="probability of a neighbour being offline",
xaxs = "i",
xaxt="n",yaxt="n",bty='n')
axis(side=1,at=seq(0,1,by=0.1))
for (i in 10:1){
xmax <- i/10
x <- seq(0,xmax,by=0.005)
polygon(c(0,x,xmax),
c(0, dbeta(x ,shape1=exp(coef(pcglm)[1]),shape2=exp(coef(pcglm)[2])) ,0),
col=c(brewer.pal(9,"Purples")[9:2], brewer.pal(7,"PRGn")[5:6])[11-i], lwd =0.01)
}
plotbeta(a=exp(coef(pcglm)[1]),b=exp(coef(pcglm)[2]),h=0.15)
lines(c(0,1),c(0,0))
mkall("III.2uptake-dist")
pcCAglm <-
vglm(formula =
cbind(offline,
connected)
~ CA,
family = betabinomial.ab,
data = model,
trace = TRUE
)
## III.3ByCAOffline
## column widths are proportional to household sums: compute column boundaries
test <- ddply(subset(pc,!is.na(CA)),.(CA),summarise,hh = sum(Household_Count,na.rm=T))
hh <- sum(test$hh)
test$cp <- cumsum(test$hh) ## r-hand boundary
test$xm <- test$cp - test$hh ## l-hand boundary
graphics.off()
mai <- c(0.9,0.1,0.3,0)
grDevices::quartz.options(width = 175 * mm,
height = 175 * mm + (mai[1]+mai[3]),
pointsize = 11)
par(mai=mai,bty='n')
plot(0, 0, type="n", asp=sum(test$hh),
xlim=c(0,hh),ylim=c(0,1),ylab="",xlab="",
xaxt="n",yaxt="n")
legend(x=hh/2, xjust=0.505,
y=1.01,yjust=0.3,
legend=c("< 10% ","< 20% ","< 30% ","< 40% ","< 50% ","< 60% ","< 70% ","< 80% ","< 90%"),
fill=rev(c(brewer.pal(9,"Purples")[8:2],brewer.pal(7,"PRGn")[5:6])),horiz=T,
text.width=hh/18,cex=0.9,bty="n")
mtext("percentage of neighbours offline",side=3)
text(test$cp - test$hh/2, -0.015, srt = 90, adj = 1,
labels = test$CA, xpd = TRUE,cex=0.7)
p <- exp(predict(pcCAglm,newdata=test))
for (g in 1:dim(p)[1]){
for (i in 10:1){
y <- pbeta(i/10, shape1=p[g,1], shape2=p[g,2])
x1 <- test$xm[g]
x2 <- test$cp[g]
polygon(c(x1,x1,x2,x2),
c(0,y,y,0),
col=c(brewer.pal(9,"Purples")[9:2],brewer.pal(7,"PRGn")[5:6])[11-i],lwd =0.5)
}
}
mkall("III.3ByCAOffline")
p <- exp(predict(pcCAglm,newdata=test))
par(mfrow=c(8,4), mar=c(0,0,2,0) + 0.1)
for (g in 1:dim(p)[1]){
plot(0, 0, type="n",
xlim=c(0,1),ylim=c(0,3),ylab="",xlab="",
xaxt="n",yaxt="n")
plotbeta(a=p[g,1],b=p[g,2],h=2.2)
mtext(test[g,1],side=3)
}
## isolation ******************************************
isolgam <-
vgam(formula =
cbind(offline,
connected)
~ s(isolation,df=6),
family = betabinomial.ab,
data = model,
trace = TRUE,
)
test <- data.frame(isol = seq(0,1, by=0.01),isolation = seq(0,1, by=0.01))
for ( i in 1:length(test$isol)){
test$isolation[i] <- max(c(0,subset(model,isol <= test$isol[i])$isolation))
}
graphics.off()
mai <- c(0.6,0.1,0.385,0.1)
grDevices::quartz.options(width = 175 * mm,
height = 175 * mm + (mai[1]+mai[3]),
pointsize = 11)
par(mai=mai,bty='n')
par(mgp=c(2,1,0),mar=c(3,0.5,2,0.5) + 0.1,bty='n',tck=0.01)
plot(0, 0, type="n",xlab="isolation centile", asp=1,
xlim=c(0,1),ylim=c(0,1),
xaxt="n",yaxt="n")
legend(x=0.5, xjust=0.51,
y=1.01,yjust=0,
legend=c("< 10% ","< 20% ","< 30% ","< 40% ","< 50% ","< 60% ","< 70% ","< 80% ","< 90%"),
fill=rev(c(brewer.pal(9,"Purples")[8:2],brewer.pal(7,"PRGn")[5:6])),horiz=T,
text.width=1/18,cex=0.9,bty="n")
mtext("percentage of neighbours offline",side=3)
axis(side=1,at=seq(0,1,by=0.2),
labels=seq(0,100,by=20))
p <- exp(predict(isolgam,newdata=test))
for (i in 10:1){
polygon(c(0,test$isol,1),
c(0,pbeta(i/10, shape1=p[,1],shape2=p[,2]),0),
col=c(brewer.pal(9,"Purples")[9:2], brewer.pal(7,"PRGn")[5:6])[11-i],lwd =0.5)
}
mkall("III.4isolation")
training <- subset(model, medspeed >= 4 & (isolation < 0.4 | (superfast == "Y" & slowlines == "N")))
training$slowlines <- factor(training$slowlines)
unit <- seq(0,1,by=0.01)
hh <- unit
pp <- sum(model$Household_Count,na.rm=T)
for ( i in 1:length(unit)){
hh[i] <- 2*sum(subset(model,isolation <= hh[i])$Household_Count,na.rm=T)/pp -1
}
g <- brewer.pal(7,"PRGn")[6]
p <- brewer.pal(9,"Purples")[9]
ploteff <- function(eff,mytitle="households offline",range=log(c(2/3,1.4)),mar=c(2,0.2,2,0.2)){
par(mar=mar)
N <- dim(eff)[1]
low <- seq(0,-1,by=-1)
lowh <- low
high <- seq(0,1,by=1)
highh <- high
yt <- seq(0.4,2.2,by=0.1)
## xx <- max(abs(eff[,1]))
plot(0,0, type="n", bty="n",
ylab="",cex.main=0.84,
ylim=c(0,N*2 +0.2),xlim=range,
xaxt="n",yaxt="n")
title(mytitle, line = 0.5,cex.main=0.84)
axis(side=1,
at=log(yt),
labels=Map(function(x)paste(ifelse(x>1,"+",""),signif((x-1)*100,2),"%",sep=""),yt),
cex.axis=0.8)
for (i in 1:N){
if (i == 7){
polygon(
c(unit * eff[i,1],0),c(hh,1)+2*i-1,
col=ifelse(eff[i,1]>0,p,g),
border=NA,
lwd =0.5)
polygon(c(unit*(eff[i,1]+2*eff[i,2]), rev(unit)*(eff[i,1]-2*eff[i,2])),
c(hh,rev(hh))+2*i-1,
border=NA,
col=alpha("white",0.6),lwd=0.01)
}
else {
polygon(
c(0,low * eff[i,1]),c(-1,low)+2*i-1,
border=NA,
col=ifelse(eff[i,1]>0,g,p),
lwd =0.5)
polygon(c(0,low * (eff[i,1]+2*eff[i,2]),rev(low) *(eff[i,1]-2*eff[i,2])),
c(0,low,rev(low))+2*i-1,
border=NA,
col=alpha("white",0.6),lwd=0.01)
polygon(
c(high * eff[i,1],0),c(highh,1)+2*i-1,
col=ifelse(eff[i,1]>0,p,g),
border=NA,
lwd =0.5)
polygon(c(0,high*(eff[i,1]+2*eff[i,2]), rev(high)*(eff[i,1]-2*eff[i,2])),
c(0,high,rev(high))+2*i-1,
border=NA,
col=alpha("white",0.6),lwd=0.01)
}
lines(range,c(2*i,2*i),lwd=0.2)
lines(range,c(2*i,2*i)-2,lwd=0.2)
}
lines(c(0,0),c(-2*N*0.04,2*N + 0.9),lwd=0.2)
lines(c(-xx,xx),c(0,0),lwd=0.2)
}
scotglm <- vglm(formula =
cbind(offline,
connected)
~ (income + employment + housing + health + education + crime)+slowlines + isolation,
family = betabinomial,
data = training, trace = TRUE)
CAglmab <- vglm(formula =
cbind(offline,
connected)
~ CA,
family = betabinomial.ab,
data = training, trace = TRUE)
scot2glm <-
vglm(formula =
cbind(offline,
connected)
~ slowlines + income + employment + housing
+ health + education + crime + isolation,
family = betabinomial(zero=NULL),
data = training, trace = TRUE)
scotcoef <- coef(summary(scot2glm))
medscot <- scotcoef[1,1]
st <- subset(training, CA !="Eilean Siar")
st$CA <- factor(st$CA)
CAglm <- vglm(formula =
cbind(offline,
connected)
~ CA +
(income + employment + housing + health + education + crime)+slowlines + isolation,
family = betabinomial(zero=NULL),
data = st, trace = TRUE)
CA1 <- coef(summary(CAglm))
CA2glm <-
vglm(formula =
cbind(offline,
connected)
~
slowlines + isolation + CA + CA:(income + employment + housing + health + education + crime),
family = betabinomial(zero=NULL),
data = st, trace = TRUE)
CAcoef <- coef(summary(CA2glm))
CAoffsets <- CAcoef[c(1,0:29 * 2 +7),1:2]
rownames(CAoffsets)[1] <- "CAAberdeen City:1"
CAoffsets[2:31,1] <- CAoffsets[2:31,1] + CAoffsets[1,1]
CAoffsets[,1] <- CAoffsets[,1] - medscot
## CAoffsets <- CA1[0:30 * 2 + 1,1:2]
## CAoffsets[,1] <- CAoffsets[,1]-medscot
graphics.off()
grDevices::quartz.options(width = 175 * mm,
height = 200 * mm ,
pointsize = 11)
par(oma=c(1,1,1,1),mfrow=c(1,1))
## eff <- coef(summary(scot2glm))[c(3:8,10),1:2]
eff <- scotcoef[c(0:6)*2 + 5,1:2]
xx <- max(abs(eff[,1]))
ploteff(eff,mytitle="Scotland",range=log(c(0.6,1.35)),mar=c(3,0.1,2,0.1))
text(log(0.6),(1:7)*2 -1,labels=c("income","employment","housing","health","education","crime","isolation"),adj=0)
mtext(side=1,"change in the odds of being offline",line=2.5)
mkall("III.5Scotlandfactors")
graphics.off()
grDevices::quartz.options(width = 200 * mm,
height = 200 * mm * sqrt(2),
pointsize = 11)
par(oma=c(1,1,1,1),mfrow=c(4,8))
ploteff(eff,mytitle="Scotland")
## cc <- coef(summary(CAglm))
cc <- CAcoef
for(i in 1:31){
## eff <- rbind(cc[(0:5) * 31 + i + 34,1:2],cc[34,1:2])
eff <- rbind(cc[(0:5) * 62 + 2*i + 65,1:2],cc[5:6,1:2])[1:7,]
ploteff(eff,
mar=c(2,0.4,2,0.4),
mytitle=sub("West","W.",(sub("East","E.",sub("North","N.",sub("South","S.",
levels(training$CA)[i]))))))
polygon(c(0,0,CAoffsets[i,1],CAoffsets[i,1]),c(0,1,1,0) * 0.5 + 14.4,col=ifelse(CAoffsets[i,1]>0,p,g),
border=NA,
lwd =0.5)
polygon(CAoffsets[i,1] + c(-CAoffsets[i,2],-CAoffsets[i,2],CAoffsets[i,2],CAoffsets[i,2])*2,c(0,1,1,0) * 0.5 + 14.4,col=alpha("white",0.6),
border=NA,
lwd =0.5)
polygon(-CAoffsets[i,1] + c(-CAoffsets[i,2],-CAoffsets[i,2],CAoffsets[i,2],CAoffsets[i,2])*2,c(0,1,1,0) * 0.5 + 14.4,col=alpha("white",0.6),
border=NA,
lwd =0.5)
}
mkall("III.6CAfactors")
graphics.off()
grDevices::quartz.options(width = 175 * mm,
height = 120 * mm,
pointsize = 11)
par(mgp=c(2.1,1,0),
mar=c(3,3,0,0) + 0.1)
plot(0, 0, type="n",
ylab="households offline",
xlab="isolation",
xlim=c(0,1),ylim=c(0.10,0.4),
xaxt="n",yaxt="n",
frame.plot=F)
axis(side=2,
at=seq(0,0.5, by=0.1),
labels=Map(function(x)paste(x,"%",sep=""),seq(0,50,by=10)))
axis(side=1,
at=seq(0,1,by=0.2),
labels=seq(0,100,by=20))
test <- data.frame(isolation = seq(0,1, by=0.01))
legend("top", ncol = 2,
legend=c("sfast = Y slow = Y","sfast = Y slow = N","sfast = N slow = Y","sfast = N slow = N"),
fill=c("red","green","blue","cyan"),
cex=1,
bty="n")
for (slow in c("Y","N")){
for (fast in c("N","Y")){
m <- subset(model, slowlines == slow & superfast == fast)
simdgam <-
vgam(formula =
cbind(offline,
connected)
~ s(isolation,df=5),
family = betabinomial.ab,
data = m,
trace = TRUE,
)
p <- exp(predict(simdgam))
symbols(m$isolation,
qbeta(0.5, shape1=p[,1],shape2=p[,2]),
circles=sqrt(m$Household_Count),
inches=0.1,
fg=alpha(
ifelse(fast=="Y",
ifelse(slow=="Y","red","green"),
ifelse(slow=="Y","blue","cyan")),
0.2),
bg=alpha(
ifelse(fast=="Y",
ifelse(slow=="Y","red","green"),
ifelse(slow=="Y","blue","cyan")),
0.2),
add=T)
}
}
mkall("1.3fs-isol")
## ********************************************
mai <- c(0.6,0.1,0.385,0.1)
grDevices::quartz.options(width = 175 * mm,
height = 175 * mm + (mai[1]+mai[3]),
pointsize = 11)
par(mgp=c(2.1,1,0),
mar=c(3,3,0,0) + 0.1)
plot(0, 0, type="n",
ylab="households offline",xlab="SIMD centile",
xlim=c(-1,1),
ylim=c(0.1,0.5),
xaxt="n",yaxt="n",
frame.plot=F)
axis(side=2,
at=seq(0,0.6,by=0.1),
labels=Map(function(x)paste(x,"%",sep=""),seq(0,60,by=10)))
axis(side=1,at=seq(-1,1,by=0.4),
labels=Map(function(x)paste(x,"",sep=""),seq(0,100,by=20)))
test <- data.frame(simd = seq(-1,1, by=0.01))
tmp <- legend("bottomleft",legend= paste(" k",ur6),
fill=ur6pal,cex=1,bty="n")
text(x=tmp$text$x + 0.115,
y= tmp$text$y-0.0001,
labels=formatC(signif(round(ur6sizes$hh/1000),3), big.mark=","),
adj=c(1),
cex=1)
for (i in 1:6){
m <- subset(model,UrbRur6_2011_2012 == i)
simdgam <-
vgam(formula =
cbind(offline,
connected)
~ s(simd),
family = betabinomial.ab,
data = m,
trace = TRUE,
)
p <- exp(predict(simdgam))
symbols(m$simd,
qbeta(0.5, shape1=p[,1],shape2=p[,2]),
circles= sqrt(pmax(m$Household_Count , m$connections)),
fg=alpha(ur6pal[i], 0.2),
bg=alpha(ur6pal[i], 0.2),
inches= 0.1,
add=T)
p <- exp(predict(simdgam,newdata=test))
## lines(test$simd,qbeta(0.2, shape1=p[,1],shape2=p[,2]), col=ur6pal[i],lwd=4)
## lines(test$simd,qbeta(0.8, shape1=p[,1],shape2=p[,2]), col=ur6pal[i],lwd=4)
}
mkall("1.4ur6-simd")
## ********************************************
graphics.off()
grDevices::quartz.options(width = 175 * mm,
height = 120 * mm,
pointsize = 11)
par(mgp=c(2.1,1,0),mfrow=c(1,1),
mar=c(3,3,0,1) + 0.1)
plot(0, 0, type="n",ylab="households offline",
xlab="median download speed (Mb/s)",
xlim=c(0,32),
ylim=c(0,0.45),
xaxt="n",yaxt="n",
frame.plot=F)
axis(side=2,
at=seq(0,0.5,by=0.1),
labels=Map(function(x)paste(x,"%",sep=""),seq(0,50,by=10)))
axis(side=1,at=seq(-0,35,by=5))
legend("bottom",legend=ur6,fill=ur6pal,cex=1,bty="n")
test <- data.frame(medspeed = seq(0,31,by=0.1))
with(model,
for (q in levels(decile)){
m <- subset(model,
decile == q & !is.na(medspeed))
speedgam <-
vgam(formula =
cbind(offline,
connected)
~ s(medspeed,df=4),
family = betabinomial.ab,
data = m,
trace = TRUE,
)
lines(test$medspeed,
predict(speedgam,
type="response",
newdata=test),
lwd=0.5)
symbols(m$medspeed,
predict(speedgam,type="response"),
circles=sqrt(m$Household_Count),
inches=0.1,
fg=alpha(ur6pal[m$UrbRur6_2011_2012],0.2),
bg=alpha(ur6pal[m$UrbRur6_2011_2012],0.2),
add=T)
text(32,
predict(speedgam,
type="response",
newdata=data.frame(medspeed=31)),
labels= c(q),adj=c(0,0.6))
}
)
mtext(side=4,"simd decile", adj=0.7)
mkall("1.5speed-simd-decile")
## Log-likelihood: -327283.6 on 266676 degrees of freedom
graphics.off()
mai <- c(0.6,0.2,0.2,0.2)
sdz <- max(dbeta(x=seq(0,1,by=0.01) ,shape1=exp(coef(dzglm)[1]),shape2=exp(coef(dzglm)[2])))
spc <- max(dbeta(x=seq(0,1,by=0.01) ,shape1=exp(coef(pcglm)[1]),shape2=exp(coef(pcglm)[2])))
grDevices::quartz.options(width = 175 * mm,
height = 100 * mm + (mai[1]+mai[3]),
pointsize = 11)
par(mgp=c(2.1,1,0),mai=mai)
## p <- exp(coef(pcglm)[])
## s = max(dbeta(seq(0,1,0.01) ,shape1=p[1],shape2=p[2]))
x1 = 1 + 0.1
x2 = x1 + 1/sdz
par(mgp=c(2.1,1,0),
mar=c(3,0,0,0)+0.1,
bty='n')
plot(0, 0, type="n",
xlab="Variation in the likelihood of being offline, across Scotland.",
xlim=c(0,x2),
ylim=c(0,sdz),
xaxt="n",yaxt="n")
axis(side=1,at=seq(0,1,by=0.2),
labels=Map(function(x)paste(x,"%",sep=""),seq(0,100,by=20)))
for (i in 10:1){
xmax <- i/10
x <- seq(0,xmax,by=0.005)
polygon(c(0,x,xmax),
c(0, dbeta(x ,shape1=exp(coef(pcglm)[1]),shape2=exp(coef(pcglm)[2])) ,0),
col=c(brewer.pal(9,"Purples")[9:2], brewer.pal(7,"PRGn")[5:6])[11-i], lwd =0.5)
}
for (i in 10:1){
y <- pbeta(i/10, shape1=exp(coef(pcglm)[1]), shape2=exp(coef(pcglm)[2])) * sdz
polygon(c(x1,x1,x2,x2),
c(0,y,y,0),
col=c(brewer.pal(9,"Purples")[9:2],brewer.pal(7,"PRGn")[5:6])[11-i],lwd =0.5)
if(i < 8){
text((x1 + x2)/2, y-0.03, adj=c(0.5,1),labels=paste("< ", 10*i,"%",sep=""))
}
}
text((x1 +x2)/2, par("usr")[3],labels="Scotland",adj=0.5, xpd = TRUE)
mkall("1.6ScotlandOffline")
## simd ******************************************
graphics.off()
mai <- c(0.6,0.1,0.385,0.1)
grDevices::quartz.options(width = 175 * mm,
height = 175 * mm + (mai[1]+mai[3]),
pointsize = 11)
par(mgp=c(2,1,0),mar=c(3,0.5,1,0.5) + 0.1,bty='n',tck=0.01)
plot(0, 0, type="n",xlab="SIMD centile",asp=2,
xlim=c(-1,1),ylim=c(0,1),
xaxt="n",yaxt="n")
legend(x=0, xjust=0.505,
y=1.01,yjust=0,
legend=c("< 10% ","< 20% ","< 30% ","< 40% ","< 50% ","< 60% ","< 70% ","< 80% ","< 90%"),
fill=rev(c(brewer.pal(9,"Purples")[8:2],brewer.pal(7,"PRGn")[5:6])),horiz=T,
text.width=2/18,cex=0.9,bty="n")
mtext("percentage of neighbours offline",side=3)
axis(side=1,at=seq(-1,1,by=0.4),
labels=seq(0,100,by=20))
simdgam <-
vgam(formula =
cbind(offline,
connected)
~ s(simd,df=7),
family = betabinomial.ab,
data = model,
trace = TRUE,
)
## Log-likelihood: -318518.7 on 266669.2 degrees of freedom
test <- data.frame(simd = seq(-1,1, by=0.01))
p <- exp(predict(simdgam,newdata=test))
for (i in 10:1){
polygon(c(-1,test$simd,1),
c(0,pbeta(i/10, shape1=p[,1],shape2=p[,2]),0),
col=c(brewer.pal(9,"Purples")[9:2], brewer.pal(7,"PRGn")[5:6])[11-i],lwd =0.5)
}
mkall("1.7simd")
## **************************************************************************