Digital Scotland: Code

This page gives the code used to produce the diagrams in the report. Further comments will be added, and the code will be tidied, as time permits.


## **************************************************************************
## PATHS
## **************************************************************************
data <- "" ## data directory (including trailing '/')
figures <- "" ## directory for figures (including trailing '/')

setwd(data)
## **************************************************************************
## LIBRARIES
## **************************************************************************
require(plyr)         ## tools that implement the split-apply-combine pattern in R
require(RColorBrewer) ## creates color palettes
require(memisc)       ## imports SPSS .sav files
require(VGAM)         ## functions for fitting vector generalized linear and additive models
require(scales)       ## we use this for alpha (opacity) shading
## **************************************************************************
## CONSTANTS
## **************************************************************************
mm <- 0.0393700787 ## Graphics in R uses inches 1mm =  0.0393700787 inches
textwidth <- 175 * mm
ur8pal <- brewer.pal(8,"Paired")
ur8 <-
    c(
        "Large Urban Areas",
        "Other Urban Areas",
        "Accessible Small Towns",
        "Remote Small Towns",
        "Very Remote Small Towns",
        "Accessible Rural",
        "Remote Rural",
        "Very Remote Rural"
        )
ur6pal <- brewer.pal(6,"Paired")
ur6 <-
    c(
        "Large Urban Areas",
        "Other Urban Areas",
        "Accessible Small Towns",
        "Remote Small Towns",
        "Accessible Rural",
        "Remote Rural"
        )
## **************************************************************************
## OUTPUT produce three versions of a diagram
## **************************************************************************

mkall <-
    function (name, path = figures){
        mk <- function(myname=name, type, dpi=300){
            print(paste(path, myname, ".", type, dpi, sep=""))
            quartz.save(
                file = paste(path, myname, ".", type, sep=""),
                type = type,
                width = textwidth,
                pointsize = 11,
                family = "Arial",
                dpi = dpi)
        }
        mk(type = "pdf")
        mk(type = "png", dpi = 288)
        mk(myname = paste(name,"-thumb",sep=""), type = "png", dpi = 72)
    }

## **************************************************************************
## DATA
## **************************************************************************
## get Ofcom data
## http://d2a9983j4okwzn.cloudfront.net/downloads/ofcom-uk-fixed-broadband-postcode-level-data-2013.zip
## ofcom uses postcodes with no spaces, so we
## process ofcom-part1-fixed-broadband-postcode-level-data-2013.csv with the following sed script
## to set headers, put postcodes in pc7 format, adjust anomalous values, and remove lines with no postcode:
## 1s/.*/postcode,status,slowlines,avespeed,medspeed,maxspeed,superfast,connections/
## /^[^ ]{6},/s/^([^ ]{3})/\1 /
## /^[^ ]{5},/s/^([^ ]{2})/\1  /
## s!N/A!!g
## s/<3/-1/g
## s/>=30/31/g
## /^,/d
ofcom <- read.csv("ofcom-uk-fixed-broadband-postcode-level-data-2013/ofcom-fixed-broadband-postcode-level-data-2013.csv")
## we use this data to give connection counts for almost all postcodes, and for most postcodes also
## median speed data, and postcodes with slowlines and superfast availability
## **************************************************************************
## get isd data from
## https://isdscotland.scot.nhs.uk/Products-and-Services/GPD-Support/Geography/Postcode-Reference-File/_docs/latestpcinfowithlinkpc.sav
isd <- as.data.set(spss.system.file('latestpcinfowithlinkpc.sav'))
isd$UrbRur8_2011_2012 <- as.ordered(isd$UrbRur8_2011_2012)
isd$UrbRur6_2011_2012 <- as.ordered(isd$UrbRur6_2011_2012)
isd <- subset(isd, is.na(Date_of_Deletion) & !is.na(Household_Count))
## we use this dataset to give household counts for each postcode
## and links to higher-level geographies, including datazones
## it includes 146,749 postcodes covering 2,545,790 households
## **************************************************************************
## keep only live postcodes, not those that have been deleted
## and only postcodes for which we have household count
## then join this with the ofcom data to get postcode-level data
pc <- merge(isd, subset(ofcom, connections >=0 & postcode!= ""), by.x="pc7",by.y="postcode")
## this dataset includes 145,968 postcodes, covering 2,538.280 households (99.7% of the isd total)

## we divide the households in each postcode into connected and offline
## if there are more connections than households we assume all households are connected
pc$connected <- pmin(pc$connections, pc$Household_Count)
pc$offline   <- pc$Household_Count - pc$connected
## note that connected + offline == Household_Count

scothh   <- sum(pc$Household_Count,na.rm=T)
ur6sizes <- ddply(pc,.(UrbRur6_2011_2012),summarise,hh=sum(Household_Count,na.rm=T))
CAsizes  <- ddply(pc,.(CA),summarise,hh=sum(Household_Count,na.rm=T))

## compute offline and connected households by datazone
dz <- ddply(pc,.(Datazone),summarize,
            households = sum(Household_Count, na.rm=T),
            offline= sum(offline, na.rm=T),
            connected= sum(connected, na.rm=T))
## check: with(dz,households == offline + connected)


## **************************************************************************
## get SIMD data for 6,505 datazones
## http://www.scotland.gov.uk/Resource/0041/00410767.xls
## export columns A-Y to SIMD/overall-domain.csv
simd <- read.csv("SIMD/overall-domain.csv")

dzsimd <- merge(simd,dz,by.x="datazone",by.y="Datazone")

## convert ranking to [-1,+1] spectrum by household count
byPop <- function (ranking, households) {
    pop <- sum(households)
    oo <-  order(ranking)
    rr <- order(oo)
    hh <- cumsum(households[oo]) - households[oo]/2
    2*hh[rr]/pop -1
}

mysimd <- with(
    dzsimd,
    data.frame(
        Datazone   = datazone,
        isolation  = ((geographic_domain_score - min(geographic_domain_score))
                      /diff(range(geographic_domain_score))),
        isol       = (byPop(geographic_domain_score,households) + 1)/2,
        income     = byPop(income_domain_rank,households), 
        housing    = byPop(housing_domain_rank,households), 
        employment = byPop(employment_domain_rank,households), 
        health     = byPop(health_domain_rank,households), 
        education  = byPop(education_domain__rank,households), 
        crime      = byPop(crime_2012_rank,households), 
        simd       = byPop(simd_2012_rank,households) 
        ))

pcsimd <- merge(pc, mysimd, by="Datazone")
pcsimd$decile <- cut(pcsimd$simd,breaks=10,include.lowest=T,labels=1:10)

## to fit a beta-binomial model we must restrict attention to postcodes with more than one household
model <- subset(pcsimd, Household_Count > 1)

## fit a beta binomial distribution to the postcode level data
pcglm <-
    vglm(formula =
         cbind(offline, connected)
         ~ 1,
         family = betabinomial.ab,
         data = model,
         trace = TRUE
         )
## fit a beta binomial distribution to the datazone level data
dzglm <-
    vglm(formula =
         cbind(offline, connected)
         ~ 1,
         family = betabinomial.ab,
         data = subset(dz,households > 1),
         trace = TRUE
         )

plotbeta <- function(a,b, h=0.32,col="red") {
    unit<- seq(0,1,by=0.01)
    q <- function (q) { return(qbeta(q, shape1=a,shape2=b))}
    d <- function (x) { return(dbeta(x, shape1=a,shape2=b))}
    
    lines(unit ,
          dbeta(x=unit,
                shape1=a,
                shape2=b),
          col= col,
          lwd= 4)
    
    if(h){ ## highlight P( p < 0.1 ) and P( p > 0.5 )
        q50 <- q(0.5)
        lines(c(q50,q50),c(0,d(q50)),lwd=2)
        lines(c(0.5,0.5),c(0,d(0.5)),lwd=0.5)
        p50 <- pbeta(0.5, shape1=a,shape2=b)
        lines(c(0.1,0.1),c(0,d(0.1)),lwd=0.5)
        p10 <- pbeta(0.1, shape1=a,shape2=b)
        text(x= 0.51, y=h, labels=c(paste(round(100*(1-p50)),"%",sep="")),adj=0,cex=1.2)
        text(x= 0.09, y=h, labels=c(paste(round(100*(p10)),"%",sep="")),adj=1,cex=1.2)
        arrows(x0= c(0.5,0.1), y0=c(h,h)-0.12,x1= c(0.81,0.012), y1=c(h,h)-0.12,length=0.1,angle=15)
    }
}


## III.1distribution-deprivation
graphics.off()
mai <- c(0.6,0.2,0.2,0.2)
sdz <- max(dbeta(x=seq(0,1,by=0.01) ,shape1=exp(coef(dzglm)[1]),shape2=exp(coef(dzglm)[2])))
spc <- max(dbeta(x=seq(0,1,by=0.01) ,shape1=exp(coef(pcglm)[1]),shape2=exp(coef(pcglm)[2])))
grDevices::quartz.options(width  = 175 * mm,
                          height = 100 * mm +  (mai[1]+mai[3]),
                          pointsize = 11)
par(mgp=c(2.1,1,0),mai=mai)
with (subset(dz,households > 1),
      hist(offline/households,70,freq=F,
           xlab="probability of a neighbour being offline", xlim=c(0,1),
           ylab="relative frequency",
           ylim=c(0,sdz), ## axis is extended to cover sdz * 1.08
           main="",
           yaxs = "i",xaxs = "i",xaxt="n",yaxt="n",bty='n')
      )
axis(side=1,at=seq(0,1,by=0.2))
legend(x=0.6,y=1.8,legend=c("By Datazone","By Postcode"),fill=c("#A6CEE3","red"),bty="n",cex=1.3)
plotbeta(a=exp(coef(dzglm)[1]),b=exp(coef(dzglm)[2]),h=0,col=alpha("#A6CEE3",0.7))
plotbeta(a=exp(coef(pcglm)[1]),b=exp(coef(pcglm)[2]),h=0,col="red")
mkall("III.1distribution-deprivation")

## III.2uptake-dist
graphics.off()
## calculation to get the same distance between axis and plot for both figures
x <- 0.04*(sdz-spc)/1.08
grDevices::quartz.options(width  = 175 * mm,
                          height = 100 * mm * (spc+ 0.08*sdz)/(sdz * 1.08) + (mai[1]+mai[3]),
                          pointsize = 11)
par(mgp=c(2.1,1,0),mai=mai)
plot(0, 0, type="n",
     xlim=c(0,1),
     ylim=c(-x,spc+x),
     xlab="probability of a neighbour being offline",
     xaxs = "i",
     xaxt="n",yaxt="n",bty='n')
axis(side=1,at=seq(0,1,by=0.1))
for (i in 10:1){
    xmax <- i/10
    x <- seq(0,xmax,by=0.005)
    polygon(c(0,x,xmax),
            c(0, dbeta(x ,shape1=exp(coef(pcglm)[1]),shape2=exp(coef(pcglm)[2])) ,0),
            col=c(brewer.pal(9,"Purples")[9:2], brewer.pal(7,"PRGn")[5:6])[11-i], lwd =0.01)
}
plotbeta(a=exp(coef(pcglm)[1]),b=exp(coef(pcglm)[2]),h=0.15)
lines(c(0,1),c(0,0))
mkall("III.2uptake-dist")               

pcCAglm <-
    vglm(formula =
         cbind(offline,
               connected)
         ~ CA,
         family = betabinomial.ab,
         data = model,
         trace = TRUE
         )

## III.3ByCAOffline
## column widths are proportional to household sums: compute column boundaries
test <- ddply(subset(pc,!is.na(CA)),.(CA),summarise,hh = sum(Household_Count,na.rm=T))
hh <- sum(test$hh)
test$cp <- cumsum(test$hh)    ## r-hand boundary
test$xm <- test$cp - test$hh  ## l-hand boundary
graphics.off()
mai <- c(0.9,0.1,0.3,0)
grDevices::quartz.options(width  = 175 * mm,
                          height = 175 * mm + (mai[1]+mai[3]),
                          pointsize = 11)
par(mai=mai,bty='n')
plot(0, 0, type="n", asp=sum(test$hh),
     xlim=c(0,hh),ylim=c(0,1),ylab="",xlab="",
     xaxt="n",yaxt="n")
legend(x=hh/2, xjust=0.505,
       y=1.01,yjust=0.3,
       legend=c("< 10% ","< 20% ","< 30% ","< 40% ","< 50% ","< 60% ","< 70% ","< 80% ","< 90%"),
       fill=rev(c(brewer.pal(9,"Purples")[8:2],brewer.pal(7,"PRGn")[5:6])),horiz=T,
       text.width=hh/18,cex=0.9,bty="n")
mtext("percentage of neighbours offline",side=3)
text(test$cp - test$hh/2, -0.015, srt = 90, adj = 1,
     labels = test$CA, xpd = TRUE,cex=0.7)
p <- exp(predict(pcCAglm,newdata=test))
for (g in 1:dim(p)[1]){
    for (i in 10:1){
        y <- pbeta(i/10, shape1=p[g,1], shape2=p[g,2])
        x1 <- test$xm[g]
        x2 <- test$cp[g]
        polygon(c(x1,x1,x2,x2),
                c(0,y,y,0),
                col=c(brewer.pal(9,"Purples")[9:2],brewer.pal(7,"PRGn")[5:6])[11-i],lwd =0.5) 
    }
}
mkall("III.3ByCAOffline")

p <- exp(predict(pcCAglm,newdata=test))
par(mfrow=c(8,4), mar=c(0,0,2,0) + 0.1)
for (g in 1:dim(p)[1]){
    plot(0, 0, type="n", 
         xlim=c(0,1),ylim=c(0,3),ylab="",xlab="",
         xaxt="n",yaxt="n")
    plotbeta(a=p[g,1],b=p[g,2],h=2.2)
    mtext(test[g,1],side=3)
}


## isolation ******************************************

isolgam <-
    vgam(formula =
         cbind(offline,
               connected)
         ~ s(isolation,df=6),
         family = betabinomial.ab,
         data = model,
         trace = TRUE,
         )
test <- data.frame(isol = seq(0,1, by=0.01),isolation = seq(0,1, by=0.01))
for ( i in 1:length(test$isol)){
    test$isolation[i] <- max(c(0,subset(model,isol <= test$isol[i])$isolation))
}
graphics.off()
mai <- c(0.6,0.1,0.385,0.1)
grDevices::quartz.options(width  = 175 * mm,
                          height = 175 * mm + (mai[1]+mai[3]),
                          pointsize = 11)
par(mai=mai,bty='n')
par(mgp=c(2,1,0),mar=c(3,0.5,2,0.5) + 0.1,bty='n',tck=0.01)
plot(0, 0, type="n",xlab="isolation centile", asp=1,
     xlim=c(0,1),ylim=c(0,1),
     xaxt="n",yaxt="n")
legend(x=0.5, xjust=0.51,
       y=1.01,yjust=0,
       legend=c("< 10% ","< 20% ","< 30% ","< 40% ","< 50% ","< 60% ","< 70% ","< 80% ","< 90%"),
       fill=rev(c(brewer.pal(9,"Purples")[8:2],brewer.pal(7,"PRGn")[5:6])),horiz=T,
       text.width=1/18,cex=0.9,bty="n")
mtext("percentage of neighbours offline",side=3)
axis(side=1,at=seq(0,1,by=0.2),
     labels=seq(0,100,by=20))
p <- exp(predict(isolgam,newdata=test))
for (i in 10:1){
    polygon(c(0,test$isol,1),
            c(0,pbeta(i/10, shape1=p[,1],shape2=p[,2]),0),
            col=c(brewer.pal(9,"Purples")[9:2], brewer.pal(7,"PRGn")[5:6])[11-i],lwd =0.5)
}
mkall("III.4isolation")

training <- subset(model, medspeed >= 4 & (isolation < 0.4 | (superfast == "Y" & slowlines == "N")))
training$slowlines <- factor(training$slowlines)


unit <- seq(0,1,by=0.01)
hh <- unit
pp <- sum(model$Household_Count,na.rm=T)
for ( i in 1:length(unit)){
    hh[i] <- 2*sum(subset(model,isolation <= hh[i])$Household_Count,na.rm=T)/pp -1
}



g <- brewer.pal(7,"PRGn")[6]
p <- brewer.pal(9,"Purples")[9]

ploteff <- function(eff,mytitle="households offline",range=log(c(2/3,1.4)),mar=c(2,0.2,2,0.2)){
    par(mar=mar)
    N <- dim(eff)[1]
    low <- seq(0,-1,by=-1)
    lowh <- low
    high <- seq(0,1,by=1)
    highh <- high
    yt <- seq(0.4,2.2,by=0.1)
    ## xx <- max(abs(eff[,1]))
    plot(0,0, type="n", bty="n",
         ylab="",cex.main=0.84,
         ylim=c(0,N*2 +0.2),xlim=range,
         xaxt="n",yaxt="n")
    title(mytitle, line = 0.5,cex.main=0.84)
    axis(side=1,
         at=log(yt),
         labels=Map(function(x)paste(ifelse(x>1,"+",""),signif((x-1)*100,2),"%",sep=""),yt),
         cex.axis=0.8)


    for (i in 1:N){
        if (i == 7){
            polygon(
                c(unit * eff[i,1],0),c(hh,1)+2*i-1,
                col=ifelse(eff[i,1]>0,p,g),
                border=NA,
                lwd =0.5)
            polygon(c(unit*(eff[i,1]+2*eff[i,2]), rev(unit)*(eff[i,1]-2*eff[i,2])),
                    c(hh,rev(hh))+2*i-1,
                    border=NA,
                    col=alpha("white",0.6),lwd=0.01)
        }
        else {
            polygon(
                c(0,low * eff[i,1]),c(-1,low)+2*i-1,
                border=NA,
                col=ifelse(eff[i,1]>0,g,p),
                lwd =0.5)
            polygon(c(0,low * (eff[i,1]+2*eff[i,2]),rev(low) *(eff[i,1]-2*eff[i,2])),
                    c(0,low,rev(low))+2*i-1,
                    border=NA,
                    col=alpha("white",0.6),lwd=0.01)
            polygon(
                c(high * eff[i,1],0),c(highh,1)+2*i-1,
                col=ifelse(eff[i,1]>0,p,g),
                border=NA,
                lwd =0.5)
            polygon(c(0,high*(eff[i,1]+2*eff[i,2]), rev(high)*(eff[i,1]-2*eff[i,2])),
                    c(0,high,rev(high))+2*i-1,
                    border=NA,
                    col=alpha("white",0.6),lwd=0.01)
        }
        lines(range,c(2*i,2*i),lwd=0.2)
        lines(range,c(2*i,2*i)-2,lwd=0.2)
    }
    lines(c(0,0),c(-2*N*0.04,2*N + 0.9),lwd=0.2)
    lines(c(-xx,xx),c(0,0),lwd=0.2)
}


scotglm <- vglm(formula =
                cbind(offline,
                      connected)
                ~ (income + employment + housing + health + education + crime)+slowlines + isolation,
                family = betabinomial, 
                data = training, trace = TRUE)
CAglmab <- vglm(formula =
                cbind(offline,
                      connected)
                ~ CA,
                family = betabinomial.ab, 
                data = training, trace = TRUE)

scot2glm <-
    vglm(formula =
         cbind(offline,
               connected)
         ~ slowlines + income + employment + housing
         + health + education + crime + isolation,
         family = betabinomial(zero=NULL), 
         data = training, trace = TRUE)
scotcoef <- coef(summary(scot2glm))
medscot <- scotcoef[1,1]

st <- subset(training, CA !="Eilean Siar")
st$CA <- factor(st$CA)

CAglm <- vglm(formula =
              cbind(offline,
                    connected)
              ~ CA +
              (income + employment +  housing +  health + education + crime)+slowlines + isolation,
              family = betabinomial(zero=NULL), 
              data = st, trace = TRUE)
CA1 <- coef(summary(CAglm))

CA2glm <-
    vglm(formula =
         cbind(offline,
               connected)
         ~
         slowlines + isolation  +  CA + CA:(income + employment +  housing +  health + education + crime),
         family = betabinomial(zero=NULL), 
         data = st, trace = TRUE)
CAcoef <- coef(summary(CA2glm))

CAoffsets <- CAcoef[c(1,0:29 * 2 +7),1:2]
rownames(CAoffsets)[1] <- "CAAberdeen City:1"
CAoffsets[2:31,1] <- CAoffsets[2:31,1] + CAoffsets[1,1]
CAoffsets[,1] <- CAoffsets[,1] - medscot

## CAoffsets <- CA1[0:30 * 2 + 1,1:2]
## CAoffsets[,1] <- CAoffsets[,1]-medscot

graphics.off()
grDevices::quartz.options(width  = 175 * mm,
                          height = 200 * mm ,
                          pointsize = 11)
par(oma=c(1,1,1,1),mfrow=c(1,1))
## eff <- coef(summary(scot2glm))[c(3:8,10),1:2]

eff <- scotcoef[c(0:6)*2 + 5,1:2]
xx <- max(abs(eff[,1]))
ploteff(eff,mytitle="Scotland",range=log(c(0.6,1.35)),mar=c(3,0.1,2,0.1))
text(log(0.6),(1:7)*2 -1,labels=c("income","employment","housing","health","education","crime","isolation"),adj=0)
mtext(side=1,"change in the odds of being offline",line=2.5)
mkall("III.5Scotlandfactors")
graphics.off()
grDevices::quartz.options(width  = 200 * mm,
                          height = 200 * mm * sqrt(2),
                          pointsize = 11)
par(oma=c(1,1,1,1),mfrow=c(4,8))
ploteff(eff,mytitle="Scotland")
## cc <- coef(summary(CAglm))
cc <- CAcoef
for(i in 1:31){
    ## eff <- rbind(cc[(0:5) * 31 + i + 34,1:2],cc[34,1:2])
    eff <- rbind(cc[(0:5) * 62 + 2*i + 65,1:2],cc[5:6,1:2])[1:7,]
    ploteff(eff,
            mar=c(2,0.4,2,0.4),
            mytitle=sub("West","W.",(sub("East","E.",sub("North","N.",sub("South","S.",
                levels(training$CA)[i]))))))
    polygon(c(0,0,CAoffsets[i,1],CAoffsets[i,1]),c(0,1,1,0) * 0.5 + 14.4,col=ifelse(CAoffsets[i,1]>0,p,g),
            border=NA,
            lwd =0.5)
    polygon(CAoffsets[i,1] + c(-CAoffsets[i,2],-CAoffsets[i,2],CAoffsets[i,2],CAoffsets[i,2])*2,c(0,1,1,0) * 0.5 + 14.4,col=alpha("white",0.6),
            border=NA,
            lwd =0.5)
    polygon(-CAoffsets[i,1] + c(-CAoffsets[i,2],-CAoffsets[i,2],CAoffsets[i,2],CAoffsets[i,2])*2,c(0,1,1,0) * 0.5 + 14.4,col=alpha("white",0.6),
            border=NA,
            lwd =0.5)

}
mkall("III.6CAfactors")

graphics.off()
grDevices::quartz.options(width  = 175 * mm,
                          height = 120 * mm,
                          pointsize = 11)
par(mgp=c(2.1,1,0),
    mar=c(3,3,0,0) + 0.1)
plot(0, 0, type="n",
     ylab="households offline",
     xlab="isolation",
     xlim=c(0,1),ylim=c(0.10,0.4),
     xaxt="n",yaxt="n",
     frame.plot=F)
axis(side=2,
     at=seq(0,0.5, by=0.1),
     labels=Map(function(x)paste(x,"%",sep=""),seq(0,50,by=10)))
axis(side=1,
     at=seq(0,1,by=0.2),
     labels=seq(0,100,by=20))
test <- data.frame(isolation = seq(0,1, by=0.01))
legend("top", ncol = 2,
       legend=c("sfast = Y  slow = Y","sfast = Y  slow = N","sfast = N  slow = Y","sfast = N  slow = N"),
       fill=c("red","green","blue","cyan"),
       cex=1,
       bty="n")
for (slow in  c("Y","N")){
    for (fast in  c("N","Y")){
        m <- subset(model, slowlines == slow & superfast == fast)
        simdgam <-
            vgam(formula =
                 cbind(offline,
                       connected)
                 ~ s(isolation,df=5),
                 family = betabinomial.ab,
                 data = m,
                 trace = TRUE,
                 )
        p <- exp(predict(simdgam))
        symbols(m$isolation,
                qbeta(0.5, shape1=p[,1],shape2=p[,2]),
                circles=sqrt(m$Household_Count),
                inches=0.1,
                fg=alpha(
                    ifelse(fast=="Y",
                           ifelse(slow=="Y","red","green"),
                           ifelse(slow=="Y","blue","cyan")),
                    0.2),
                bg=alpha(
                    ifelse(fast=="Y",
                           ifelse(slow=="Y","red","green"),
                           ifelse(slow=="Y","blue","cyan")),
                    0.2),
                add=T)
    }
}
mkall("1.3fs-isol")
## ********************************************
mai <- c(0.6,0.1,0.385,0.1)
grDevices::quartz.options(width  = 175 * mm,
                          height = 175 * mm + (mai[1]+mai[3]),
                          pointsize = 11)
par(mgp=c(2.1,1,0),
    mar=c(3,3,0,0) + 0.1)
plot(0, 0, type="n",
     ylab="households offline",xlab="SIMD centile",
     xlim=c(-1,1),
     ylim=c(0.1,0.5),
     xaxt="n",yaxt="n",
     frame.plot=F)
axis(side=2,
     at=seq(0,0.6,by=0.1),
     labels=Map(function(x)paste(x,"%",sep=""),seq(0,60,by=10)))
axis(side=1,at=seq(-1,1,by=0.4),
     labels=Map(function(x)paste(x,"",sep=""),seq(0,100,by=20)))
test <- data.frame(simd = seq(-1,1, by=0.01))
tmp <- legend("bottomleft",legend= paste("        k",ur6),
              fill=ur6pal,cex=1,bty="n")
text(x=tmp$text$x + 0.115,
     y= tmp$text$y-0.0001,
     labels=formatC(signif(round(ur6sizes$hh/1000),3), big.mark=","),
     adj=c(1),
     cex=1)
for (i in 1:6){
    m <- subset(model,UrbRur6_2011_2012 == i)
    simdgam <-
        vgam(formula =
             cbind(offline,
                   connected)
             ~ s(simd),
             family = betabinomial.ab,
             data = m,
             trace = TRUE,
             )
    p <- exp(predict(simdgam))
    symbols(m$simd,
            qbeta(0.5, shape1=p[,1],shape2=p[,2]),
            circles= sqrt(pmax(m$Household_Count , m$connections)),
            fg=alpha(ur6pal[i], 0.2),
            bg=alpha(ur6pal[i], 0.2),
            inches= 0.1,
            add=T)
    p <- exp(predict(simdgam,newdata=test))
    ## lines(test$simd,qbeta(0.2, shape1=p[,1],shape2=p[,2]), col=ur6pal[i],lwd=4)
    ## lines(test$simd,qbeta(0.8, shape1=p[,1],shape2=p[,2]), col=ur6pal[i],lwd=4)
}
mkall("1.4ur6-simd")
## ********************************************
graphics.off()
grDevices::quartz.options(width  = 175 * mm,
                          height = 120 * mm,
                          pointsize = 11)
par(mgp=c(2.1,1,0),mfrow=c(1,1),
    mar=c(3,3,0,1) + 0.1)
plot(0, 0, type="n",ylab="households offline",
     xlab="median download speed (Mb/s)",
     xlim=c(0,32),
     ylim=c(0,0.45),
     xaxt="n",yaxt="n",
     frame.plot=F)
axis(side=2,
     at=seq(0,0.5,by=0.1),
     labels=Map(function(x)paste(x,"%",sep=""),seq(0,50,by=10)))
axis(side=1,at=seq(-0,35,by=5))
legend("bottom",legend=ur6,fill=ur6pal,cex=1,bty="n")
test <- data.frame(medspeed = seq(0,31,by=0.1))
with(model,
     for (q in levels(decile)){
         m <- subset(model,
                     decile == q & !is.na(medspeed))
         speedgam <-
             vgam(formula =
                  cbind(offline,
                        connected)
                  ~ s(medspeed,df=4),
                  family = betabinomial.ab,
                  data = m,
                  trace = TRUE,
                  )
         lines(test$medspeed,
               predict(speedgam,
                       type="response",
                       newdata=test),
               lwd=0.5)
         symbols(m$medspeed,
                 predict(speedgam,type="response"),
                 circles=sqrt(m$Household_Count),
                 inches=0.1,
                 fg=alpha(ur6pal[m$UrbRur6_2011_2012],0.2),
                 bg=alpha(ur6pal[m$UrbRur6_2011_2012],0.2),
                 add=T)
         text(32,
              predict(speedgam,
                      type="response",
                      newdata=data.frame(medspeed=31)),
              labels= c(q),adj=c(0,0.6))
     }
     )
mtext(side=4,"simd decile", adj=0.7)
mkall("1.5speed-simd-decile")


## Log-likelihood: -327283.6 on 266676 degrees of freedom
graphics.off()
mai <- c(0.6,0.2,0.2,0.2)
sdz <- max(dbeta(x=seq(0,1,by=0.01) ,shape1=exp(coef(dzglm)[1]),shape2=exp(coef(dzglm)[2])))
spc <- max(dbeta(x=seq(0,1,by=0.01) ,shape1=exp(coef(pcglm)[1]),shape2=exp(coef(pcglm)[2])))
grDevices::quartz.options(width  = 175 * mm,
                          height = 100 * mm +  (mai[1]+mai[3]),
                          pointsize = 11)
par(mgp=c(2.1,1,0),mai=mai)
## p <- exp(coef(pcglm)[])
## s = max(dbeta(seq(0,1,0.01) ,shape1=p[1],shape2=p[2]))
x1 = 1 + 0.1
x2 = x1 + 1/sdz
par(mgp=c(2.1,1,0),
    mar=c(3,0,0,0)+0.1,
    bty='n')
plot(0, 0, type="n",
     xlab="Variation in the likelihood of being offline, across Scotland.",
     xlim=c(0,x2),
     ylim=c(0,sdz),
     xaxt="n",yaxt="n")
axis(side=1,at=seq(0,1,by=0.2),
     labels=Map(function(x)paste(x,"%",sep=""),seq(0,100,by=20)))
for (i in 10:1){
    xmax <- i/10
    x <- seq(0,xmax,by=0.005)
    polygon(c(0,x,xmax),
            c(0, dbeta(x ,shape1=exp(coef(pcglm)[1]),shape2=exp(coef(pcglm)[2])) ,0),
            col=c(brewer.pal(9,"Purples")[9:2], brewer.pal(7,"PRGn")[5:6])[11-i], lwd =0.5)
}

for (i in 10:1){
    y <- pbeta(i/10, shape1=exp(coef(pcglm)[1]), shape2=exp(coef(pcglm)[2])) * sdz
    polygon(c(x1,x1,x2,x2),
            c(0,y,y,0),
            col=c(brewer.pal(9,"Purples")[9:2],brewer.pal(7,"PRGn")[5:6])[11-i],lwd =0.5)
    if(i < 8){
        text((x1 + x2)/2, y-0.03, adj=c(0.5,1),labels=paste("< ", 10*i,"%",sep=""))
    }
}
text((x1 +x2)/2, par("usr")[3],labels="Scotland",adj=0.5, xpd = TRUE)
mkall("1.6ScotlandOffline")

## simd ******************************************
graphics.off()
mai <- c(0.6,0.1,0.385,0.1)
grDevices::quartz.options(width  = 175 * mm,
                          height = 175 * mm + (mai[1]+mai[3]),
                          pointsize = 11)
par(mgp=c(2,1,0),mar=c(3,0.5,1,0.5) + 0.1,bty='n',tck=0.01)
plot(0, 0, type="n",xlab="SIMD centile",asp=2,
     xlim=c(-1,1),ylim=c(0,1),
     xaxt="n",yaxt="n")
legend(x=0, xjust=0.505,
       y=1.01,yjust=0,
       legend=c("< 10% ","< 20% ","< 30% ","< 40% ","< 50% ","< 60% ","< 70% ","< 80% ","< 90%"),
       fill=rev(c(brewer.pal(9,"Purples")[8:2],brewer.pal(7,"PRGn")[5:6])),horiz=T,
       text.width=2/18,cex=0.9,bty="n")
mtext("percentage of neighbours offline",side=3)
axis(side=1,at=seq(-1,1,by=0.4),
     labels=seq(0,100,by=20))
simdgam <-
    vgam(formula =
         cbind(offline,
               connected)
         ~ s(simd,df=7),
         family = betabinomial.ab,
         data = model,
         trace = TRUE,
         )
## Log-likelihood: -318518.7 on 266669.2 degrees of freedom
test <- data.frame(simd = seq(-1,1, by=0.01))
p <- exp(predict(simdgam,newdata=test))
for (i in 10:1){
    polygon(c(-1,test$simd,1),
            c(0,pbeta(i/10, shape1=p[,1],shape2=p[,2]),0),
            col=c(brewer.pal(9,"Purples")[9:2], brewer.pal(7,"PRGn")[5:6])[11-i],lwd =0.5)
}
mkall("1.7simd")


## **************************************************************************

Last modified: Fri Jun 20 16:30:31 BST 2014