An individual-based network model to evaluate interventions for controlling pneumococcal transmission

Interact

Author:

  • Gerry Tonkin-Hill @gtonkinhill
  • Alex Zarebski @aezarebski

Date: 2018-10-01

Summary

Karlsson et al. developed a contact network model to evaluate the efficacy of interventions aiming to control pneumococcal transmission. They used demographic data from Sweden during the mid-2000s to estimate model parameters.

In their model individuals are assigned several features: an age, a household, and potentially a class in school/day care. These features then influence the rate of transmission between individuals in the population. Together this defines a stochastic process of the number of people infected. We carry out a simulation of this process and then generate some visualisations of this.

Model and implementation

Initially we need to set the parameters of the model. These are taken from the paper and estimated using Swedish demographic data.

library(ggplot2)
library(data.table)
#transmission rates
transmission.households <- 0.07
transmission.DCCs <- 0.04
transmission.school.classes <- 0.03
transmission.other.contacts <- 0.04

#age bias
age.bias.1 <- 1
age.bias.1to6 <- 0.33
age.bias.7to65 <- 0.18
age.bias.65 <- 0.3

prop.children.attending.DCCs <- 0.79
average.group.size.DCC <- 16.7
average.class.size.school <- 22
probability.of.other.contacts <- 0.5

pop.size <- 5000
n.initial.carriers <- 125
stop.time <- 156

#sweden population pyramid
sweden.pop.props <- c(2.9,2.9,2.7,2.4,3.4,3.4,3.1,3,3.1,3.5,3.1,2.9,2.7,3.2,2.6,1.9,1.4,1,0.6,0.1,0)
names(sweden.pop.props) <- c(seq(0,100,5)[-1], "100+")

#sweden family size proportions
num.siblings.props <- c(0.19,0.48,0.23,0.10)
names(num.siblings.props) <- c(0,1,2,3)
prop.single.parent <- 0.19

We now give the individuals in the simulation an age as well as assign them to schools, families and day care centres (DCC’s) where appropriate.

pop.ages <- as.numeric(sample(names(sweden.pop.props), pop.size, replace = TRUE, prob = sweden.pop.props))
family.membership <- rep(NA, pop.size)
school.membership <- rep(NA, pop.size)
dcc.membership <- rep(NA, pop.size)
other.close.contact.pair <- rep(NA, pop.size)

#randomly assign an individual a close contact with probability 0.5.
npairs <- rbinom(1, pop.size, 0.5)
pairs <- matrix(sample(pop.size, 2*npairs, replace = TRUE), ncol=2)

#assign families
fam.id <- 1
while(sum(is.na(family.membership[pop.ages<=20]))>0){ #we still have un-assigned children
  n.children <- sample(as.numeric(names(num.siblings.props)), 1, prob=num.siblings.props) + 1
  child.index <- which((pop.ages<=20) & is.na(family.membership))[1:n.children]
  n.parents <- sample(c(1,2), 1, prob = c(prop.single.parent, 1-prop.single.parent))
  parent.index <- which((pop.ages>20) & (pop.ages<=50) & is.na(family.membership))[1:n.parents]
  family.membership[c(child.index, parent.index)] <- fam.id
  fam.id <- fam.id + 1
}

#assign dcc
dcc.id <- 1
n.under.5 <- sum(pop.ages<=5)
indexs.in.dcc <- sample(which(pop.ages<=5), size = floor(prop.children.attending.DCCs*n.under.5))
family.ids <-  unique(family.membership[indexs.in.dcc])
dcc.size <- 0
for(f in family.ids){
  index <- indexs.in.dcc[family.membership[indexs.in.dcc]==f]
  dcc.membership[index] <- dcc.id
  dcc.size <- dcc.size + length(index)
  if(dcc.size>average.group.size.DCC){
    dcc.size <- 0
    dcc.id <- dcc.id + 1
  }
}

#assign schools
#5 to 10
class.id <- 1
indexes.in.school <- which((pop.ages>5) & (pop.ages<=10))
family.ids <-  unique(family.membership[indexes.in.school])
class.size <- 0
for(f in family.ids){
  index <- indexes.in.school[family.membership[indexes.in.school]==f]
  school.membership[index] <- class.id
  class.size <- class.size + length(index)
  if(class.size>average.class.size.school){
    class.size <- 0
    class.id <- class.id + 1
  }
}

#10 to 15yrs
indexes.in.school <- which((pop.ages>10) & (pop.ages<=15))
family.ids <-  unique(family.membership[indexes.in.school])
class.size <- 0
for(f in family.ids){
  index <- indexes.in.school[family.membership[indexes.in.school]==f]
  school.membership[index] <- class.id
  class.size <- class.size + length(index)
  if(class.size>average.class.size.school){
    class.size <- 0
    class.id <- class.id + 1
  }
}

After we’ve assigned individuals to groups we need to set the rate of transmission between people.

transition.matrix <- matrix(1, nrow = pop.size, ncol = pop.size)
#household transmission
for(f in unique(family.membership[!is.na(family.membership)])){
  index <- which(family.membership==f)
  transition.matrix[t(combn(index, 2))] <- transition.matrix[t(combn(index, 2))] * (
    1 - transmission.households)
}

#dcc transmission
for(f in unique(dcc.membership[!is.na(dcc.membership)])){
  index <- which(dcc.membership==f)
  transition.matrix[t(combn(index, 2))] <- transition.matrix[t(combn(index, 2))] * (
    1 - transmission.DCCs)
}

#school transmission
for(f in unique(school.membership[!is.na(school.membership)])){
  index <- which(school.membership==f)
  transition.matrix[t(combn(index, 2))] <- transition.matrix[t(combn(index, 2))] * (
    1 - transmission.school.classes)
}

#other transmission
transition.matrix[pairs] <- transition.matrix[pairs] * (1 - transmission.other.contacts)
transition.matrix[pairs[,c(2,1)]] <- transition.matrix[pairs[,c(2,1)]] * (1 - transmission.other.contacts)

#bias probability vector
bias.probs <- rep(NA, pop.size)
bias.probs[pop.ages<=5] <- 0.33
bias.probs[pop.ages>65] <- 0.3
bias.probs[is.na(bias.probs)] <- 0.18

Now lets simulate! We assume a slightly simpler carriage duration of Poisson with mean 4 weeks.

status.vector <- rep(0, pop.size)
remaining.carriage <- rep(0, pop.size)

#initialise infections
index <- sample(pop.size, n.initial.carriers)
status.vector[index] <- 1
remaining.carriage[index] <- rpois(n = n.initial.carriers, lambda = 4)

pop.ages.factor <- factor(pop.ages)
n.infected.vector <- rep(NA, stop.time)
n.infected.age.matrix <- matrix(NA, nrow = stop.time, ncol = length(unique(pop.ages.factor)))
colnames(n.infected.age.matrix) <- levels(pop.ages.factor)

for(t in 1:stop.time){
  # message(paste("Time:", t))
  
  prob.infection <- 1-exp(rowSums(log((1-status.vector) * transition.matrix)))
  prob.infection <- prob.infection*bias.probs
  is.infected <- runif(pop.size)<prob.infection
  
  n.new.infections <- sum((status.vector<=0) & is.infected)
  remaining.carriage[(status.vector<=0) & is.infected] <- rpois(n = n.new.infections, lambda = 4) + 1
  status.vector <- is.infected
  remaining.carriage <- remaining.carriage-1
  remaining.carriage[remaining.carriage<0] <- 0
  status.vector[remaining.carriage<=0] <- 0
  
  n.infected.vector[[t]] <- sum(status.vector)
  
  n.infected.age.matrix[t,] <- table(pop.ages.factor[status.vector==1])
  
}

We plot the number of people infected over time.

plot.df <- data.frame(time=1:stop.time, total.infected=n.infected.vector, 
                      stringsAsFactors = FALSE)

ggplot(plot.df, aes(x=time, y=total.infected)) +
  geom_line() + xlab("time (weeks)") + ylab("total infected")

png

We plot the incidence in different age groups.

plot.df <- melt(n.infected.age.matrix)
colnames(plot.df) <- c("time", "age.group", "number infected")
plot.df$age.group <- cut(plot.df$age.group, breaks = c(0,5,10,15,20,50,100))

ggplot(plot.df, aes(x=time, y=`number infected`, col=age.group)) + 
  geom_line() + 
  xlab("time (weeks)") + ylab("total infected")

png

We can compare this to the number in each group.

table(cut(pop.ages, breaks = c(0,5,10,15,20,50,100)))

   (0,5]   (5,10]  (10,15]  (15,20]  (20,50] (50,100] 
     339      282      243      214     1970     1952