#title: "Neutral community model"
#With all present in target present in source
#edited from custom files on December 21, 2022 for inclusion as supplementary
#material for Fowler SJ, Torresi E, Dechesne A, Smets BF (2023). Biofilm
#thickness controls the relative importance of stochastic and deterministic #processes in microbial community assembly in Moving Bed Biofilm Reactors. #Interface Focus

#read in a phyloseq object with the samples and taxa numbered as required for NCM
library(phyloseq)
library(Hmisc)

ps_NCM<-readRDS("PS_object")
#transpose, then convert OTU table to data frame 
otu_table(ps_NCM)
otu_table(ps_NCM)<-t(otu_table(ps_NCM))#transpose to taxa are columns
otu_table(ps_NCM)
dim(otu_table(ps_NCM))
OTU<-as.data.frame(otu_table(ps_NCM))
dim(OTU)
dS<-OTU
```

```{r}
#Start at neutral community model
write.table(OTU,file="OTU_NCM.csv",sep="\t",row.names=FALSE,col.names=TRUE)
vG=read.delim("OTU_NCM.csv",check.names=FALSE)
allOTUs<-as.numeric(colnames(vG))


# Indicate row numbers for Target community and create a new matrix with the target community

Bact=dS[c("17","18","19","20","21","22","23","24","25","26","29","30","31","32","33","34","35","36","37","38"),]

# Create a matrix with influent as source
Bacs=dS[c(1,5,9,13,28,7,3,27,15,11),]

# Calculate frequency and relative abundance of each OTU in the target community

Bactfreq=c()
for (i in 1:ncol(Bact)){
  Bactfreq[i]=length(which(Bact[,i]>0))/nrow(Bact)
}


Bactpi=colMeans(Bact)

# Calculate frequency and relative abundance of each OTU in the source community
Bacsfreq=c()
for (i in 1:ncol(Bacs)){
  Bacsfreq[i]=length(which(Bacs[,i]>0))/nrow(Bacs)
}

Bacspi=colMeans(Bacs)

colind=c(1:ncol(Bact))

Bac_neutral_data=matrix(nrow=length(Bactfreq),ncol=6)

for (i in 1:nrow(Bac_neutral_data)){
  
  Bac_neutral_data[i,1]=Bactfreq[i]
  Bac_neutral_data[i,2]=Bactpi[i]
  Bac_neutral_data[i,3]=Bacsfreq[i]
  Bac_neutral_data[i,4]=Bacspi[i]
  Bac_neutral_data[i,5]=colind[i]
  Bac_neutral_data[i,6]=allOTUs[i]
}


# Sort the OTUs by their abundance in the source community just like the neutral model in the excel spreadsheet for the neutral model 
Bac_neutral_data_sorted=Bac_neutral_data[order(Bac_neutral_data[,4],decreasing=TRUE),]
reactor=length(which(Bac_neutral_data_sorted[,1]>0))
source=length(which(Bac_neutral_data_sorted[,3]>0))

shared=length(which(Bac_neutral_data_sorted[,1]>0 & Bac_neutral_data_sorted[,3]>0))#present in target and source

#draw.pairwise.venn(source,reactor,shared,col=c("red","black"),fill=c("indianred","black"),label.col=c("black"))
# consider only shared OTUs b/w the target and source
onlyshared=which(Bac_neutral_data_sorted[,1]>0&Bac_neutral_data_sorted[,3]>0)
passthrough=which(Bac_neutral_data_sorted[,2]==0&Bac_neutral_data_sorted[,4]>0)
reaconly=which(Bac_neutral_data_sorted[,2]>0&Bac_neutral_data_sorted[,4]==0)

passthrough1=subset(Bac_neutral_data_sorted[passthrough,])
reaconly1=subset(Bac_neutral_data_sorted[reaconly,])
write.table(passthrough1,file="passthrough.txt",sep="\t",row.names=FALSE,col.names=c("Reactor_freq","Reactor_abund","Source_freq","Source_abund","Col_ind","OTU#"))
write.table(reaconly1,file="TargetOnly.txt",sep="\t",row.names=FALSE,col.names=c("Reactor_freq","Reactor_abund","Source_freq","Source_abund","Col_ind","OTU#"))


Bac_neutral_data_final=subset(Bac_neutral_data_sorted[onlyshared,])
Bac_neutral_data_final


#Input the lowest abundance detected in the target 
#min is the lowest abundance detected in the target

#should match dim of Bac_neutral_data_final_1
dim(Bac_neutral_data_final)

min<-min(Bac_neutral_data_final[,4])

col=which(Bac_neutral_data_final[,4]>=min)
Bac_neutral_data_final_1=matrix(nrow=length(col),ncol=6)
for (i in 1:length(col)){
  
  Bac_neutral_data_final_1[i,1]= Bac_neutral_data_final[i,1]
  Bac_neutral_data_final_1[i,2]= Bac_neutral_data_final[i,2]
  Bac_neutral_data_final_1[i,3]= Bac_neutral_data_final[i,3]
  Bac_neutral_data_final_1[i,4]= Bac_neutral_data_final[i,4]
  Bac_neutral_data_final_1[i,5]= Bac_neutral_data_final[i,5]
  Bac_neutral_data_final_1[i,6]= Bac_neutral_data_final[i,6]
}

write.table(Bac_neutral_data_final_1,file="FILENAME.txt",sep="\t",row.names=FALSE,col.names=c("Reactor_freq","Reactor_abund","Source_freq","Source_abund","Col_ind","OTU#"))

obs=read.delim("FILENAME.txt")
print(dim(obs))
detlim=obs[nrow(obs),4]
```

```{r}
# Vary Ntm from 1 to 100000 in steps of 0.5
Ntm= seq(1,10000,0.5)

objfun= matrix(nrow=length(Ntm),ncol=2)


for (j in 1:length(Ntm)){
  
  Ntmloop=Ntm[j]
  
  neutralmatrix= matrix(nrow=nrow(obs),ncol=5)
  
  
  for (i in 1:nrow(obs)){
    neutralmatrix[i,1]=  Ntmloop*obs[i,4]
  }
  for (i in 1:nrow(obs)){
    neutralmatrix[i,2]=  Ntmloop*(1-obs[i,4])
  }
  for (i in 1:nrow(obs)){
    neutralmatrix[i,3]= pbeta(detlim,neutralmatrix[i,1],neutralmatrix[i,2])
  }
  for (i in 1:nrow(obs)){
    neutralmatrix[i,4]= 1-neutralmatrix[i,3]
  }
  for (i in 1:nrow(obs)){
    neutralmatrix[i,5]= (obs[i,1]-neutralmatrix[i,4])^2
  }
  sumcolumns=colSums(neutralmatrix)
  
  
  objfun[j,1]=sumcolumns[5]
  objfun[j,2]=Ntmloop
}

#determine the best minimum objective function
minobj=which.min(objfun[,1])
minobjfun=objfun[minobj[1],1]


#determine the best Ntm value
bestNtm=objfun[minobj[1],2]
bestNtm
minobjfun
#calculate all the parameters for the best neutral model
bestneutralmatrix= matrix(nrow=nrow(obs),ncol=9)

for (i in 1:nrow(obs)){
  bestneutralmatrix[i,1]=  bestNtm*obs[i,4]
}
for (i in 1:nrow(obs)){
  bestneutralmatrix[i,2]=  bestNtm*(1-obs[i,4])
}
for (i in 1:nrow(obs)){
  bestneutralmatrix[i,3]= pbeta(detlim,bestneutralmatrix[i,1],bestneutralmatrix[i,2])
}
for (i in 1:nrow(obs)){
  bestneutralmatrix[i,4]= 1-bestneutralmatrix[i,3]
}
for (i in 1:nrow(obs)){
  bestneutralmatrix[i,5]= (obs[i,1]-bestneutralmatrix[i,4])^2
}
for (i in 1:nrow(obs)){
  bestneutralmatrix[i,6]= (obs[i,5])
}
for (i in 1:nrow(obs)){
  bestneutralmatrix[i,7]= (obs[i,6])
}
for (i in 1:nrow(obs)){
  bestneutralmatrix[i,8]= binconf(x=nrow(Bact)*bestneutralmatrix[i,4],n=nrow(Bact),alpha=0.05,method="wilson")[2]
}
for (i in 1:nrow(obs)){
  bestneutralmatrix[i,9]= binconf(x=nrow(Bact)*bestneutralmatrix[i,4],n=nrow(Bact),alpha=0.05,method="wilson")[3]
}


OTUlist_environmental=c()
j=1;

for (i in 1:nrow(bestneutralmatrix)){
  j=j
  
  #diff= abs(bestneutralmatrix[i,4]-obs[i,2])
  # Choose the outliers based upon the confidence limits and the ratio of the observed frequency and the value predicted by the neutral model
  diff=(obs[i,1]/bestneutralmatrix[i,4])
  if (diff >=1.5 & obs[i,1]>= bestneutralmatrix[i,9]){
    OTUlist_environmental[j]=bestneutralmatrix[i,6]
    j=j+1
    
  }
  
}

OTUlist_environmental_against=c()
j=1;
for (i in 1:nrow(bestneutralmatrix)){
  j=j
  
  #diff= abs(bestneutralmatrix[i,4]-obs[i,2])
  diff=(obs[i,1]/bestneutralmatrix[i,4])
  if (diff <=0.75 & obs[i,1]<=bestneutralmatrix[i,8]){
    OTUlist_environmental_against[j]=bestneutralmatrix[i,6]
    j=j+1
    
  }
  
}

"%w/o%" <- function(x, y) x[!x %in% y] #--  x without y
vf=c(OTUlist_environmental,OTUlist_environmental_against)
OTUlist_neutral= obs[,5] %w/o% vf

dd_env=which(obs[,5] %in% OTUlist_environmental)
environmental_matrix=matrix(nrow=length(dd_env),ncol=6)
for (i in 1:length(dd_env)){
  environmental_matrix[i,1]=obs[dd_env[i],1]
  environmental_matrix[i,2]=obs[dd_env[i],2]
  environmental_matrix[i,3]=obs[dd_env[i],3]
  environmental_matrix[i,4]=obs[dd_env[i],4]
  environmental_matrix[i,5]=obs[dd_env[i],5]
  environmental_matrix[i,6]=obs[dd_env[i],6]
  
}

write.table(environmental_matrix,file="Bac.env.sel.FOR.txt",sep="\t",row.names=FALSE,col.names=c("Reactor_freq","Reactor_abund","Source_freq","Source_abund","Col_ind","OTU#"))
dd_against=which(obs[,5] %in% OTUlist_environmental_against)		
environmental_against_matrix=matrix(nrow=length(dd_against),ncol=6)
for (i in 1:length(dd_against)){
  environmental_against_matrix[i,1]=obs[dd_against[i],1]
  environmental_against_matrix[i,2]=obs[dd_against[i],2]
  environmental_against_matrix[i,3]=obs[dd_against[i],3]
  environmental_against_matrix[i,4]=obs[dd_against[i],4]
  environmental_against_matrix[i,5]=obs[dd_against[i],5]
  environmental_against_matrix[i,6]=obs[dd_against[i],6]
}

write.table(environmental_against_matrix,file="Bac.env.sel.AGAINST.txt",sep="\t",row.names=FALSE,col.names=c("Reactor_freq","Reactor_abund","Source_freq","Source_abund","Col_ind","OTU#"))

dd_neut=which(obs[,5] %in% OTUlist_neutral)
neutral_matrix=matrix(nrow=length(dd_neut),ncol=6)
for (i in 1:length(dd_neut)){
  neutral_matrix[i,1]=obs[dd_neut[i],1]
  neutral_matrix[i,2]=obs[dd_neut[i],2]
  neutral_matrix[i,3]=obs[dd_neut[i],3]
  neutral_matrix[i,4]=obs[dd_neut[i],4]
  neutral_matrix[i,5]=obs[dd_neut[i],5]
  neutral_matrix[i,6]=obs[dd_neut[i],6]
}
write.table(neutral_matrix,file="Bac.NEUTRAL.txt",sep="\t",row.names=FALSE,col.names=c("Reactor_freq","Reactor_abund","Source_freq","Source_abund","Col_ind","OTU#"))

#Below shows best Ntm then minobjfun

#Plot results


plot(obs[,4]*100,bestneutralmatrix[,4]*100,type="l",col="black",lwd=2,xlab="Mean relative abundance in source (%)",ylab="Detection frequency in Z500 (%)",cex.lab=1.5,log="x",xlim=c(0.000025,60))
lines(obs[,4]*100,bestneutralmatrix[,8]*100,type="l",col="black",lwd=2,lty=2)
lines(obs[,4]*100,bestneutralmatrix[,9]*100,type="l",col="black",lwd=2,lty=2)
points(jitter(neutral_matrix[,4]*100),neutral_matrix[,1]*100,col="darkgrey",pch=20,cex=1.1)
points(jitter(environmental_against_matrix[,4]*100),environmental_against_matrix[,1]*100,col="indianred",pch=20,cex=1.1)	
points(jitter(environmental_matrix[,4]*100),environmental_matrix[,1]*100,col="darkgreen",pch=20,cex=1.1)
print(minobjfun)
print(bestNtm)
correlcoeff= cor.test(obs[,1],bestneutralmatrix[,4])
print(correlcoeff)
correlcoeff2= cor.test(obs[,1],bestneutralmatrix[,4],method="spearman")
print(correlcoeff2)


```