knitr::opts_chunk$set(echo=FALSE)
#knitr::knit_hooks$set(optipng = knitr::hook_optipng)
#library(xlsx)
library("openxlsx")
library(plyr)
library(dplyr)
library(ggbeeswarm)
library(reshape2)
library(data.table)
library(ggplot2)
library(TCGAbiolinks)
library(SummarizedExperiment)
library(ggsignif)
library(survminer)
library(survival)
library(kableExtra)
library(Hmisc)
library(stringr)
library(maxstat)
library(qwraps2)
options(qwraps2_markup = "markdown")
library(DT)
library(ggrepel)
lm_eqn <- function(df,y1,x1){
    lm_eqn_res <- eval(parse(text=paste0("lm(`",y1,"` ~ ",x1,", ",df,")")))
    eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2, 
         list(a = format(unname(coef(lm_eqn_res)[1]), digits = 2),
              b = format(unname(coef(lm_eqn_res)[2]), digits = 2),
             r2 = format(summary(lm_eqn_res)$r.squared, digits = 3)))
    as.character(as.expression(eq));
}

wb <- createWorkbook()
sigStyle <- createStyle(fontColour = "#000000", bgFill = "#cd5c5c")
emptyStyle <- createStyle(fontColour = "#000000", bgFill =  "#FFFFFF")
#library(tables)
cancers=params$selected_cancers_tcga

load('./data/tcga_SBS2021_weights.RData')
tcga=as.data.frame(tcga)
rownames(tcga)=gsub("\\.", "-",rownames(tcga) )
merged=do.call(rbind,params$selected_data)
merged=as.data.frame(merged)
merged$submitter_id=as.character(merged$submitter_id)
merged=merge(merged, tcga, by.x="submitter_id", by.y="row.names")

if(any(unique(merged$disease) %in% cancers)){
  
    cat('\n')  
    cat('\n') 
      cat(paste0('# COSMICv3 signatures extracted from WES using deconstructSigs, weights')) 
      cat('\n') 
      
summary_table = list()

for(mycancer in cancers){
subset1=merged[merged$disease==mycancer,]

    cat('\n')  
    cat('\n') 
      cat(paste0('## ',mycancer)) 
      cat('\n') 

if(nrow(merged)>9){
nocommnet <- sapply(colnames(tcga), function(x) {
if(mean(merged[,x]!=0)){
cor_POLQ <- cor(merged[,params$selected_gene],merged[,x],use="complete.obs")
cor_POLQ3 = round(cor_POLQ,2)

p1 <- ggplot(merged)
p1 <- p1 + geom_point(aes_string(x=x, y=params$selected_gene))
p1 <- p1 + xlab(paste0(x, ", weight"))+ylab(paste0(params$selected_gene," expression (TPM)"))
p1 <- p1 + ggtitle(paste0("All patients, ",mycancer,", COSMICv3"))
data.label <- data.frame(label=paste("Pearson r = ", round(cor_POLQ,2)," (r2 = ", round(cor_POLQ*cor_POLQ,2),")",sep=""), x=max(merged[,x],na.rm=T)*0.7, y=max(merged[,params$selected_gene])*0.7)
p1 <- p1 + geom_text(data = data.label, aes(x = x , y = y , label = label ),hjust=0,size=5)
p1 <- p1 + geom_smooth(aes_string(x=x, y=params$selected_gene), method=lm, se=FALSE, color="black")
p1 <- p1 + theme_classic()
p1 <- p1 + theme(axis.text = element_text(colour = "black", size = rel(0.7)), axis.ticks = element_line(colour = "black", size = 0.25), plot.margin=margin(10,10,6,10), plot.title=element_text(colour = "black", size = rel(1)),legend.justification=c(1,1),legend.position=c(1,1),legend.title = element_blank(),legend.key = element_rect(fill = "gray"))
print(p1)

cor_POLQ3  
}  else {
cor_POLQ3=NA
cor_POLQ3
}

})
cor_POLQ3=nocommnet
} else {
cor_POLQ3 = rep(NA, 66) 
}
}
}

COSMICv3 signatures extracted from WES using deconstructSigs, weights

COAD

PRAD

load('./data/tcga_SBS2021.RData')
tcga=as.data.frame(tcga)
rownames(tcga)=gsub("\\.", "-",rownames(tcga) )
merged=do.call(rbind,params$selected_data)
merged=as.data.frame(merged)
merged$submitter_id=as.character(merged$submitter_id)
merged=merge(merged, tcga, by.x="submitter_id", by.y="row.names")
if(any(unique(merged$disease) %in% cancers)){
  
    cat('\n')  
    cat('\n') 
      cat(paste0('# COSMICv3 signatures extracted from WES using deconstructSigs, counts')) 
      cat('\n') 
      
summary_table = list()

for(mycancer in cancers){
subset1=merged[merged$disease==mycancer,]

    cat('\n')  
    cat('\n') 
      cat(paste0('## ',mycancer)) 
      cat('\n') 

if(nrow(subset1)>9){
nocommnet <- sapply(colnames(tcga), function(x) {
if(mean(subset1[,x]!=0)){
cor_POLQ <- cor(subset1[,params$selected_gene],subset1[,x],use="complete.obs")
cor_POLQ3 = round(cor_POLQ,2)

p1 <- ggplot(subset1)
p1 <- p1 + geom_point(aes_string(x=x, y=params$selected_gene))
p1 <- p1 + xlab(paste0(x, ", count"))+ylab(paste0(params$selected_gene," expression (TPM)"))
p1 <- p1 + ggtitle(paste0("All patients, ",mycancer,", COSMICv3"))
data.label <- data.frame(label=paste("Pearson r = ", round(cor_POLQ,2)," (r2 = ", round(cor_POLQ*cor_POLQ,2),")",sep=""), x=max(subset1[,x],na.rm=T)*0.7, y=max(subset1[,params$selected_gene])*0.7)
p1 <- p1 + geom_text(data = data.label, aes(x = x , y = y , label = label ),hjust=0,size=5)
p1 <- p1 + geom_smooth(aes_string(x=x, y=params$selected_gene), method=lm, se=FALSE, color="black")
p1 <- p1 + theme_classic()
p1 <- p1 + theme(axis.text = element_text(colour = "black", size = rel(0.7)), axis.ticks = element_line(colour = "black", size = 0.25), plot.margin=margin(10,10,6,10), plot.title=element_text(colour = "black", size = rel(1)),legend.justification=c(1,1),legend.position=c(1,1),legend.title = element_blank(),legend.key = element_rect(fill = "gray"))
print(p1)

cor_POLQ3  
}  else {
cor_POLQ3=NA
cor_POLQ3
}

})
cor_POLQ3=nocommnet
} else {
cor_POLQ3 = rep(NA, 66) 
}
}
}

COSMICv3 signatures extracted from WES using deconstructSigs, counts

COAD

PRAD

SBS = fread("./data/TCGA_WES_sigProfiler_SBS_signatures_in_samples.csv")
SBS$SampleID=substr(SBS$'Sample Names',1,12)
SBS=SBS[!duplicated(SBS$SampleID),]

merged=do.call(rbind,params$selected_data)
merged=as.data.frame(merged)
merged$submitter_id=as.character(merged$submitter_id)
merged=merge(merged, SBS, by.x="submitter_id", by.y="SampleID")

if(any(unique(merged$disease) %in% cancers)){
  
    cat('\n')  
    cat('\n') 
      cat(paste0('# COSMICv3 signatures extracted from WES, using sigProfiler data from the original publication')) 
      cat('\n') 
      
summary_table = list()

for(mycancer in cancers){
subset1=merged[merged$disease==mycancer,]

    cat('\n')  
    cat('\n') 
      cat(paste0('## ',mycancer)) 
      cat('\n') 

if(nrow(subset1)>9){
nocommnet <- sapply(colnames(SBS)[4:68], function(x) {
if(mean(subset1[,x]!=0)){
cor_POLQ <- cor(subset1[,params$selected_gene],subset1[,x],use="complete.obs")
cor_POLQ3 = round(cor_POLQ,2)

p1 <- ggplot(subset1)
p1 <- p1 + geom_point(aes_string(x=x, y=params$selected_gene))
p1 <- p1 + xlab(paste0(x, ", count"))+ylab(paste0(params$selected_gene," expression (TPM)"))
p1 <- p1 + ggtitle(paste0("All patients, ",mycancer,", COSMICv3"))
data.label <- data.frame(label=paste("Pearson r = ", round(cor_POLQ,2)," (r2 = ", round(cor_POLQ*cor_POLQ,2),")",sep=""), x=max(subset1[,x],na.rm=T)*0.7, y=max(subset1[,params$selected_gene])*0.7)
p1 <- p1 + geom_text(data = data.label, aes(x = x , y = y , label = label ),hjust=0,size=5)
p1 <- p1 + geom_smooth(aes_string(x=x, y=params$selected_gene), method=lm, se=FALSE, color="black")
p1 <- p1 + theme_classic()
p1 <- p1 + theme(axis.text = element_text(colour = "black", size = rel(0.7)), axis.ticks = element_line(colour = "black", size = 0.25), plot.margin=margin(10,10,6,10), plot.title=element_text(colour = "black", size = rel(1)),legend.justification=c(1,1),legend.position=c(1,1),legend.title = element_blank(),legend.key = element_rect(fill = "gray"))
print(p1)

cor_POLQ3  
}  else {
cor_POLQ3=NA
cor_POLQ3
}

})
cor_POLQ3=nocommnet
} else {
cor_POLQ3 = rep(NA, 65) 
}
}
}

COSMICv3 signatures extracted from WES, using sigProfiler data from the original publication

COAD

PRAD