TCGA Analysis

Author

Ahmed-Amine Anzali, Institut Gustave Roussy, INSERM U1015, Ecole Polytechnique

library(dplyr)
library(ggplot2)

2 Retrieving Clinical Data

#Reading clinical data
clinical <- read.csv('Clinical/TCGA_HNSC_Surival.csv', row.names = 1, sep = ';')

#Adding clinical data descriptions
#Keratin state
clinical <- clinical %>% mutate(K_state = case_when(
  as.numeric(as.character(clinical$K)) >= 10 ~ "Keratin High",
  as.numeric(as.character(clinical$K)) < 10 ~ "Keratin Low")
  )
#MGC Content
clinical <- clinical %>% mutate(MGC_Three = case_when(
  as.numeric(as.character(clinical$MCG_total_mm2)) >= 1 ~ "MGC High",
  as.numeric(as.character(clinical$MCG_total_mm2)) < 0.2 ~ "MGC Low",
  as.numeric(as.character(clinical$MCG_total_mm2)) < 1 & as.numeric(as.character(clinical$MCG_total_mm2)) >= 0.2 ~ "MGC Medium")
  )

3 DEG analysis using DESEq2

#Reading RNAseq data
rna <- read.csv('RNAseq/Tumor_HNSC_bulk_PreProcessed.csv', row.names = 1)
rna <- type.convert(rna, as.is = TRUE)
colnames(rna) <- gsub('\\.','-',substr(colnames(rna),1,12))
#Keeping only relevant bulk data
rna <- rna[,colnames(rna) %in% rownames(clinical)]
#Keep  only the patients with available bulk RNA seq
clinical_rna <- clinical[rownames(clinical) %in% colnames(rna),]
#Need to reorder the colnames and rownames to run DESeq2
rna = rna[,order(colnames(rna))]
clinical_rna = clinical_rna[order(rownames(clinical_rna)),]

3.1 Running DESEq2 model

#Run DESEq2 model
dds <- DESeq2::DESeqDataSetFromMatrix(countData = rna,
                                      colData = clinical_rna,
                                      design  =  ~ MGC_Three)
dds <- DESeq2::DESeq(dds)

DESeq2::resultsNames(dds) # lists the coefficients
[1] "Intercept"                        "MGC_Three_MGC.Low_vs_MGC.High"   
[3] "MGC_Three_MGC.Medium_vs_MGC.High"
dds_vst <- DESeq2::vst(dds)
matrix_vst <- SummarizedExperiment::assay(dds_vst)
#Retrieving results
res <- DESeq2::results(dds, name="MGC_Three_MGC.Low_vs_MGC.High")
#Change sign of Log2Fold Change
res$log2FoldChange  = -res$log2FoldChange
View(as.data.frame(res))
#Save the DEGs into  a csv file
write.csv(as.data.frame(res), file="MGC_High_Vs_MGC_Low_DEGs.csv")

3.2 Volcano Plot

EnhancedVolcano::EnhancedVolcano(res,
                lab = rownames(res),
                x = "log2FoldChange",
                y = "padj",
                title = "",
                subtitle = "MGC Low vs. MGC High",
                selectLab = c("MARCO",
                              "CHIT1",
                              "DCSTAMP",
                              "ARG1",
                              "FBP1",
                              "SPP1",
                              "LPL",
                              "ITGAX",
                              "OSCAR",
                              "SULT1C2"),
                xlab = bquote(~Log[2]~ "fold change"),
                ylab = bquote(~-Log[10]~adjusted~italic(P)),
                pCutoff = 0.05,
                FCcutoff = 0.25,
                colAlpha = 1,
                legendPosition = "",
                legendLabSize = 10,
                legendIconSize = 3.0,
                border = "full",
                borderColour = "black",
                gridlines.major = FALSE,
                gridlines.minor = FALSE,
                drawConnectors = TRUE,
                widthConnectors = 1,
                colConnectors = 'black',
                ylim = c(0,6) ,
                boxedLabels = TRUE,
                labCol = 'black',
                labFace = 'bold')

3.3 MA Plot

ggpubr::ggmaplot(res, main = expression("MGC High" %->% "MGC Low"),
   fdr = 0.05, fc = 0.25, size = 0.4,
   palette = c("#B31B21", "#1465AC", "darkgray"),
   genenames = as.vector(res$name),
   legend = "top", top = 20,
   font.label = c("bold", 11), label.rectangle = TRUE,
   font.legend = "bold",
   font.main = "bold",
   ggtheme = ggplot2::theme_minimal())

3.4 Bulk RNAseq signature genes

# Convert 'res' to a data frame
res <- as.data.frame(res)

# Select rows with significant gene data based on criteria
# Keep rows where 'padj' is less than 0.05, 'padj' is not missing (not NA), and 'log2FoldChange' is greater than 0.25
signif_res <- res[res$padj < 0.05 & !is.na(res$padj) & res$log2FoldChange > 0.25, ]

# Extract the gene names (row names) from the 'signif_res' data frame
signif_res <- rownames(signif_res)
print(signif_res)
 [1] "MATK"          "TENM1"         "MARCO"         "FAM184B"      
 [5] "PTGER3"        "POLB"          "NECAB2"        "ABCC1"        
 [9] "POMC"          "MMP8"          "ARG1"          "SPP1"         
[13] "PLA2G5"        "NCAN"          "CHIT1"         "TAF8"         
[17] "FAM8A1"        "FGFBP2"        "DNASE2B"       "KCNH5"        
[21] "SLC27A2"       "ITGAX"         "ZNF18"         "WIF1"         
[25] "HK3"           "ARL5A"         "PITX2"         "DCSTAMP"      
[29] "FBP1"          "HTRA4"         "OSCAR"         "NMRAL1P1"     
[33] "CYP4F11"       "FRG2C"         "LPL"           "BDNF"         
[37] "RP11-218M22.1" "PRSS36"        "KRT76"         "CYP4F3"       
[41] "MPPED1"        "ATP13A5"       "FAM69C"        "KRT77"        
[45] "SIGLEC15"      "AADACL2"       "SULT1C2"       "TMEM116"      
[49] "CES1"          "C6orf15"       "RP11-699L21.1" "LINC01272"    
[53] "RP1-223B1.1"   "AC079305.8"    "RP1-27K12.2"   "RP11-288G3.4" 
[57] "ESRRAP2"       "DUX4L26"       "LINC01010"     "RP11-431M7.3" 
[61] "LINC00942"     "RP11-20D14.6"  "RP11-21L23.4"  "CAP2P1"       
[65] "RP11-384L8.1"  "CTD-2135D7.5"  "AC005336.4"    "LINC01224"    
[69] "RP1-47A17.1"   "RP11-81A22.4" 
# Write the selected gene names to a CSV file named 'signatures_genes_bulk_RNAseq.csv'
write.csv(signif_res, 'signatures_genes_bulk_RNAseq.csv')

3.5 Common gene signature with Visium Spots

# Helper function to display Venn diagram
display_venn <- function(x, ...){
  library(VennDiagram)
  grid.newpage()
  venn_object <- venn.diagram(x, filename = NULL, ...)
  grid.draw(venn_object)
}

Visium_MGC_Signature <- readr::read_csv("Visium_MGC_Signature.csv")
MGC_Markers <- Visium_MGC_Signature$...1

x <- list(MGC_Markers,signif_res)
# Further customization
display_venn(
  x,
  category.names = c("Visium","Bulk"),
  # Circles
  lwd = 1,
  fill = c("#999999", "#E69F00"),
  # Numbers
  cex = 2,
  # Set names
  cat.cex = 1,
  cat.fontface = "bold",
  cat.default.pos = "outer")

3.6 Gene Ontology based on DEGs

# Create background dataset for hypergeometric testing using all genes tested for
#significance in the results
all_genes <- as.character(rownames(res))
# Extract significant results
signif_res <- res[res$padj < 0.05 & !is.na(res$padj) & res$log2FoldChange > 0.25, ]
signif_genes <- as.character(rownames(signif_res))

# Run GO enrichment analysis
ego <- clusterProfiler::enrichGO(gene = signif_genes,
                universe = all_genes,
                keyType = "SYMBOL",
                OrgDb = org.Hs.eg.db::org.Hs.eg.db,
                ont = "BP",
                pAdjustMethod = "BH",
                qvalueCutoff = 0.05,
                readable = TRUE)
# Output results from GO analysis to a table
cluster_summary <- data.frame(ego)
gg1 <- ggplot(cluster_summary,
              aes(x = reorder(Description,p.adjust), y = -log10(p.adjust), size = -log10(p.adjust), fill = -log10(p.adjust))) +
  
  expand_limits(y = 1) +
  geom_point(shape = 21) +
  scale_size(range = c(2.5,12.5)) +
  scale_fill_continuous(low = 'royalblue', high = 'red4') +
  
  xlab('') + ylab('Enrichment score') +
  labs(
    title = 'GO Biological processes',
    subtitle = 'Enriched Terms',
    caption = 'Cut-off lines drawn at equivalents of p=0.05, p=0.01, p=0.001') +
  
  geom_hline(yintercept = c(-log10(0.05), -log10(0.01), -log10(0.001)),
             linetype = c("dotted", "longdash", "solid"),
             colour = c("black", "black", "black"),
             size = c(0.5, 1.5, 3)) +
  
  theme_bw(base_size = 24) +
  theme(
    legend.position = 'right',
    legend.background = element_rect(),
    plot.title = element_text(angle = 0, size = 16, face = 'bold', vjust = 1),
    plot.subtitle = element_text(angle = 0, size = 14, face = 'bold', vjust = 1),
    plot.caption = element_text(angle = 0, size = 12, face = 'bold', vjust = 1),
    
    axis.text.x = element_text(angle = 0, size = 12, face = 'bold', hjust = 1.10),
    axis.text.y = element_text(angle = 0, size = 12, face = 'bold', vjust = 0.5,color = cluster_summary$TopicNum),
    axis.title = element_text(size = 12, face = 'bold'),
    axis.title.x = element_text(size = 12, face = 'bold'),
    axis.title.y = element_text(size = 12, face = 'bold'),
    axis.line = element_line(colour = 'black'),
    
    #Legend
    legend.key = element_blank(), # removes the border
    legend.key.size = unit(1, "cm"), # Sets overall area/size of the legend
    legend.text = element_text(size = 14, face = "bold"), # Text size
    title = element_text(size = 14, face = "bold")) +
  
  coord_flip()

gg1

#To color genes by log2 fold changes
signif_res_lFC <- signif_res$log2FoldChange
clusterProfiler::cnetplot(ego,
         categorySize="pvalue",
         showCategory = 12,
         foldChange= signif_res_lFC,
         vertex.label.font=6) 

4 Session Info

sessioninfo::session_info()
─ Session info ───────────────────────────────────────────────────────────────
 setting  value
 version  R version 4.2.1 (2022-06-23)
 os       macOS Big Sur ... 10.16
 system   x86_64, darwin17.0
 ui       X11
 language (EN)
 collate  en_US.UTF-8
 ctype    en_US.UTF-8
 tz       Europe/Paris
 date     2024-01-23
 pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)

─ Packages ───────────────────────────────────────────────────────────────────
 package              * version   date (UTC) lib source
 abind                  1.4-5     2016-07-21 [1] CRAN (R 4.2.0)
 annotate               1.74.0    2022-04-26 [1] Bioconductor
 AnnotationDbi          1.58.0    2022-04-26 [1] Bioconductor
 ape                    5.7-1     2023-03-13 [1] CRAN (R 4.2.0)
 aplot                  0.1.10    2023-03-08 [1] CRAN (R 4.2.0)
 backports              1.4.1     2021-12-13 [1] CRAN (R 4.2.0)
 Biobase                2.58.0    2022-11-01 [1] Bioconductor
 BiocGenerics           0.44.0    2022-11-01 [1] Bioconductor
 BiocParallel           1.32.5    2022-12-23 [1] Bioconductor
 Biostrings             2.64.1    2022-08-18 [1] Bioconductor
 bit                    4.0.5     2022-11-15 [1] CRAN (R 4.2.0)
 bit64                  4.0.5     2020-08-30 [1] CRAN (R 4.2.0)
 bitops                 1.0-7     2021-04-24 [1] CRAN (R 4.2.0)
 blob                   1.2.4     2023-03-17 [1] CRAN (R 4.2.0)
 broom                  1.0.5     2023-06-09 [1] CRAN (R 4.2.0)
 cachem                 1.0.8     2023-05-01 [1] CRAN (R 4.2.0)
 car                    3.1-2     2023-03-30 [1] CRAN (R 4.2.0)
 carData                3.0-5     2022-01-06 [1] CRAN (R 4.2.0)
 cli                    3.6.1     2023-03-23 [1] CRAN (R 4.2.0)
 clusterProfiler        4.4.4     2022-06-21 [1] Bioconductor
 codetools              0.2-19    2023-02-01 [1] CRAN (R 4.2.0)
 colorspace             2.1-0     2023-01-23 [1] CRAN (R 4.2.0)
 crayon                 1.5.2     2022-09-29 [1] CRAN (R 4.2.0)
 data.table             1.14.8    2023-02-17 [1] CRAN (R 4.2.0)
 DBI                    1.1.3     2022-06-18 [1] CRAN (R 4.2.0)
 DelayedArray           0.24.0    2022-11-01 [1] Bioconductor
 DESeq2                 1.36.0    2022-04-26 [1] Bioconductor
 digest                 0.6.32    2023-06-26 [1] CRAN (R 4.2.0)
 DO.db                  2.9       2022-10-07 [1] Bioconductor
 DOSE                   3.22.1    2022-08-30 [1] Bioconductor
 downloader             0.4       2015-07-09 [1] CRAN (R 4.2.0)
 dplyr                * 1.1.2     2023-04-20 [1] CRAN (R 4.2.0)
 EnhancedVolcano        1.14.0    2022-04-26 [1] Bioconductor
 enrichplot             1.16.2    2022-08-30 [1] Bioconductor
 evaluate               0.21      2023-05-05 [1] CRAN (R 4.2.0)
 fansi                  1.0.4     2023-01-22 [1] CRAN (R 4.2.1)
 farver                 2.1.1     2022-07-06 [1] CRAN (R 4.2.0)
 fastmap                1.1.1     2023-02-24 [1] CRAN (R 4.2.0)
 fastmatch              1.1-3     2021-07-23 [1] CRAN (R 4.2.0)
 fgsea                  1.22.0    2022-04-26 [1] Bioconductor
 formatR                1.14      2023-01-17 [1] CRAN (R 4.2.0)
 futile.logger        * 1.4.3     2016-07-10 [1] CRAN (R 4.2.0)
 futile.options         1.0.1     2018-04-20 [1] CRAN (R 4.2.0)
 genefilter             1.78.0    2022-04-26 [1] Bioconductor
 geneplotter            1.74.0    2022-04-26 [1] Bioconductor
 generics               0.1.3     2022-07-05 [1] CRAN (R 4.2.0)
 GenomeInfoDb           1.34.9    2023-02-02 [1] Bioconductor
 GenomeInfoDbData       1.2.9     2023-02-27 [1] Bioconductor
 GenomicRanges          1.50.2    2022-12-16 [1] Bioconductor
 ggforce                0.4.1     2022-10-04 [1] CRAN (R 4.2.0)
 ggfun                  0.1.1     2023-06-24 [1] CRAN (R 4.2.0)
 ggnewscale             0.4.9     2023-05-25 [1] CRAN (R 4.2.0)
 ggplot2              * 3.4.3     2023-08-14 [1] CRAN (R 4.2.1)
 ggplotify              0.1.1     2023-06-27 [1] CRAN (R 4.2.0)
 ggpubr                 0.6.0     2023-02-10 [1] CRAN (R 4.2.0)
 ggraph                 2.1.0     2022-10-09 [1] CRAN (R 4.2.1)
 ggrepel                0.9.3     2023-02-03 [1] CRAN (R 4.2.0)
 ggsignif               0.6.4     2022-10-13 [1] CRAN (R 4.2.1)
 ggtree                 3.4.4     2022-09-27 [1] Bioconductor
 glue                   1.6.2     2022-02-24 [1] CRAN (R 4.2.0)
 GO.db                  3.15.0    2022-10-07 [1] Bioconductor
 GOSemSim               2.22.0    2022-04-26 [1] Bioconductor
 graphlayouts           1.0.0     2023-05-01 [1] CRAN (R 4.2.0)
 gridExtra              2.3       2017-09-09 [1] CRAN (R 4.2.0)
 gridGraphics           0.5-1     2020-12-13 [1] CRAN (R 4.2.0)
 gtable                 0.3.4     2023-08-21 [1] CRAN (R 4.2.1)
 hms                    1.1.3     2023-03-21 [1] CRAN (R 4.2.0)
 htmltools              0.5.5     2023-03-23 [1] CRAN (R 4.2.0)
 htmlwidgets            1.6.2     2023-03-17 [1] CRAN (R 4.2.0)
 httr                   1.4.6     2023-05-08 [1] CRAN (R 4.2.0)
 igraph                 1.5.0     2023-06-16 [1] CRAN (R 4.2.0)
 IRanges                2.32.0    2022-11-01 [1] Bioconductor
 jsonlite               1.8.7     2023-06-29 [1] CRAN (R 4.2.0)
 KEGGREST               1.36.3    2022-07-14 [1] Bioconductor
 knitr                  1.43      2023-05-25 [1] CRAN (R 4.2.0)
 labeling               0.4.3     2023-08-29 [1] CRAN (R 4.2.1)
 lambda.r               1.2.4     2019-09-18 [1] CRAN (R 4.2.0)
 lattice                0.21-8    2023-04-05 [1] CRAN (R 4.2.0)
 lazyeval               0.2.2     2019-03-15 [1] CRAN (R 4.2.0)
 lifecycle              1.0.3     2022-10-07 [1] CRAN (R 4.2.1)
 locfit                 1.5-9.8   2023-06-11 [1] CRAN (R 4.2.0)
 magrittr               2.0.3     2022-03-30 [1] CRAN (R 4.2.0)
 MASS                   7.3-60    2023-05-04 [1] CRAN (R 4.2.0)
 Matrix                 1.5-4.1   2023-05-18 [1] CRAN (R 4.2.0)
 MatrixGenerics         1.10.0    2022-11-01 [1] Bioconductor
 matrixStats            1.0.0     2023-06-02 [1] CRAN (R 4.2.0)
 memoise                2.0.1     2021-11-26 [1] CRAN (R 4.2.0)
 munsell                0.5.0     2018-06-12 [1] CRAN (R 4.2.0)
 nlme                   3.1-162   2023-01-31 [1] CRAN (R 4.2.0)
 org.Hs.eg.db           3.15.0    2022-10-07 [1] Bioconductor
 patchwork              1.1.2     2022-08-19 [1] CRAN (R 4.2.0)
 pillar                 1.9.0     2023-03-22 [1] CRAN (R 4.2.0)
 pkgconfig              2.0.3     2019-09-22 [1] CRAN (R 4.2.0)
 plyr                   1.8.8     2022-11-11 [1] CRAN (R 4.2.0)
 png                    0.1-8     2022-11-29 [1] CRAN (R 4.2.0)
 polyclip               1.10-4    2022-10-20 [1] CRAN (R 4.2.0)
 purrr                  1.0.1     2023-01-10 [1] CRAN (R 4.2.0)
 qvalue                 2.30.0    2022-11-01 [1] Bioconductor
 R6                     2.5.1     2021-08-19 [1] CRAN (R 4.2.0)
 RColorBrewer           1.1-3     2022-04-03 [1] CRAN (R 4.2.0)
 Rcpp                   1.0.11    2023-07-06 [1] CRAN (R 4.2.1)
 RCurl                  1.98-1.12 2023-03-27 [1] CRAN (R 4.2.0)
 readr                  2.1.4     2023-02-10 [1] CRAN (R 4.2.0)
 reshape2               1.4.4     2020-04-09 [1] CRAN (R 4.2.0)
 rlang                  1.1.1     2023-04-28 [1] CRAN (R 4.2.0)
 rmarkdown              2.23      2023-07-01 [1] CRAN (R 4.2.0)
 RSQLite                2.3.1     2023-04-03 [1] CRAN (R 4.2.0)
 rstatix                0.7.2     2023-02-01 [1] CRAN (R 4.2.0)
 rstudioapi             0.14      2022-08-22 [1] CRAN (R 4.2.0)
 S4Vectors              0.36.2    2023-02-26 [1] Bioconductor
 scales                 1.2.1     2022-08-20 [1] CRAN (R 4.2.0)
 scatterpie             0.2.1     2023-06-07 [1] CRAN (R 4.2.0)
 sessioninfo            1.2.2     2021-12-06 [1] CRAN (R 4.2.0)
 shadowtext             0.1.2     2022-04-22 [1] CRAN (R 4.2.0)
 stringi                1.7.12    2023-01-11 [1] CRAN (R 4.2.0)
 stringr                1.5.0     2022-12-02 [1] CRAN (R 4.2.0)
 SummarizedExperiment   1.28.0    2022-11-01 [1] Bioconductor
 survival               3.5-5     2023-03-12 [1] CRAN (R 4.2.0)
 tibble                 3.2.1     2023-03-20 [1] CRAN (R 4.2.0)
 tidygraph              1.2.3     2023-02-01 [1] CRAN (R 4.2.0)
 tidyr                  1.3.0     2023-01-24 [1] CRAN (R 4.2.0)
 tidyselect             1.2.0     2022-10-10 [1] CRAN (R 4.2.1)
 tidytree               0.4.2     2022-12-18 [1] CRAN (R 4.2.0)
 treeio                 1.20.2    2022-08-14 [1] Bioconductor
 tweenr                 2.0.2     2022-09-06 [1] CRAN (R 4.2.0)
 tzdb                   0.4.0     2023-05-12 [1] CRAN (R 4.2.0)
 utf8                   1.2.3     2023-01-31 [1] CRAN (R 4.2.0)
 vctrs                  0.6.3     2023-06-14 [1] CRAN (R 4.2.0)
 VennDiagram          * 1.7.3     2022-04-12 [1] CRAN (R 4.2.0)
 viridis                0.6.3     2023-05-03 [1] CRAN (R 4.2.0)
 viridisLite            0.4.2     2023-05-02 [1] CRAN (R 4.2.0)
 vroom                  1.6.3     2023-04-28 [1] CRAN (R 4.2.0)
 withr                  2.5.1     2023-09-26 [1] CRAN (R 4.2.1)
 xfun                   0.39      2023-04-20 [1] CRAN (R 4.2.0)
 XML                    3.99-0.14 2023-03-19 [1] CRAN (R 4.2.0)
 xtable                 1.8-4     2019-04-21 [1] CRAN (R 4.2.0)
 XVector                0.38.0    2022-11-01 [1] Bioconductor
 yaml                   2.3.7     2023-01-23 [1] CRAN (R 4.2.0)
 yulab.utils            0.0.6     2022-12-20 [1] CRAN (R 4.2.0)
 zlibbioc               1.44.0    2022-11-01 [1] Bioconductor

 [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library

──────────────────────────────────────────────────────────────────────────────