Welcome to this large-scale RMarkdown re-analysis. For now, I’ve kept the code in its full form rather than refactoring into functions. This keeps the workflow transparent, though function-based simplification may be added later. The entire repository is fully reproducible via the ‘renv.lock’ file. Below, you’ll also find my session information for reference:
sessionInfo()
## R version 4.4.2 (2024-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 10 x64 (build 19045)
##
## Matrix products: default
##
##
## locale:
## [1] LC_COLLATE=French_France.utf8 LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=French_France.utf8 LC_NUMERIC=C
## [5] LC_TIME=French_France.utf8
##
## time zone: Europe/Paris
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices datasets utils methods
## [8] base
##
## other attached packages:
## [1] SummarizedExperiment_1.36.0 Biobase_2.66.0
## [3] GenomicRanges_1.58.0 GenomeInfoDb_1.42.3
## [5] IRanges_2.40.1 S4Vectors_0.44.0
## [7] BiocGenerics_0.52.0 MatrixGenerics_1.18.1
## [9] matrixStats_1.5.0 TCGAbiolinks_2.34.1
## [11] colorspace_2.1-2 shiny_1.11.1
## [13] DT_0.33 ggrepel_0.9.6
## [15] ggforce_0.5.0 plotly_4.11.0
## [17] rstatix_0.7.2 ggpubr_0.6.0
## [19] RColorBrewer_1.1-3 viridis_0.6.5
## [21] viridisLite_0.4.2 lubridate_1.9.4
## [23] forcats_1.0.1 stringr_1.5.1
## [25] dplyr_1.1.4 purrr_1.0.4
## [27] readr_2.1.5 tidyr_1.3.1
## [29] tibble_3.2.1 tidyverse_2.0.0
## [31] ggplot2_4.0.0 readxl_1.4.5
##
## loaded via a namespace (and not attached):
## [1] DBI_1.2.3 httr2_1.2.1
## [3] gridExtra_2.3 biomaRt_2.62.1
## [5] rlang_1.1.5 magrittr_2.0.3
## [7] RSQLite_2.4.3 compiler_4.4.2
## [9] png_0.1-8 vctrs_0.6.5
## [11] rvest_1.0.5 pkgconfig_2.0.3
## [13] crayon_1.5.3 fastmap_1.2.0
## [15] dbplyr_2.5.1 backports_1.5.0
## [17] XVector_0.46.0 promises_1.3.2
## [19] rmarkdown_2.30 tzdb_0.5.0
## [21] UCSC.utils_1.2.0 bit_4.6.0
## [23] xfun_0.53 zlibbioc_1.52.0
## [25] cachem_1.1.0 jsonlite_1.9.1
## [27] progress_1.2.3 blob_1.2.4
## [29] later_1.4.1 DelayedArray_0.32.0
## [31] tweenr_2.0.3 prettyunits_1.2.0
## [33] broom_1.0.10 R6_2.6.1
## [35] bslib_0.9.0 stringi_1.8.4
## [37] car_3.1-3 jquerylib_0.1.4
## [39] cellranger_1.1.0 Rcpp_1.0.14
## [41] knitr_1.50 downloader_0.4.1
## [43] httpuv_1.6.15 Matrix_1.7-1
## [45] timechange_0.3.0 tidyselect_1.2.1
## [47] abind_1.4-8 yaml_2.3.10
## [49] curl_7.0.0 plyr_1.8.9
## [51] lattice_0.22-6 KEGGREST_1.46.0
## [53] withr_3.0.2 S7_0.2.0
## [55] evaluate_1.0.5 BiocFileCache_2.14.0
## [57] polyclip_1.10-7 xml2_1.4.0
## [59] Biostrings_2.74.1 filelock_1.0.3
## [61] pillar_1.11.1 BiocManager_1.30.26
## [63] carData_3.0-5 renv_1.1.5
## [65] generics_0.1.4 hms_1.1.3
## [67] scales_1.4.0 xtable_1.8-4
## [69] glue_1.8.0 lazyeval_0.2.2
## [71] tools_4.4.2 data.table_1.17.0
## [73] ggsignif_0.6.4 XML_3.99-0.19
## [75] grid_4.4.2 AnnotationDbi_1.68.0
## [77] GenomeInfoDbData_1.2.13 Formula_1.2-5
## [79] cli_3.6.4 rappdirs_0.3.3
## [81] S4Arrays_1.6.0 gtable_0.3.6
## [83] TCGAbiolinksGUI.data_1.26.0 sass_0.4.9
## [85] digest_0.6.37 SparseArray_1.6.2
## [87] htmlwidgets_1.6.4 farver_2.1.2
## [89] memoise_2.0.1 htmltools_0.5.8.1
## [91] lifecycle_1.0.4 httr_1.4.7
## [93] mime_0.12 bit64_4.6.0-1
## [95] MASS_7.3-61
This page is an interactive document generated from RMarkdown analyses. You can click on the graphs to explore the data, filter the tables, and navigate by histological type using the tabs. For technical details, see the source code available on GitHub.
The Kaplan–Meier curves presented here show overall survival according to the expression of genes of interest (TELS1, TRF2, ATRX/DAXX). The p-values are derived from log-rank tests. The results should be interpreted with caution given the size of the cohorts.
The color palette used throughout this document is cividis, a perceptually uniform colormap designed to be accessible for readers with color vision deficiencies. Unlike traditional palettes (e.g., “Red–Yellow–Blue”), cividis ensures consistent contrasts across the full range of values, remains interpretable when printed in grayscale, and improves accessibility of figures for a broader audience.
colnames(clinical.supp)[colnames(clinical.supp) == "bcr_patient_barcode"] <- "submitter_id"
merged_clinical <- dplyr::full_join(clinical, clinical.supp, by = "submitter_id") %>%
dplyr::rename(
vital_status = vital_status.x,
days_to_death = days_to_death.x,
)
data.stage2 <- merged_clinical
DT::datatable(
data.stage2,
filter = "top",
options = list(scrollX = TRUE, pageLength = 5, autoWidth = TRUE),
rownames = FALSE
)
ggplotly(ggplot(data.stage2)+aes(x=vital_status,fill=vital_status)+geom_bar()+labs(fill="Vital Status", x="Vital Status", y= "Count")+theme_bw()+scale_fill_viridis_d(option = "cividis")+theme(panel.grid.major = element_blank(), panel.grid.minor =element_blank()))
library(edgeR)
dge <- DGEList(counts = RNA)
dge <- calcNormFactors(dge)
RNA_norm <- cpm(dge, log = TRUE)
RNA_norm <- cpm(dge, log = TRUE)
lms_patients <- data.stage2$submitter_id
RNA_norm <- RNA_norm[, substr(colnames(RNA_norm), 1, 12) %in% lms_patients]
ACTA2 <- RNA_norm["ENSG00000107796.13", , drop = FALSE] |>
t() |>
as.data.frame()
colnames(ACTA2) <- "ACTA2_expr"
ACTA2$patient <- substr(rownames(ACTA2), 1, 12)
ACTA2$ACTA2_group <- ifelse(ACTA2$ACTA2_expr <= median(ACTA2$ACTA2_expr),
"Low", "High")
ACTA2 <- ACTA2[!duplicated(ACTA2$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status")]
merged <- dplyr::left_join(ACTA2, clin_sub,
by = c("patient" = "submitter_id"))
p1 <- ggplot(ACTA2, aes(x = "", y = ACTA2_expr, fill = "ACTA2")) +
geom_boxplot() +
labs(fill = "ACTA2 RNA (log2 CPM)", x = "", y = "log2 CPM") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p3 <- ggplot(merged, aes(x = ACTA2_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "ACTA2 Expression", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p3)
DAXX <- RNA_norm["ENSG00000204209.13", , drop = FALSE] |>
t() |>
as.data.frame()
colnames(DAXX) <- "DAXX_expr"
DAXX$patient <- substr(rownames(DAXX), 1, 12)
DAXX$DAXX_group <- ifelse(DAXX$DAXX_expr <= median(DAXX$DAXX_expr),
"Low", "High")
DAXX <- DAXX[!duplicated(DAXX$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status")]
merged <- dplyr::left_join(DAXX, clin_sub,
by = c("patient" = "submitter_id"))
p1 <- ggplot(DAXX, aes(x = "", y = DAXX_expr, fill = "DAXX")) +
geom_boxplot() +
labs(fill = "DAXX RNA (log2 CPM)", x = "", y = "log2 CPM") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p3 <- ggplot(merged, aes(x = DAXX_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "DAXX Expression", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p3)
genes <- c("ENSG00000085224.23",
"ENSG00000204209.13")
atrx_daxx <- RNA_norm[genes, , drop = FALSE] |>
t() |>
as.data.frame()
colnames(atrx_daxx) <- c("ATRX_expr", "DAXX_expr")
atrx_daxx$patient <- substr(rownames(atrx_daxx), 1, 12)
atrx_daxx$ATRX_group <- ifelse(atrx_daxx$ATRX_expr <= median(atrx_daxx$ATRX_expr),
"Low", "High")
atrx_daxx$DAXX_group <- ifelse(atrx_daxx$DAXX_expr <= median(atrx_daxx$DAXX_expr),
"Low", "High")
atrx_daxx$Combined_group <- dplyr::case_when(
atrx_daxx$ATRX_group == "Low" & atrx_daxx$DAXX_group == "Low" ~ "Both Low",
atrx_daxx$ATRX_group == "High" & atrx_daxx$DAXX_group == "High" ~ "Both High",
TRUE ~ "One High"
)
atrx_daxx <- atrx_daxx[!duplicated(atrx_daxx$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status", "days_to_death", "days_to_last_follow_up")]
merged_atrx_daxx <- dplyr::left_join(atrx_daxx, clin_sub,
by = c("patient" = "submitter_id"))
p1 <- ggplot(atrx_daxx, aes(x = Combined_group, fill = Combined_group)) +
geom_bar() +
labs(fill = "ATRX & DAXX Expression", x = "ATRX & DAXX Groups", y = "Count") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p2 <- ggplot(merged_atrx_daxx, aes(x = Combined_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "ATRX & DAXX Groups", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p2)
library(survival)
library(survminer)
surv_data <- merged_atrx_daxx %>%
dplyr::mutate(
OS_time = ifelse(is.na(days_to_death),
days_to_last_follow_up,
days_to_death),
OS_event = ifelse(tolower(vital_status) %in% c("dead","deceased"), 1, 0)
) %>%
dplyr::filter(!is.na(OS_time) & !is.na(OS_event))
fit <- survfit(Surv(OS_time, OS_event) ~ Combined_group, data = surv_data)
km_plot <- ggsurvplot(
fit,
data = surv_data,
risk.table = TRUE,
pval = TRUE,
conf.int = TRUE,
legend.title = "ATRX/DAXX",
legend.labs = levels(factor(surv_data$Combined_group)),
palette = viridis(length(unique(surv_data$Combined_group)), option = "cividis"),
xlab = "Days",
ylab = "Overall survival probability",
ggtheme = theme_bw()
)
km_plot
TELS1 <- RNA_norm["ENSG00000121903.14", , drop = FALSE] |> #grep("121903", rownames(RNA_norm), value = TRUE)
t() |>
as.data.frame()
colnames(TELS1) <- "TELS1_expr"
TELS1$patient <- substr(rownames(TELS1), 1, 12)
TELS1$TELS1_group <- ifelse(TELS1$TELS1_expr <= median(TELS1$TELS1_expr),
"Low", "High")
TELS1 <- TELS1[!duplicated(TELS1$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status")]
merged <- dplyr::left_join(TELS1, clin_sub,
by = c("patient" = "submitter_id"))
p1 <- ggplot(TELS1, aes(x = "", y = TELS1_expr, fill = "TELS1")) +
geom_boxplot() +
labs(fill = "TELS1 RNA (log2 CPM)", x = "", y = "log2 CPM") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p3 <- ggplot(merged, aes(x = TELS1_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "TELS1 Expression", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p3)
clin_sub <- data.stage2[, c("submitter_id", "vital_status", "days_to_death", "days_to_last_follow_up")]
merged_tels1 <- dplyr::left_join(TELS1, clin_sub, by = c("patient" = "submitter_id"))
surv_data <- merged_tels1 %>%
dplyr::mutate(
OS_time = ifelse(is.na(days_to_death),
days_to_last_follow_up,
days_to_death),
OS_event = ifelse(tolower(vital_status) %in% c("dead","deceased"), 1, 0)
) %>%
dplyr::filter(!is.na(OS_time) & !is.na(OS_event))
fit <- survfit(Surv(OS_time, OS_event) ~ TELS1_group, data = surv_data)
km_plot <- ggsurvplot(
fit,
data = surv_data,
risk.table = TRUE,
pval = TRUE,
conf.int = TRUE,
legend.title = "TELS1",
legend.labs = levels(factor(surv_data$TELS1_group)),
palette = viridis(length(unique(surv_data$TELS1_group)), option = "cividis"),
xlab = "Days",
ylab = "Overall survival probability",
ggtheme = theme_bw()
)
km_plot
TRF2 <- RNA_norm["ENSG00000132604.11", , drop = FALSE] |>
t() |>
as.data.frame()
colnames(TRF2) <- "TRF2_expr"
TRF2$patient <- substr(rownames(TRF2), 1, 12)
TRF2$TRF2_group <- ifelse(TRF2$TRF2_expr <= median(TRF2$TRF2_expr),
"Low", "High")
TRF2 <- TRF2[!duplicated(TRF2$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status")]
merged <- dplyr::left_join(TRF2, clin_sub,
by = c("patient" = "submitter_id"))
p1 <- ggplot(TRF2, aes(x = "", y = TRF2_expr, fill = "TRF2")) +
geom_boxplot() +
labs(fill = "TRF2 RNA (log2 CPM)", x = "", y = "log2 CPM") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p3 <- ggplot(merged, aes(x = TRF2_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "TRF2 Expression", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p3)
clin_sub <- data.stage2[, c("submitter_id", "vital_status", "days_to_death", "days_to_last_follow_up")]
merged_trf2 <- dplyr::left_join(TRF2, clin_sub,
by = c("patient" = "submitter_id"))
surv_data_trf2 <- merged_trf2 %>%
dplyr::mutate(
OS_time = ifelse(is.na(days_to_death),
days_to_last_follow_up,
days_to_death),
OS_event = ifelse(tolower(vital_status) %in% c("dead","deceased"), 1, 0)
) %>%
dplyr::filter(!is.na(OS_time) & !is.na(OS_event))
fit_trf2 <- survfit(Surv(OS_time, OS_event) ~ TRF2_group, data = surv_data_trf2)
ggsurvplot(
fit_trf2,
data = surv_data_trf2,
risk.table = TRUE,
pval = TRUE,
conf.int = TRUE,
legend.title = "TRF2",
legend.labs = levels(factor(surv_data_trf2$TRF2_group)),
palette = viridis(length(unique(surv_data_trf2$TRF2_group)), option = "cividis"),
xlab = "Days",
ylab = "Overall survival probability",
ggtheme = theme_bw()
)
tels1_trf2 <- dplyr::inner_join(TELS1, TRF2, by = "patient")
tels1_trf2$Combined_group <- dplyr::case_when(
tels1_trf2$TELS1_group == "Low" & tels1_trf2$TRF2_group == "Low" ~ "Both Low",
tels1_trf2$TELS1_group == "High" & tels1_trf2$TRF2_group == "High" ~ "Both High",
TRUE ~ "One High"
)
tels1_trf2 <- tels1_trf2[!duplicated(tels1_trf2$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status")]
merged_tels1_trf2 <- dplyr::left_join(tels1_trf2, clin_sub, by = c("patient" = "submitter_id"))
p1 <- ggplot(tels1_trf2, aes(x = Combined_group, fill = Combined_group)) +
geom_bar() +
labs(fill = "TELS1 & TRF2 Expression", x = "TELS1 & TRF2 Groups", y = "Count") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p2 <- ggplot(merged_tels1_trf2, aes(x = Combined_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "TELS1 & TRF2 Groups", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p2)
clin_sub <- data.stage2[, c("submitter_id", "vital_status", "days_to_death", "days_to_last_follow_up")]
merged_tels1_trf2 <- dplyr::left_join(tels1_trf2, clin_sub, by = c("patient" = "submitter_id"))
surv_data_tels1_trf2 <- merged_tels1_trf2 %>%
dplyr::mutate(
OS_time = ifelse(is.na(days_to_death),
days_to_last_follow_up,
days_to_death),
OS_event = ifelse(tolower(vital_status) %in% c("dead","deceased"), 1, 0)
) %>%
dplyr::filter(!is.na(OS_time) & !is.na(OS_event))
fit_tels1_trf2 <- survfit(Surv(OS_time, OS_event) ~ Combined_group, data = surv_data_tels1_trf2)
ggsurvplot(
fit_tels1_trf2,
data = surv_data_tels1_trf2,
risk.table = TRUE,
pval = TRUE,
conf.int = TRUE,
legend.title = "TELS1/TRF2",
legend.labs = levels(factor(surv_data_tels1_trf2$Combined_group)),
palette = viridis(length(unique(surv_data_tels1_trf2$Combined_group)), option = "cividis"),
xlab = "Days",
ylab = "Overall survival probability",
ggtheme = theme_bw()
)
cor.test(tels1_trf2$TELS1_expr, tels1_trf2$TRF2_expr)
##
## Pearson's product-moment correlation
##
## data: tels1_trf2$TELS1_expr and tels1_trf2$TRF2_expr
## t = -0.40956, df = 257, p-value = 0.6825
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.14697040 0.09665033
## sample estimates:
## cor
## -0.02553922
ggplot(tels1_trf2, aes(x = TELS1_expr, y = TRF2_expr)) +
geom_point(aes(color = Combined_group)) +
geom_smooth(method = "lm", se = FALSE) +
scale_color_viridis_d(option = "cividis")+
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
table(tels1_trf2$TELS1_group, tels1_trf2$TRF2_group)
##
## High Low
## High 66 63
## Low 63 67
chisq.test(table(tels1_trf2$TELS1_group, tels1_trf2$TRF2_group))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: table(tels1_trf2$TELS1_group, tels1_trf2$TRF2_group)
## X-squared = 0.096379, df = 1, p-value = 0.7562
cox_model <- coxph(Surv(OS_time, OS_event) ~ TELS1_group * TRF2_group, data = surv_data_tels1_trf2)
summary(cox_model)
## Call:
## coxph(formula = Surv(OS_time, OS_event) ~ TELS1_group * TRF2_group,
## data = surv_data_tels1_trf2)
##
## n= 259, number of events= 98
##
## coef exp(coef) se(coef) z Pr(>|z|)
## TELS1_groupLow 0.1621 1.1759 0.2830 0.573 0.567
## TRF2_groupLow 0.2848 1.3295 0.2753 1.034 0.301
## TELS1_groupLow:TRF2_groupLow -0.5909 0.5538 0.4072 -1.451 0.147
##
## exp(coef) exp(-coef) lower .95 upper .95
## TELS1_groupLow 1.1759 0.8504 0.6753 2.048
## TRF2_groupLow 1.3295 0.7522 0.7751 2.280
## TELS1_groupLow:TRF2_groupLow 0.5538 1.8056 0.2493 1.230
##
## Concordance= 0.539 (se = 0.032 )
## Likelihood ratio test= 2.51 on 3 df, p=0.5
## Wald test = 2.49 on 3 df, p=0.5
## Score (logrank) test = 2.51 on 3 df, p=0.5
if (!requireNamespace("ComplexHeatmap", quietly = TRUE)) {
BiocManager::install("ComplexHeatmap")
}
suppressPackageStartupMessages(library(ComplexHeatmap))
suppressPackageStartupMessages(library(circlize))
expr_mat <- tels1_trf2[, c("TELS1_expr", "TRF2_expr")]
rownames(expr_mat) <- tels1_trf2$patient
expr_mat <- t(scale(expr_mat))
clin_sub <- data.stage2[, c("submitter_id", "vital_status", "days_to_death", "days_to_last_follow_up")]
surv_annot <- dplyr::left_join(
data.frame(patient = colnames(expr_mat)),
clin_sub,
by = c("patient" = "submitter_id")
)
surv_annot$OS_time <- ifelse(is.na(surv_annot$days_to_death),
surv_annot$days_to_last_follow_up,
surv_annot$days_to_death)
surv_annot$OS_event <- ifelse(tolower(surv_annot$vital_status) %in% c("dead","deceased"), "Dead", "Alive")
ha <- HeatmapAnnotation(
OS_time = anno_barplot(surv_annot$OS_time, gp = gpar(fill = "grey")),
Status = surv_annot$OS_event,
col = list(Status = c("Alive" = "steelblue", "Dead" = "firebrick"))
)
Heatmap(expr_mat,
name = "Expression (z-score)",
top_annotation = ha,
show_row_names = TRUE,
show_column_names = FALSE,
column_title = "Patients (clustered by TELS1 + TRF2)")
tels1_clinical <- dplyr::left_join(
TELS1,
merged_clinical,
by = c("patient" = "submitter_id")
)
ggplot(merged_clinical, aes(x = age_at_diagnosis/365, fill = vital_status)) +
geom_histogram(binwidth = 5, position = "dodge") +
labs(title = "Age at Diagnosis by Vital Status",
x = "Age (years)", y = "Number of Patients") +
scale_fill_viridis_d(option = "cividis") +
theme_bw()
merged_clinical %>%
dplyr::count(primary_diagnosis) %>%
ggplot(aes(x = reorder(primary_diagnosis, n), y = n, fill = primary_diagnosis)) +
geom_col() +
coord_flip() +
labs(
title = "Primary Diagnosis Distribution",
x = "Primary Diagnosis",
y = "Count"
) +
scale_fill_viridis_d(option = "cividis") +
theme_minimal()
ggplot(merged_clinical, aes(x = gender.x, y = age_at_diagnosis/365, fill = gender.x)) +
geom_boxplot() +
labs(title = "Age at Diagnosis by Gender",
x = "Gender", y = "Age (years)", fill = "Gender") +
scale_fill_viridis_d(option = "cividis") +
theme_bw()
ggplot(tels1_clinical, aes(x = age_at_diagnosis/365, y = TELS1_expr, color = vital_status)) +
geom_point(size = 2) +
geom_smooth(method = "lm", se = FALSE) +
labs(title = "TELS1 Expression vs Age at Diagnosis",
x = "Age (years)", y = "TELS1 log2 CPM") +
scale_color_viridis_d(option = "cividis")+
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
trf2_clinical <- dplyr::left_join(
TRF2,
merged_clinical,
by = c("patient" = "submitter_id")
)
ggplot(trf2_clinical, aes(x = age_at_diagnosis/365, y = TRF2_expr, color = vital_status)) +
geom_point(size = 2) +
geom_smooth(method = "lm", se = FALSE) +
labs(title = "TRF2 Expression vs Age at Diagnosis",
x = "Age (years)", y = "TRF2 log2 CPM") +
scale_color_viridis_d(option = "cividis")+
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
if (!requireNamespace("ggalluvial", quietly = TRUE)) {
install.packages("ggalluvial")
}
suppressPackageStartupMessages(library(ggalluvial))
alluvial_df <- merged_tels1_trf2 %>%
dplyr::select(patient, TELS1_group, TRF2_group, vital_status) %>%
dplyr::filter(!is.na(TELS1_group), !is.na(TRF2_group), !is.na(vital_status))
ggplot(alluvial_df,
aes(axis1 = TELS1_group, axis2 = TRF2_group, axis3 = vital_status)) +
geom_alluvium(aes(fill = vital_status), width = 1/12) +
geom_stratum(width = 1/12, fill = "grey80", color = "black") +
geom_text(stat = "stratum", aes(label = paste0(after_stat(stratum), "\n(n=", after_stat(n), ")"))) +
scale_x_discrete(limits = c("TELS1", "TRF2", "Vital Status"), expand = c(.05, .05)) +
scale_fill_viridis_d(option = "cividis") +
theme_bw() +
labs(title = "Alluvial Plot of TELS1/TRF2 Groups and Vital Status")
data.stage2 <- merged_clinical[merged_clinical$primary_pathology_histological_type == "Leiomyosarcoma (LMS)", ]
DT::datatable(
data.stage2,
filter = "top",
options = list(scrollX = TRUE, pageLength = 5, autoWidth = TRUE),
rownames = FALSE
)
ggplotly(ggplot(data.stage2)+aes(x=vital_status,fill=vital_status)+geom_bar()+labs(fill="Vital Status", x="Vital Status", y= "Count")+theme_bw()+scale_fill_viridis_d(option = "cividis")+theme(panel.grid.major = element_blank(), panel.grid.minor =element_blank()))
library(edgeR)
dge <- DGEList(counts = RNA)
dge <- calcNormFactors(dge)
RNA_norm <- cpm(dge, log = TRUE)
RNA_norm <- cpm(dge, log = TRUE)
lms_patients <- data.stage2$submitter_id
RNA_norm <- RNA_norm[, substr(colnames(RNA_norm), 1, 12) %in% lms_patients]
ACTA2 <- RNA_norm["ENSG00000107796.13", , drop = FALSE] |>
t() |>
as.data.frame()
colnames(ACTA2) <- "ACTA2_expr"
ACTA2$patient <- substr(rownames(ACTA2), 1, 12)
ACTA2$ACTA2_group <- ifelse(ACTA2$ACTA2_expr <= median(ACTA2$ACTA2_expr),
"Low", "High")
ACTA2 <- ACTA2[!duplicated(ACTA2$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status")]
merged <- dplyr::left_join(ACTA2, clin_sub,
by = c("patient" = "submitter_id"))
p1 <- ggplot(ACTA2, aes(x = "", y = ACTA2_expr, fill = "ACTA2")) +
geom_boxplot() +
labs(fill = "ACTA2 RNA (log2 CPM)", x = "", y = "log2 CPM") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p3 <- ggplot(merged, aes(x = ACTA2_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "ACTA2 Expression", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p3)
DAXX <- RNA_norm["ENSG00000204209.13", , drop = FALSE] |>
t() |>
as.data.frame()
colnames(DAXX) <- "DAXX_expr"
DAXX$patient <- substr(rownames(DAXX), 1, 12)
DAXX$DAXX_group <- ifelse(DAXX$DAXX_expr <= median(DAXX$DAXX_expr),
"Low", "High")
DAXX <- DAXX[!duplicated(DAXX$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status")]
merged <- dplyr::left_join(DAXX, clin_sub,
by = c("patient" = "submitter_id"))
p1 <- ggplot(DAXX, aes(x = "", y = DAXX_expr, fill = "DAXX")) +
geom_boxplot() +
labs(fill = "DAXX RNA (log2 CPM)", x = "", y = "log2 CPM") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p3 <- ggplot(merged, aes(x = DAXX_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "DAXX Expression", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p3)
genes <- c("ENSG00000085224.23",
"ENSG00000204209.13")
atrx_daxx <- RNA_norm[genes, , drop = FALSE] |>
t() |>
as.data.frame()
colnames(atrx_daxx) <- c("ATRX_expr", "DAXX_expr")
atrx_daxx$patient <- substr(rownames(atrx_daxx), 1, 12)
atrx_daxx$ATRX_group <- ifelse(atrx_daxx$ATRX_expr <= median(atrx_daxx$ATRX_expr),
"Low", "High")
atrx_daxx$DAXX_group <- ifelse(atrx_daxx$DAXX_expr <= median(atrx_daxx$DAXX_expr),
"Low", "High")
atrx_daxx$Combined_group <- dplyr::case_when(
atrx_daxx$ATRX_group == "Low" & atrx_daxx$DAXX_group == "Low" ~ "Both Low",
atrx_daxx$ATRX_group == "High" & atrx_daxx$DAXX_group == "High" ~ "Both High",
TRUE ~ "One High"
)
atrx_daxx <- atrx_daxx[!duplicated(atrx_daxx$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status", "days_to_death", "days_to_last_follow_up")]
merged_atrx_daxx <- dplyr::left_join(atrx_daxx, clin_sub,
by = c("patient" = "submitter_id"))
p1 <- ggplot(atrx_daxx, aes(x = Combined_group, fill = Combined_group)) +
geom_bar() +
labs(fill = "ATRX & DAXX Expression", x = "ATRX & DAXX Groups", y = "Count") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p2 <- ggplot(merged_atrx_daxx, aes(x = Combined_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "ATRX & DAXX Groups", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p2)
library(survival)
library(survminer)
surv_data <- merged_atrx_daxx %>%
dplyr::mutate(
OS_time = ifelse(is.na(days_to_death),
days_to_last_follow_up,
days_to_death),
OS_event = ifelse(tolower(vital_status) %in% c("dead","deceased"), 1, 0)
) %>%
dplyr::filter(!is.na(OS_time) & !is.na(OS_event))
fit <- survfit(Surv(OS_time, OS_event) ~ Combined_group, data = surv_data)
km_plot <- ggsurvplot(
fit,
data = surv_data,
risk.table = TRUE,
pval = TRUE,
conf.int = TRUE,
legend.title = "ATRX/DAXX",
legend.labs = levels(factor(surv_data$Combined_group)),
palette = viridis(length(unique(surv_data$Combined_group)), option = "cividis"),
xlab = "Days",
ylab = "Overall survival probability",
ggtheme = theme_bw()
)
km_plot
TELS1 <- RNA_norm["ENSG00000121903.14", , drop = FALSE] |> #grep("121903", rownames(RNA_norm), value = TRUE)
t() |>
as.data.frame()
colnames(TELS1) <- "TELS1_expr"
TELS1$patient <- substr(rownames(TELS1), 1, 12)
TELS1$TELS1_group <- ifelse(TELS1$TELS1_expr <= median(TELS1$TELS1_expr),
"Low", "High")
TELS1 <- TELS1[!duplicated(TELS1$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status")]
merged <- dplyr::left_join(TELS1, clin_sub,
by = c("patient" = "submitter_id"))
p1 <- ggplot(TELS1, aes(x = "", y = TELS1_expr, fill = "TELS1")) +
geom_boxplot() +
labs(fill = "TELS1 RNA (log2 CPM)", x = "", y = "log2 CPM") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p3 <- ggplot(merged, aes(x = TELS1_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "TELS1 Expression", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p3)
clin_sub <- data.stage2[, c("submitter_id", "vital_status", "days_to_death", "days_to_last_follow_up")]
merged_tels1 <- dplyr::left_join(TELS1, clin_sub, by = c("patient" = "submitter_id"))
surv_data <- merged_tels1 %>%
dplyr::mutate(
OS_time = ifelse(is.na(days_to_death),
days_to_last_follow_up,
days_to_death),
OS_event = ifelse(tolower(vital_status) %in% c("dead","deceased"), 1, 0)
) %>%
dplyr::filter(!is.na(OS_time) & !is.na(OS_event))
fit <- survfit(Surv(OS_time, OS_event) ~ TELS1_group, data = surv_data)
km_plot <- ggsurvplot(
fit,
data = surv_data,
risk.table = TRUE,
pval = TRUE,
conf.int = TRUE,
legend.title = "TELS1",
legend.labs = levels(factor(surv_data$TELS1_group)),
palette = viridis(length(unique(surv_data$TELS1_group)), option = "cividis"),
xlab = "Days",
ylab = "Overall survival probability",
ggtheme = theme_bw()
)
km_plot
TRF2 <- RNA_norm["ENSG00000132604.11", , drop = FALSE] |>
t() |>
as.data.frame()
colnames(TRF2) <- "TRF2_expr"
TRF2$patient <- substr(rownames(TRF2), 1, 12)
TRF2$TRF2_group <- ifelse(TRF2$TRF2_expr <= median(TRF2$TRF2_expr),
"Low", "High")
TRF2 <- TRF2[!duplicated(TRF2$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status")]
merged <- dplyr::left_join(TRF2, clin_sub,
by = c("patient" = "submitter_id"))
p1 <- ggplot(TRF2, aes(x = "", y = TRF2_expr, fill = "TRF2")) +
geom_boxplot() +
labs(fill = "TRF2 RNA (log2 CPM)", x = "", y = "log2 CPM") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p3 <- ggplot(merged, aes(x = TRF2_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "TRF2 Expression", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p3)
clin_sub <- data.stage2[, c("submitter_id", "vital_status", "days_to_death", "days_to_last_follow_up")]
merged_trf2 <- dplyr::left_join(TRF2, clin_sub,
by = c("patient" = "submitter_id"))
surv_data_trf2 <- merged_trf2 %>%
dplyr::mutate(
OS_time = ifelse(is.na(days_to_death),
days_to_last_follow_up,
days_to_death),
OS_event = ifelse(tolower(vital_status) %in% c("dead","deceased"), 1, 0)
) %>%
dplyr::filter(!is.na(OS_time) & !is.na(OS_event))
fit_trf2 <- survfit(Surv(OS_time, OS_event) ~ TRF2_group, data = surv_data_trf2)
ggsurvplot(
fit_trf2,
data = surv_data_trf2,
risk.table = TRUE,
pval = TRUE,
conf.int = TRUE,
legend.title = "TRF2",
legend.labs = levels(factor(surv_data_trf2$TRF2_group)),
palette = viridis(length(unique(surv_data_trf2$TRF2_group)), option = "cividis"),
xlab = "Days",
ylab = "Overall survival probability",
ggtheme = theme_bw()
)
tels1_trf2 <- dplyr::inner_join(TELS1, TRF2, by = "patient")
tels1_trf2$Combined_group <- dplyr::case_when(
tels1_trf2$TELS1_group == "Low" & tels1_trf2$TRF2_group == "Low" ~ "Both Low",
tels1_trf2$TELS1_group == "High" & tels1_trf2$TRF2_group == "High" ~ "Both High",
TRUE ~ "One High"
)
tels1_trf2 <- tels1_trf2[!duplicated(tels1_trf2$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status")]
merged_tels1_trf2 <- dplyr::left_join(tels1_trf2, clin_sub, by = c("patient" = "submitter_id"))
p1 <- ggplot(tels1_trf2, aes(x = Combined_group, fill = Combined_group)) +
geom_bar() +
labs(fill = "TELS1 & TRF2 Expression", x = "TELS1 & TRF2 Groups", y = "Count") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p2 <- ggplot(merged_tels1_trf2, aes(x = Combined_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "TELS1 & TRF2 Groups", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p2)
clin_sub <- data.stage2[, c("submitter_id", "vital_status", "days_to_death", "days_to_last_follow_up")]
merged_tels1_trf2 <- dplyr::left_join(tels1_trf2, clin_sub, by = c("patient" = "submitter_id"))
surv_data_tels1_trf2 <- merged_tels1_trf2 %>%
dplyr::mutate(
OS_time = ifelse(is.na(days_to_death),
days_to_last_follow_up,
days_to_death),
OS_event = ifelse(tolower(vital_status) %in% c("dead","deceased"), 1, 0)
) %>%
dplyr::filter(!is.na(OS_time) & !is.na(OS_event))
fit_tels1_trf2 <- survfit(Surv(OS_time, OS_event) ~ Combined_group, data = surv_data_tels1_trf2)
ggsurvplot(
fit_tels1_trf2,
data = surv_data_tels1_trf2,
risk.table = TRUE,
pval = TRUE,
conf.int = TRUE,
legend.title = "TELS1/TRF2",
legend.labs = levels(factor(surv_data_tels1_trf2$Combined_group)),
palette = viridis(length(unique(surv_data_tels1_trf2$Combined_group)), option = "cividis"),
xlab = "Days",
ylab = "Overall survival probability",
ggtheme = theme_bw()
)
cor.test(tels1_trf2$TELS1_expr, tels1_trf2$TRF2_expr)
##
## Pearson's product-moment correlation
##
## data: tels1_trf2$TELS1_expr and tels1_trf2$TRF2_expr
## t = -1.9996, df = 102, p-value = 0.04821
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.372857575 -0.001690586
## sample estimates:
## cor
## -0.1942156
ggplot(tels1_trf2, aes(x = TELS1_expr, y = TRF2_expr)) +
geom_point(aes(color = Combined_group)) +
geom_smooth(method = "lm", se = FALSE) +
scale_color_viridis_d(option = "cividis")+
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
table(tels1_trf2$TELS1_group, tels1_trf2$TRF2_group)
##
## High Low
## High 24 28
## Low 28 24
chisq.test(table(tels1_trf2$TELS1_group, tels1_trf2$TRF2_group))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: table(tels1_trf2$TELS1_group, tels1_trf2$TRF2_group)
## X-squared = 0.34615, df = 1, p-value = 0.5563
cox_model <- coxph(Surv(OS_time, OS_event) ~ TELS1_group * TRF2_group, data = surv_data_tels1_trf2)
summary(cox_model)
## Call:
## coxph(formula = Surv(OS_time, OS_event) ~ TELS1_group * TRF2_group,
## data = surv_data_tels1_trf2)
##
## n= 104, number of events= 41
##
## coef exp(coef) se(coef) z Pr(>|z|)
## TELS1_groupLow 0.5465 1.7273 0.4768 1.146 0.252
## TRF2_groupLow 0.6359 1.8888 0.4764 1.335 0.182
## TELS1_groupLow:TRF2_groupLow -0.7351 0.4795 0.6423 -1.145 0.252
##
## exp(coef) exp(-coef) lower .95 upper .95
## TELS1_groupLow 1.7273 0.5790 0.6784 4.398
## TRF2_groupLow 1.8888 0.5295 0.7424 4.805
## TELS1_groupLow:TRF2_groupLow 0.4795 2.0857 0.1362 1.688
##
## Concordance= 0.55 (se = 0.044 )
## Likelihood ratio test= 2.13 on 3 df, p=0.5
## Wald test = 1.94 on 3 df, p=0.6
## Score (logrank) test = 1.98 on 3 df, p=0.6
if (!requireNamespace("ComplexHeatmap", quietly = TRUE)) {
BiocManager::install("ComplexHeatmap")
}
suppressPackageStartupMessages(library(ComplexHeatmap))
suppressPackageStartupMessages(library(circlize))
expr_mat <- tels1_trf2[, c("TELS1_expr", "TRF2_expr")]
rownames(expr_mat) <- tels1_trf2$patient
expr_mat <- t(scale(expr_mat))
clin_sub <- data.stage2[, c("submitter_id", "vital_status", "days_to_death", "days_to_last_follow_up")]
surv_annot <- dplyr::left_join(
data.frame(patient = colnames(expr_mat)),
clin_sub,
by = c("patient" = "submitter_id")
)
surv_annot$OS_time <- ifelse(is.na(surv_annot$days_to_death),
surv_annot$days_to_last_follow_up,
surv_annot$days_to_death)
surv_annot$OS_event <- ifelse(tolower(surv_annot$vital_status) %in% c("dead","deceased"), "Dead", "Alive")
ha <- HeatmapAnnotation(
OS_time = anno_barplot(surv_annot$OS_time, gp = gpar(fill = "grey")),
Status = surv_annot$OS_event,
col = list(Status = c("Alive" = "steelblue", "Dead" = "firebrick"))
)
Heatmap(expr_mat,
name = "Expression (z-score)",
top_annotation = ha,
show_row_names = TRUE,
show_column_names = FALSE,
column_title = "Patients (clustered by TELS1 + TRF2)")
tels1_clinical <- dplyr::left_join(
TELS1,
merged_clinical,
by = c("patient" = "submitter_id")
)
ggplot(merged_clinical, aes(x = age_at_diagnosis/365, fill = vital_status)) +
geom_histogram(binwidth = 5, position = "dodge") +
labs(title = "Age at Diagnosis by Vital Status",
x = "Age (years)", y = "Number of Patients") +
scale_fill_viridis_d(option = "cividis") +
theme_bw()
ggplot(merged_clinical, aes(x = gender.x, y = age_at_diagnosis/365, fill = gender.x)) +
geom_boxplot() +
labs(title = "Age at Diagnosis by Gender",
x = "Gender", y = "Age (years)", fill = "Gender") +
scale_fill_viridis_d(option = "cividis") +
theme_bw()
ggplot(tels1_clinical, aes(x = age_at_diagnosis/365, y = TELS1_expr, color = vital_status)) +
geom_point(size = 2) +
geom_smooth(method = "lm", se = FALSE) +
labs(title = "TELS1 Expression vs Age at Diagnosis",
x = "Age (years)", y = "TELS1 log2 CPM") +
scale_color_viridis_d(option = "cividis")+
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
trf2_clinical <- dplyr::left_join(
TRF2,
merged_clinical,
by = c("patient" = "submitter_id")
)
ggplot(trf2_clinical, aes(x = age_at_diagnosis/365, y = TRF2_expr, color = vital_status)) +
geom_point(size = 2) +
geom_smooth(method = "lm", se = FALSE) +
labs(title = "TRF2 Expression vs Age at Diagnosis",
x = "Age (years)", y = "TRF2 log2 CPM") +
scale_color_viridis_d(option = "cividis")+
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
if (!requireNamespace("ggalluvial", quietly = TRUE)) {
install.packages("ggalluvial")
}
suppressPackageStartupMessages(library(ggalluvial))
alluvial_df <- merged_tels1_trf2 %>%
dplyr::select(patient, TELS1_group, TRF2_group, vital_status) %>%
dplyr::filter(!is.na(TELS1_group), !is.na(TRF2_group), !is.na(vital_status))
ggplot(alluvial_df,
aes(axis1 = TELS1_group, axis2 = TRF2_group, axis3 = vital_status)) +
geom_alluvium(aes(fill = vital_status), width = 1/12) +
geom_stratum(width = 1/12, fill = "grey80", color = "black") +
geom_text(stat = "stratum", aes(label = paste0(after_stat(stratum), "\n(n=", after_stat(n), ")"))) +
scale_x_discrete(limits = c("TELS1", "TRF2", "Vital Status"), expand = c(.05, .05)) +
scale_fill_viridis_d(option = "cividis") +
theme_bw() +
labs(title = "Alluvial Plot of TELS1/TRF2 Groups and Vital Status")
data.stage2 <- merged_clinical[merged_clinical$primary_pathology_histological_type == "Dedifferentiated liposarcoma", ]
DT::datatable(
data.stage2,
filter = "top",
options = list(scrollX = TRUE, pageLength = 5, autoWidth = TRUE),
rownames = FALSE
)
ggplotly(ggplot(data.stage2)+aes(x=vital_status,fill=vital_status)+geom_bar()+labs(fill="Vital Status", x="Vital Status", y= "Count")+theme_bw()+scale_fill_viridis_d(option = "cividis")+theme(panel.grid.major = element_blank(), panel.grid.minor =element_blank()))
library(edgeR)
dge <- DGEList(counts = RNA)
dge <- calcNormFactors(dge)
RNA_norm <- cpm(dge, log = TRUE)
RNA_norm <- cpm(dge, log = TRUE)
lms_patients <- data.stage2$submitter_id
RNA_norm <- RNA_norm[, substr(colnames(RNA_norm), 1, 12) %in% lms_patients]
ACTA2 <- RNA_norm["ENSG00000107796.13", , drop = FALSE] |>
t() |>
as.data.frame()
colnames(ACTA2) <- "ACTA2_expr"
ACTA2$patient <- substr(rownames(ACTA2), 1, 12)
ACTA2$ACTA2_group <- ifelse(ACTA2$ACTA2_expr <= median(ACTA2$ACTA2_expr),
"Low", "High")
ACTA2 <- ACTA2[!duplicated(ACTA2$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status")]
merged <- dplyr::left_join(ACTA2, clin_sub,
by = c("patient" = "submitter_id"))
p1 <- ggplot(ACTA2, aes(x = "", y = ACTA2_expr, fill = "ACTA2")) +
geom_boxplot() +
labs(fill = "ACTA2 RNA (log2 CPM)", x = "", y = "log2 CPM") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p3 <- ggplot(merged, aes(x = ACTA2_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "ACTA2 Expression", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p3)
DAXX <- RNA_norm["ENSG00000204209.13", , drop = FALSE] |>
t() |>
as.data.frame()
colnames(DAXX) <- "DAXX_expr"
DAXX$patient <- substr(rownames(DAXX), 1, 12)
DAXX$DAXX_group <- ifelse(DAXX$DAXX_expr <= median(DAXX$DAXX_expr),
"Low", "High")
DAXX <- DAXX[!duplicated(DAXX$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status")]
merged <- dplyr::left_join(DAXX, clin_sub,
by = c("patient" = "submitter_id"))
p1 <- ggplot(DAXX, aes(x = "", y = DAXX_expr, fill = "DAXX")) +
geom_boxplot() +
labs(fill = "DAXX RNA (log2 CPM)", x = "", y = "log2 CPM") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p3 <- ggplot(merged, aes(x = DAXX_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "DAXX Expression", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p3)
genes <- c("ENSG00000085224.23",
"ENSG00000204209.13")
atrx_daxx <- RNA_norm[genes, , drop = FALSE] |>
t() |>
as.data.frame()
colnames(atrx_daxx) <- c("ATRX_expr", "DAXX_expr")
atrx_daxx$patient <- substr(rownames(atrx_daxx), 1, 12)
atrx_daxx$ATRX_group <- ifelse(atrx_daxx$ATRX_expr <= median(atrx_daxx$ATRX_expr),
"Low", "High")
atrx_daxx$DAXX_group <- ifelse(atrx_daxx$DAXX_expr <= median(atrx_daxx$DAXX_expr),
"Low", "High")
atrx_daxx$Combined_group <- dplyr::case_when(
atrx_daxx$ATRX_group == "Low" & atrx_daxx$DAXX_group == "Low" ~ "Both Low",
atrx_daxx$ATRX_group == "High" & atrx_daxx$DAXX_group == "High" ~ "Both High",
TRUE ~ "One High"
)
atrx_daxx <- atrx_daxx[!duplicated(atrx_daxx$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status", "days_to_death", "days_to_last_follow_up")]
merged_atrx_daxx <- dplyr::left_join(atrx_daxx, clin_sub,
by = c("patient" = "submitter_id"))
p1 <- ggplot(atrx_daxx, aes(x = Combined_group, fill = Combined_group)) +
geom_bar() +
labs(fill = "ATRX & DAXX Expression", x = "ATRX & DAXX Groups", y = "Count") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p2 <- ggplot(merged_atrx_daxx, aes(x = Combined_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "ATRX & DAXX Groups", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p2)
library(survival)
library(survminer)
surv_data <- merged_atrx_daxx %>%
dplyr::mutate(
OS_time = ifelse(is.na(days_to_death),
days_to_last_follow_up,
days_to_death),
OS_event = ifelse(tolower(vital_status) %in% c("dead","deceased"), 1, 0)
) %>%
dplyr::filter(!is.na(OS_time) & !is.na(OS_event))
fit <- survfit(Surv(OS_time, OS_event) ~ Combined_group, data = surv_data)
km_plot <- ggsurvplot(
fit,
data = surv_data,
risk.table = TRUE,
pval = TRUE,
conf.int = TRUE,
legend.title = "ATRX/DAXX",
legend.labs = levels(factor(surv_data$Combined_group)),
palette = viridis(length(unique(surv_data$Combined_group)), option = "cividis"),
xlab = "Days",
ylab = "Overall survival probability",
ggtheme = theme_bw()
)
km_plot
TELS1 <- RNA_norm["ENSG00000121903.14", , drop = FALSE] |>
t() |>
as.data.frame()
colnames(TELS1) <- "TELS1_expr"
TELS1$patient <- substr(rownames(TELS1), 1, 12)
TELS1$TELS1_group <- ifelse(TELS1$TELS1_expr <= median(TELS1$TELS1_expr),
"Low", "High")
TELS1 <- TELS1[!duplicated(TELS1$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status")]
merged <- dplyr::left_join(TELS1, clin_sub,
by = c("patient" = "submitter_id"))
p1 <- ggplot(TELS1, aes(x = "", y = TELS1_expr, fill = "TELS1")) +
geom_boxplot() +
labs(fill = "TELS1 RNA (log2 CPM)", x = "", y = "log2 CPM") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p3 <- ggplot(merged, aes(x = TELS1_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "TELS1 Expression", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p3)
clin_sub <- data.stage2[, c("submitter_id", "vital_status", "days_to_death", "days_to_last_follow_up")]
merged_tels1 <- dplyr::left_join(TELS1, clin_sub, by = c("patient" = "submitter_id"))
surv_data <- merged_tels1 %>%
dplyr::mutate(
OS_time = ifelse(is.na(days_to_death),
days_to_last_follow_up,
days_to_death),
OS_event = ifelse(tolower(vital_status) %in% c("dead","deceased"), 1, 0)
) %>%
dplyr::filter(!is.na(OS_time) & !is.na(OS_event))
fit <- survfit(Surv(OS_time, OS_event) ~ TELS1_group, data = surv_data)
km_plot <- ggsurvplot(
fit,
data = surv_data,
risk.table = TRUE,
pval = TRUE,
conf.int = TRUE,
legend.title = "TELS1",
legend.labs = levels(factor(surv_data$TELS1_group)),
palette = viridis(length(unique(surv_data$TELS1_group)), option = "cividis"),
xlab = "Days",
ylab = "Overall survival probability",
ggtheme = theme_bw()
)
km_plot
TRF2 <- RNA_norm["ENSG00000132604.11", , drop = FALSE] |>
t() |>
as.data.frame()
colnames(TRF2) <- "TRF2_expr"
TRF2$patient <- substr(rownames(TRF2), 1, 12)
TRF2$TRF2_group <- ifelse(TRF2$TRF2_expr <= median(TRF2$TRF2_expr),
"Low", "High")
TRF2 <- TRF2[!duplicated(TRF2$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status")]
merged <- dplyr::left_join(TRF2, clin_sub,
by = c("patient" = "submitter_id"))
p1 <- ggplot(TRF2, aes(x = "", y = TRF2_expr, fill = "TRF2")) +
geom_boxplot() +
labs(fill = "TRF2 RNA (log2 CPM)", x = "", y = "log2 CPM") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p3 <- ggplot(merged, aes(x = TRF2_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "TRF2 Expression", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p3)
clin_sub <- data.stage2[, c("submitter_id", "vital_status", "days_to_death", "days_to_last_follow_up")]
merged_trf2 <- dplyr::left_join(TRF2, clin_sub,
by = c("patient" = "submitter_id"))
surv_data_trf2 <- merged_trf2 %>%
dplyr::mutate(
OS_time = ifelse(is.na(days_to_death),
days_to_last_follow_up,
days_to_death),
OS_event = ifelse(tolower(vital_status) %in% c("dead","deceased"), 1, 0)
) %>%
dplyr::filter(!is.na(OS_time) & !is.na(OS_event))
fit_trf2 <- survfit(Surv(OS_time, OS_event) ~ TRF2_group, data = surv_data_trf2)
ggsurvplot(
fit_trf2,
data = surv_data_trf2,
risk.table = TRUE,
pval = TRUE,
conf.int = TRUE,
legend.title = "TRF2",
legend.labs = levels(factor(surv_data_trf2$TRF2_group)),
palette = viridis(length(unique(surv_data_trf2$TRF2_group)), option = "cividis"),
xlab = "Days",
ylab = "Overall survival probability",
ggtheme = theme_bw()
)
tels1_trf2 <- dplyr::inner_join(TELS1, TRF2, by = "patient")
tels1_trf2$Combined_group <- dplyr::case_when(
tels1_trf2$TELS1_group == "Low" & tels1_trf2$TRF2_group == "Low" ~ "Both Low",
tels1_trf2$TELS1_group == "High" & tels1_trf2$TRF2_group == "High" ~ "Both High",
TRUE ~ "One High"
)
tels1_trf2 <- tels1_trf2[!duplicated(tels1_trf2$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status")]
merged_tels1_trf2 <- dplyr::left_join(tels1_trf2, clin_sub, by = c("patient" = "submitter_id"))
p1 <- ggplot(tels1_trf2, aes(x = Combined_group, fill = Combined_group)) +
geom_bar() +
labs(fill = "TELS1 & TRF2 Expression", x = "TELS1 & TRF2 Groups", y = "Count") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p2 <- ggplot(merged_tels1_trf2, aes(x = Combined_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "TELS1 & TRF2 Groups", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p2)
clin_sub <- data.stage2[, c("submitter_id", "vital_status", "days_to_death", "days_to_last_follow_up")]
merged_tels1_trf2 <- dplyr::left_join(tels1_trf2, clin_sub, by = c("patient" = "submitter_id"))
surv_data_tels1_trf2 <- merged_tels1_trf2 %>%
dplyr::mutate(
OS_time = ifelse(is.na(days_to_death),
days_to_last_follow_up,
days_to_death),
OS_event = ifelse(tolower(vital_status) %in% c("dead","deceased"), 1, 0)
) %>%
dplyr::filter(!is.na(OS_time) & !is.na(OS_event))
fit_tels1_trf2 <- survfit(Surv(OS_time, OS_event) ~ Combined_group, data = surv_data_tels1_trf2)
ggsurvplot(
fit_tels1_trf2,
data = surv_data_tels1_trf2,
risk.table = TRUE,
pval = TRUE,
conf.int = TRUE,
legend.title = "TELS1/TRF2",
legend.labs = levels(factor(surv_data_tels1_trf2$Combined_group)),
palette = viridis(length(unique(surv_data_tels1_trf2$Combined_group)), option = "cividis"),
xlab = "Days",
ylab = "Overall survival probability",
ggtheme = theme_bw()
)
cor.test(tels1_trf2$TELS1_expr, tels1_trf2$TRF2_expr)
##
## Pearson's product-moment correlation
##
## data: tels1_trf2$TELS1_expr and tels1_trf2$TRF2_expr
## t = 1.3926, df = 56, p-value = 0.1693
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.07908199 0.42133619
## sample estimates:
## cor
## 0.1829508
ggplot(tels1_trf2, aes(x = TELS1_expr, y = TRF2_expr)) +
geom_point(aes(color = Combined_group)) +
geom_smooth(method = "lm", se = FALSE) +
scale_color_viridis_d(option = "cividis")+
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
table(tels1_trf2$TELS1_group, tels1_trf2$TRF2_group)
##
## High Low
## High 15 14
## Low 14 15
chisq.test(table(tels1_trf2$TELS1_group, tels1_trf2$TRF2_group))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: table(tels1_trf2$TELS1_group, tels1_trf2$TRF2_group)
## X-squared = 0, df = 1, p-value = 1
cox_model <- coxph(Surv(OS_time, OS_event) ~ TELS1_group * TRF2_group, data = surv_data_tels1_trf2)
summary(cox_model)
## Call:
## coxph(formula = Surv(OS_time, OS_event) ~ TELS1_group * TRF2_group,
## data = surv_data_tels1_trf2)
##
## n= 58, number of events= 25
##
## coef exp(coef) se(coef) z Pr(>|z|)
## TELS1_groupLow -0.006495 0.993526 0.521076 -0.012 0.990
## TRF2_groupLow -0.044305 0.956662 0.544517 -0.081 0.935
## TELS1_groupLow:TRF2_groupLow -0.773183 0.461542 0.830690 -0.931 0.352
##
## exp(coef) exp(-coef) lower .95 upper .95
## TELS1_groupLow 0.9935 1.007 0.3578 2.759
## TRF2_groupLow 0.9567 1.045 0.3291 2.781
## TELS1_groupLow:TRF2_groupLow 0.4615 2.167 0.0906 2.351
##
## Concordance= 0.526 (se = 0.061 )
## Likelihood ratio test= 2.5 on 3 df, p=0.5
## Wald test = 2.12 on 3 df, p=0.5
## Score (logrank) test = 2.23 on 3 df, p=0.5
if (!requireNamespace("ComplexHeatmap", quietly = TRUE)) {
BiocManager::install("ComplexHeatmap")
}
suppressPackageStartupMessages(library(ComplexHeatmap))
suppressPackageStartupMessages(library(circlize))
expr_mat <- tels1_trf2[, c("TELS1_expr", "TRF2_expr")]
rownames(expr_mat) <- tels1_trf2$patient
expr_mat <- t(scale(expr_mat))
clin_sub <- data.stage2[, c("submitter_id", "vital_status", "days_to_death", "days_to_last_follow_up")]
surv_annot <- dplyr::left_join(
data.frame(patient = colnames(expr_mat)),
clin_sub,
by = c("patient" = "submitter_id")
)
surv_annot$OS_time <- ifelse(is.na(surv_annot$days_to_death),
surv_annot$days_to_last_follow_up,
surv_annot$days_to_death)
surv_annot$OS_event <- ifelse(tolower(surv_annot$vital_status) %in% c("dead","deceased"), "Dead", "Alive")
ha <- HeatmapAnnotation(
OS_time = anno_barplot(surv_annot$OS_time, gp = gpar(fill = "grey")),
Status = surv_annot$OS_event,
col = list(Status = c("Alive" = "steelblue", "Dead" = "firebrick"))
)
Heatmap(expr_mat,
name = "Expression (z-score)",
top_annotation = ha,
show_row_names = TRUE,
show_column_names = FALSE,
column_title = "Patients (clustered by TELS1 + TRF2)")
tels1_clinical <- dplyr::left_join(
TELS1,
merged_clinical,
by = c("patient" = "submitter_id")
)
ggplot(merged_clinical, aes(x = age_at_diagnosis/365, fill = vital_status)) +
geom_histogram(binwidth = 5, position = "dodge") +
labs(title = "Age at Diagnosis by Vital Status",
x = "Age (years)", y = "Number of Patients") +
scale_fill_viridis_d(option = "cividis") +
theme_bw()
ggplot(merged_clinical, aes(x = gender.x, y = age_at_diagnosis/365, fill = gender.x)) +
geom_boxplot() +
labs(title = "Age at Diagnosis by Gender",
x = "Gender", y = "Age (years)", fill = "Gender") +
scale_fill_viridis_d(option = "cividis") +
theme_bw()
ggplot(tels1_clinical, aes(x = age_at_diagnosis/365, y = TELS1_expr, color = vital_status)) +
geom_point(size = 2) +
geom_smooth(method = "lm", se = FALSE) +
labs(title = "TELS1 Expression vs Age at Diagnosis",
x = "Age (years)", y = "TELS1 log2 CPM") +
scale_color_viridis_d(option = "cividis")+
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
trf2_clinical <- dplyr::left_join(
TRF2,
merged_clinical,
by = c("patient" = "submitter_id")
)
ggplot(trf2_clinical, aes(x = age_at_diagnosis/365, y = TRF2_expr, color = vital_status)) +
geom_point(size = 2) +
geom_smooth(method = "lm", se = FALSE) +
labs(title = "TRF2 Expression vs Age at Diagnosis",
x = "Age (years)", y = "TRF2 log2 CPM") +
scale_color_viridis_d(option = "cividis")+
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
if (!requireNamespace("ggalluvial", quietly = TRUE)) {
install.packages("ggalluvial")
}
suppressPackageStartupMessages(library(ggalluvial))
alluvial_df <- merged_tels1_trf2 %>%
dplyr::select(patient, TELS1_group, TRF2_group, vital_status) %>%
dplyr::filter(!is.na(TELS1_group), !is.na(TRF2_group), !is.na(vital_status))
ggplot(alluvial_df,
aes(axis1 = TELS1_group, axis2 = TRF2_group, axis3 = vital_status)) +
geom_alluvium(aes(fill = vital_status), width = 1/12) +
geom_stratum(width = 1/12, fill = "grey80", color = "black") +
geom_text(stat = "stratum", aes(label = paste0(after_stat(stratum), "\n(n=", after_stat(n), ")"))) +
scale_x_discrete(limits = c("TELS1", "TRF2", "Vital Status"), expand = c(.05, .05)) +
scale_fill_viridis_d(option = "cividis") +
theme_bw() +
labs(title = "Alluvial Plot of TELS1/TRF2 Groups and Vital Status")
data.stage2 <- merged_clinical[merged_clinical$primary_pathology_histological_type == "Pleomorphic 'MFH' / Undifferentiated pleomorphic sarcoma", ]
DT::datatable(
data.stage2,
filter = "top",
options = list(scrollX = TRUE, pageLength = 5, autoWidth = TRUE),
rownames = FALSE
)
ggplotly(ggplot(data.stage2)+aes(x=vital_status,fill=vital_status)+geom_bar()+labs(fill="Vital Status", x="Vital Status", y= "Count")+theme_bw()+scale_fill_viridis_d(option = "cividis")+theme(panel.grid.major = element_blank(), panel.grid.minor =element_blank()))
library(edgeR)
dge <- DGEList(counts = RNA)
dge <- calcNormFactors(dge)
RNA_norm <- cpm(dge, log = TRUE)
RNA_norm <- cpm(dge, log = TRUE)
lms_patients <- data.stage2$submitter_id
RNA_norm <- RNA_norm[, substr(colnames(RNA_norm), 1, 12) %in% lms_patients]
ACTA2 <- RNA_norm["ENSG00000107796.13", , drop = FALSE] |>
t() |>
as.data.frame()
colnames(ACTA2) <- "ACTA2_expr"
ACTA2$patient <- substr(rownames(ACTA2), 1, 12)
ACTA2$ACTA2_group <- ifelse(ACTA2$ACTA2_expr <= median(ACTA2$ACTA2_expr),
"Low", "High")
ACTA2 <- ACTA2[!duplicated(ACTA2$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status")]
merged <- dplyr::left_join(ACTA2, clin_sub,
by = c("patient" = "submitter_id"))
p1 <- ggplot(ACTA2, aes(x = "", y = ACTA2_expr, fill = "ACTA2")) +
geom_boxplot() +
labs(fill = "ACTA2 RNA (log2 CPM)", x = "", y = "log2 CPM") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p3 <- ggplot(merged, aes(x = ACTA2_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "ACTA2 Expression", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p3)
DAXX <- RNA_norm["ENSG00000204209.13", , drop = FALSE] |>
t() |>
as.data.frame()
colnames(DAXX) <- "DAXX_expr"
DAXX$patient <- substr(rownames(DAXX), 1, 12)
DAXX$DAXX_group <- ifelse(DAXX$DAXX_expr <= median(DAXX$DAXX_expr),
"Low", "High")
DAXX <- DAXX[!duplicated(DAXX$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status")]
merged <- dplyr::left_join(DAXX, clin_sub,
by = c("patient" = "submitter_id"))
p1 <- ggplot(DAXX, aes(x = "", y = DAXX_expr, fill = "DAXX")) +
geom_boxplot() +
labs(fill = "DAXX RNA (log2 CPM)", x = "", y = "log2 CPM") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p3 <- ggplot(merged, aes(x = DAXX_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "DAXX Expression", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p3)
genes <- c("ENSG00000085224.23",
"ENSG00000204209.13")
atrx_daxx <- RNA_norm[genes, , drop = FALSE] |>
t() |>
as.data.frame()
colnames(atrx_daxx) <- c("ATRX_expr", "DAXX_expr")
atrx_daxx$patient <- substr(rownames(atrx_daxx), 1, 12)
atrx_daxx$ATRX_group <- ifelse(atrx_daxx$ATRX_expr <= median(atrx_daxx$ATRX_expr),
"Low", "High")
atrx_daxx$DAXX_group <- ifelse(atrx_daxx$DAXX_expr <= median(atrx_daxx$DAXX_expr),
"Low", "High")
atrx_daxx$Combined_group <- dplyr::case_when(
atrx_daxx$ATRX_group == "Low" & atrx_daxx$DAXX_group == "Low" ~ "Both Low",
atrx_daxx$ATRX_group == "High" & atrx_daxx$DAXX_group == "High" ~ "Both High",
TRUE ~ "One High"
)
atrx_daxx <- atrx_daxx[!duplicated(atrx_daxx$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status", "days_to_death", "days_to_last_follow_up")]
merged_atrx_daxx <- dplyr::left_join(atrx_daxx, clin_sub,
by = c("patient" = "submitter_id"))
p1 <- ggplot(atrx_daxx, aes(x = Combined_group, fill = Combined_group)) +
geom_bar() +
labs(fill = "ATRX & DAXX Expression", x = "ATRX & DAXX Groups", y = "Count") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p2 <- ggplot(merged_atrx_daxx, aes(x = Combined_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "ATRX & DAXX Groups", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p2)
library(survival)
library(survminer)
surv_data <- merged_atrx_daxx %>%
dplyr::mutate(
OS_time = ifelse(is.na(days_to_death),
days_to_last_follow_up,
days_to_death),
OS_event = ifelse(tolower(vital_status) %in% c("dead","deceased"), 1, 0)
) %>%
dplyr::filter(!is.na(OS_time) & !is.na(OS_event))
fit <- survfit(Surv(OS_time, OS_event) ~ Combined_group, data = surv_data)
km_plot <- ggsurvplot(
fit,
data = surv_data,
risk.table = TRUE,
pval = TRUE,
conf.int = TRUE,
legend.title = "ATRX/DAXX",
legend.labs = levels(factor(surv_data$Combined_group)),
palette = viridis(length(unique(surv_data$Combined_group)), option = "cividis"),
xlab = "Days",
ylab = "Overall survival probability",
ggtheme = theme_bw()
)
km_plot
TELS1 <- RNA_norm["ENSG00000121903.14", , drop = FALSE] |>
t() |>
as.data.frame()
colnames(TELS1) <- "TELS1_expr"
TELS1$patient <- substr(rownames(TELS1), 1, 12)
TELS1$TELS1_group <- ifelse(TELS1$TELS1_expr <= median(TELS1$TELS1_expr),
"Low", "High")
TELS1 <- TELS1[!duplicated(TELS1$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status")]
merged <- dplyr::left_join(TELS1, clin_sub,
by = c("patient" = "submitter_id"))
p1 <- ggplot(TELS1, aes(x = "", y = TELS1_expr, fill = "TELS1")) +
geom_boxplot() +
labs(fill = "TELS1 RNA (log2 CPM)", x = "", y = "log2 CPM") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p3 <- ggplot(merged, aes(x = TELS1_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "TELS1 Expression", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p3)
clin_sub <- data.stage2[, c("submitter_id", "vital_status", "days_to_death", "days_to_last_follow_up")]
merged_tels1 <- dplyr::left_join(TELS1, clin_sub, by = c("patient" = "submitter_id"))
surv_data <- merged_tels1 %>%
dplyr::mutate(
OS_time = ifelse(is.na(days_to_death),
days_to_last_follow_up,
days_to_death),
OS_event = ifelse(tolower(vital_status) %in% c("dead","deceased"), 1, 0)
) %>%
dplyr::filter(!is.na(OS_time) & !is.na(OS_event))
fit <- survfit(Surv(OS_time, OS_event) ~ TELS1_group, data = surv_data)
km_plot <- ggsurvplot(
fit,
data = surv_data,
risk.table = TRUE,
pval = TRUE,
conf.int = TRUE,
legend.title = "TELS1",
legend.labs = levels(factor(surv_data$TELS1_group)),
palette = viridis(length(unique(surv_data$TELS1_group)), option = "cividis"),
xlab = "Days",
ylab = "Overall survival probability",
ggtheme = theme_bw()
)
km_plot
TRF2 <- RNA_norm["ENSG00000132604.11", , drop = FALSE] |>
t() |>
as.data.frame()
colnames(TRF2) <- "TRF2_expr"
TRF2$patient <- substr(rownames(TRF2), 1, 12)
TRF2$TRF2_group <- ifelse(TRF2$TRF2_expr <= median(TRF2$TRF2_expr),
"Low", "High")
TRF2 <- TRF2[!duplicated(TRF2$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status")]
merged <- dplyr::left_join(TRF2, clin_sub,
by = c("patient" = "submitter_id"))
p1 <- ggplot(TRF2, aes(x = "", y = TRF2_expr, fill = "TRF2")) +
geom_boxplot() +
labs(fill = "TRF2 RNA (log2 CPM)", x = "", y = "log2 CPM") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p3 <- ggplot(merged, aes(x = TRF2_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "TRF2 Expression", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p3)
clin_sub <- data.stage2[, c("submitter_id", "vital_status", "days_to_death", "days_to_last_follow_up")]
merged_trf2 <- dplyr::left_join(TRF2, clin_sub,
by = c("patient" = "submitter_id"))
surv_data_trf2 <- merged_trf2 %>%
dplyr::mutate(
OS_time = ifelse(is.na(days_to_death),
days_to_last_follow_up,
days_to_death),
OS_event = ifelse(tolower(vital_status) %in% c("dead","deceased"), 1, 0)
) %>%
dplyr::filter(!is.na(OS_time) & !is.na(OS_event))
fit_trf2 <- survfit(Surv(OS_time, OS_event) ~ TRF2_group, data = surv_data_trf2)
ggsurvplot(
fit_trf2,
data = surv_data_trf2,
risk.table = TRUE,
pval = TRUE,
conf.int = TRUE,
legend.title = "TRF2",
legend.labs = levels(factor(surv_data_trf2$TRF2_group)),
palette = viridis(length(unique(surv_data_trf2$TRF2_group)), option = "cividis"),
xlab = "Days",
ylab = "Overall survival probability",
ggtheme = theme_bw()
)
tels1_trf2 <- dplyr::inner_join(TELS1, TRF2, by = "patient")
tels1_trf2$Combined_group <- dplyr::case_when(
tels1_trf2$TELS1_group == "Low" & tels1_trf2$TRF2_group == "Low" ~ "Both Low",
tels1_trf2$TELS1_group == "High" & tels1_trf2$TRF2_group == "High" ~ "Both High",
TRUE ~ "One High"
)
tels1_trf2 <- tels1_trf2[!duplicated(tels1_trf2$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status")]
merged_tels1_trf2 <- dplyr::left_join(tels1_trf2, clin_sub, by = c("patient" = "submitter_id"))
p1 <- ggplot(tels1_trf2, aes(x = Combined_group, fill = Combined_group)) +
geom_bar() +
labs(fill = "TELS1 & TRF2 Expression", x = "TELS1 & TRF2 Groups", y = "Count") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p2 <- ggplot(merged_tels1_trf2, aes(x = Combined_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "TELS1 & TRF2 Groups", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p2)
clin_sub <- data.stage2[, c("submitter_id", "vital_status", "days_to_death", "days_to_last_follow_up")]
merged_tels1_trf2 <- dplyr::left_join(tels1_trf2, clin_sub, by = c("patient" = "submitter_id"))
surv_data_tels1_trf2 <- merged_tels1_trf2 %>%
dplyr::mutate(
OS_time = ifelse(is.na(days_to_death),
days_to_last_follow_up,
days_to_death),
OS_event = ifelse(tolower(vital_status) %in% c("dead","deceased"), 1, 0)
) %>%
dplyr::filter(!is.na(OS_time) & !is.na(OS_event))
fit_tels1_trf2 <- survfit(Surv(OS_time, OS_event) ~ Combined_group, data = surv_data_tels1_trf2)
ggsurvplot(
fit_tels1_trf2,
data = surv_data_tels1_trf2,
risk.table = TRUE,
pval = TRUE,
conf.int = TRUE,
legend.title = "TELS1/TRF2",
legend.labs = levels(factor(surv_data_tels1_trf2$Combined_group)),
palette = viridis(length(unique(surv_data_tels1_trf2$Combined_group)), option = "cividis"),
xlab = "Days",
ylab = "Overall survival probability",
ggtheme = theme_bw()
)
cor.test(tels1_trf2$TELS1_expr, tels1_trf2$TRF2_expr)
##
## Pearson's product-moment correlation
##
## data: tels1_trf2$TELS1_expr and tels1_trf2$TRF2_expr
## t = 0.52041, df = 27, p-value = 0.607
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2769681 0.4497333
## sample estimates:
## cor
## 0.09965347
ggplot(tels1_trf2, aes(x = TELS1_expr, y = TRF2_expr)) +
geom_point(aes(color = Combined_group)) +
geom_smooth(method = "lm", se = FALSE) +
scale_color_viridis_d(option = "cividis")+
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
table(tels1_trf2$TELS1_group, tels1_trf2$TRF2_group)
##
## High Low
## High 6 8
## Low 8 7
chisq.test(table(tels1_trf2$TELS1_group, tels1_trf2$TRF2_group))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: table(tels1_trf2$TELS1_group, tels1_trf2$TRF2_group)
## X-squared = 0.03699, df = 1, p-value = 0.8475
cox_model <- coxph(Surv(OS_time, OS_event) ~ TELS1_group * TRF2_group, data = surv_data_tels1_trf2)
summary(cox_model)
## Call:
## coxph(formula = Surv(OS_time, OS_event) ~ TELS1_group * TRF2_group,
## data = surv_data_tels1_trf2)
##
## n= 29, number of events= 11
##
## coef exp(coef) se(coef) z Pr(>|z|)
## TELS1_groupLow 0.1491 1.1608 0.9142 0.163 0.870
## TRF2_groupLow 0.2080 1.2312 0.9148 0.227 0.820
## TELS1_groupLow:TRF2_groupLow 0.3503 1.4195 1.2276 0.285 0.775
##
## exp(coef) exp(-coef) lower .95 upper .95
## TELS1_groupLow 1.161 0.8615 0.1935 6.965
## TRF2_groupLow 1.231 0.8122 0.2049 7.397
## TELS1_groupLow:TRF2_groupLow 1.419 0.7045 0.1280 15.741
##
## Concordance= 0.599 (se = 0.085 )
## Likelihood ratio test= 0.71 on 3 df, p=0.9
## Wald test = 0.76 on 3 df, p=0.9
## Score (logrank) test = 0.79 on 3 df, p=0.9
if (!requireNamespace("ComplexHeatmap", quietly = TRUE)) {
BiocManager::install("ComplexHeatmap")
}
suppressPackageStartupMessages(library(ComplexHeatmap))
suppressPackageStartupMessages(library(circlize))
expr_mat <- tels1_trf2[, c("TELS1_expr", "TRF2_expr")]
rownames(expr_mat) <- tels1_trf2$patient
expr_mat <- t(scale(expr_mat))
clin_sub <- data.stage2[, c("submitter_id", "vital_status", "days_to_death", "days_to_last_follow_up")]
surv_annot <- dplyr::left_join(
data.frame(patient = colnames(expr_mat)),
clin_sub,
by = c("patient" = "submitter_id")
)
surv_annot$OS_time <- ifelse(is.na(surv_annot$days_to_death),
surv_annot$days_to_last_follow_up,
surv_annot$days_to_death)
surv_annot$OS_event <- ifelse(tolower(surv_annot$vital_status) %in% c("dead","deceased"), "Dead", "Alive")
ha <- HeatmapAnnotation(
OS_time = anno_barplot(surv_annot$OS_time, gp = gpar(fill = "grey")),
Status = surv_annot$OS_event,
col = list(Status = c("Alive" = "steelblue", "Dead" = "firebrick"))
)
Heatmap(expr_mat,
name = "Expression (z-score)",
top_annotation = ha,
show_row_names = TRUE,
show_column_names = FALSE,
column_title = "Patients (clustered by TELS1 + TRF2)")
tels1_clinical <- dplyr::left_join(
TELS1,
merged_clinical,
by = c("patient" = "submitter_id")
)
ggplot(merged_clinical, aes(x = age_at_diagnosis/365, fill = vital_status)) +
geom_histogram(binwidth = 5, position = "dodge") +
labs(title = "Age at Diagnosis by Vital Status",
x = "Age (years)", y = "Number of Patients") +
scale_fill_viridis_d(option = "cividis") +
theme_bw()
ggplot(merged_clinical, aes(x = gender.x, y = age_at_diagnosis/365, fill = gender.x)) +
geom_boxplot() +
labs(title = "Age at Diagnosis by Gender",
x = "Gender", y = "Age (years)", fill = "Gender") +
scale_fill_viridis_d(option = "cividis") +
theme_bw()
ggplot(tels1_clinical, aes(x = age_at_diagnosis/365, y = TELS1_expr, color = vital_status)) +
geom_point(size = 2) +
geom_smooth(method = "lm", se = FALSE) +
labs(title = "TELS1 Expression vs Age at Diagnosis",
x = "Age (years)", y = "TELS1 log2 CPM") +
scale_color_viridis_d(option = "cividis")+
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
trf2_clinical <- dplyr::left_join(
TRF2,
merged_clinical,
by = c("patient" = "submitter_id")
)
ggplot(trf2_clinical, aes(x = age_at_diagnosis/365, y = TRF2_expr, color = vital_status)) +
geom_point(size = 2) +
geom_smooth(method = "lm", se = FALSE) +
labs(title = "TRF2 Expression vs Age at Diagnosis",
x = "Age (years)", y = "TRF2 log2 CPM") +
scale_color_viridis_d(option = "cividis")+
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
if (!requireNamespace("ggalluvial", quietly = TRUE)) {
install.packages("ggalluvial")
}
suppressPackageStartupMessages(library(ggalluvial))
alluvial_df <- merged_tels1_trf2 %>%
dplyr::select(patient, TELS1_group, TRF2_group, vital_status) %>%
dplyr::filter(!is.na(TELS1_group), !is.na(TRF2_group), !is.na(vital_status))
ggplot(alluvial_df,
aes(axis1 = TELS1_group, axis2 = TRF2_group, axis3 = vital_status)) +
geom_alluvium(aes(fill = vital_status), width = 1/12) +
geom_stratum(width = 1/12, fill = "grey80", color = "black") +
geom_text(stat = "stratum", aes(label = paste0(after_stat(stratum), "\n(n=", after_stat(n), ")"))) +
scale_x_discrete(limits = c("TELS1", "TRF2", "Vital Status"), expand = c(.05, .05)) +
scale_fill_viridis_d(option = "cividis") +
theme_bw() +
labs(title = "Alluvial Plot of TELS1/TRF2 Groups and Vital Status")
data.stage2 <- merged_clinical[merged_clinical$primary_pathology_histological_type == "Undifferentiated Pleomorphic Sarcoma (UPS)", ]
DT::datatable(
data.stage2,
filter = "top",
options = list(scrollX = TRUE, pageLength = 5, autoWidth = TRUE),
rownames = FALSE
)
ggplotly(ggplot(data.stage2)+aes(x=vital_status,fill=vital_status)+geom_bar()+labs(fill="Vital Status", x="Vital Status", y= "Count")+theme_bw()+scale_fill_viridis_d(option = "cividis")+theme(panel.grid.major = element_blank(), panel.grid.minor =element_blank()))
library(edgeR)
dge <- DGEList(counts = RNA)
dge <- calcNormFactors(dge)
RNA_norm <- cpm(dge, log = TRUE)
RNA_norm <- cpm(dge, log = TRUE)
lms_patients <- data.stage2$submitter_id
RNA_norm <- RNA_norm[, substr(colnames(RNA_norm), 1, 12) %in% lms_patients]
ACTA2 <- RNA_norm["ENSG00000107796.13", , drop = FALSE] |>
t() |>
as.data.frame()
colnames(ACTA2) <- "ACTA2_expr"
ACTA2$patient <- substr(rownames(ACTA2), 1, 12)
ACTA2$ACTA2_group <- ifelse(ACTA2$ACTA2_expr <= median(ACTA2$ACTA2_expr),
"Low", "High")
ACTA2 <- ACTA2[!duplicated(ACTA2$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status")]
merged <- dplyr::left_join(ACTA2, clin_sub,
by = c("patient" = "submitter_id"))
p1 <- ggplot(ACTA2, aes(x = "", y = ACTA2_expr, fill = "ACTA2")) +
geom_boxplot() +
labs(fill = "ACTA2 RNA (log2 CPM)", x = "", y = "log2 CPM") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p3 <- ggplot(merged, aes(x = ACTA2_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "ACTA2 Expression", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p3)
DAXX <- RNA_norm["ENSG00000204209.13", , drop = FALSE] |>
t() |>
as.data.frame()
colnames(DAXX) <- "DAXX_expr"
DAXX$patient <- substr(rownames(DAXX), 1, 12)
DAXX$DAXX_group <- ifelse(DAXX$DAXX_expr <= median(DAXX$DAXX_expr),
"Low", "High")
DAXX <- DAXX[!duplicated(DAXX$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status")]
merged <- dplyr::left_join(DAXX, clin_sub,
by = c("patient" = "submitter_id"))
p1 <- ggplot(DAXX, aes(x = "", y = DAXX_expr, fill = "DAXX")) +
geom_boxplot() +
labs(fill = "DAXX RNA (log2 CPM)", x = "", y = "log2 CPM") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p3 <- ggplot(merged, aes(x = DAXX_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "DAXX Expression", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p3)
genes <- c("ENSG00000085224.23",
"ENSG00000204209.13")
atrx_daxx <- RNA_norm[genes, , drop = FALSE] |>
t() |>
as.data.frame()
colnames(atrx_daxx) <- c("ATRX_expr", "DAXX_expr")
atrx_daxx$patient <- substr(rownames(atrx_daxx), 1, 12)
atrx_daxx$ATRX_group <- ifelse(atrx_daxx$ATRX_expr <= median(atrx_daxx$ATRX_expr),
"Low", "High")
atrx_daxx$DAXX_group <- ifelse(atrx_daxx$DAXX_expr <= median(atrx_daxx$DAXX_expr),
"Low", "High")
atrx_daxx$Combined_group <- dplyr::case_when(
atrx_daxx$ATRX_group == "Low" & atrx_daxx$DAXX_group == "Low" ~ "Both Low",
atrx_daxx$ATRX_group == "High" & atrx_daxx$DAXX_group == "High" ~ "Both High",
TRUE ~ "One High"
)
atrx_daxx <- atrx_daxx[!duplicated(atrx_daxx$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status", "days_to_death", "days_to_last_follow_up")]
merged_atrx_daxx <- dplyr::left_join(atrx_daxx, clin_sub,
by = c("patient" = "submitter_id"))
p1 <- ggplot(atrx_daxx, aes(x = Combined_group, fill = Combined_group)) +
geom_bar() +
labs(fill = "ATRX & DAXX Expression", x = "ATRX & DAXX Groups", y = "Count") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p2 <- ggplot(merged_atrx_daxx, aes(x = Combined_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "ATRX & DAXX Groups", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p2)
library(survival)
library(survminer)
surv_data <- merged_atrx_daxx %>%
dplyr::mutate(
OS_time = ifelse(is.na(days_to_death),
days_to_last_follow_up,
days_to_death),
OS_event = ifelse(tolower(vital_status) %in% c("dead","deceased"), 1, 0)
) %>%
dplyr::filter(!is.na(OS_time) & !is.na(OS_event))
fit <- survfit(Surv(OS_time, OS_event) ~ Combined_group, data = surv_data)
km_plot <- ggsurvplot(
fit,
data = surv_data,
risk.table = TRUE,
pval = TRUE,
conf.int = TRUE,
legend.title = "ATRX/DAXX",
legend.labs = levels(factor(surv_data$Combined_group)),
palette = viridis(length(unique(surv_data$Combined_group)), option = "cividis"),
xlab = "Days",
ylab = "Overall survival probability",
ggtheme = theme_bw()
)
km_plot
TELS1 <- RNA_norm["ENSG00000121903.14", , drop = FALSE] |>
t() |>
as.data.frame()
colnames(TELS1) <- "TELS1_expr"
TELS1$patient <- substr(rownames(TELS1), 1, 12)
TELS1$TELS1_group <- ifelse(TELS1$TELS1_expr <= median(TELS1$TELS1_expr),
"Low", "High")
TELS1 <- TELS1[!duplicated(TELS1$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status")]
merged <- dplyr::left_join(TELS1, clin_sub,
by = c("patient" = "submitter_id"))
p1 <- ggplot(TELS1, aes(x = "", y = TELS1_expr, fill = "TELS1")) +
geom_boxplot() +
labs(fill = "TELS1 RNA (log2 CPM)", x = "", y = "log2 CPM") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p3 <- ggplot(merged, aes(x = TELS1_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "TELS1 Expression", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p3)
clin_sub <- data.stage2[, c("submitter_id", "vital_status", "days_to_death", "days_to_last_follow_up")]
merged_tels1 <- dplyr::left_join(TELS1, clin_sub, by = c("patient" = "submitter_id"))
surv_data <- merged_tels1 %>%
dplyr::mutate(
OS_time = ifelse(is.na(days_to_death),
days_to_last_follow_up,
days_to_death),
OS_event = ifelse(tolower(vital_status) %in% c("dead","deceased"), 1, 0)
) %>%
dplyr::filter(!is.na(OS_time) & !is.na(OS_event))
fit <- survfit(Surv(OS_time, OS_event) ~ TELS1_group, data = surv_data)
km_plot <- ggsurvplot(
fit,
data = surv_data,
risk.table = TRUE,
pval = TRUE,
conf.int = TRUE,
legend.title = "TELS1",
legend.labs = levels(factor(surv_data$TELS1_group)),
palette = viridis(length(unique(surv_data$TELS1_group)), option = "cividis"),
xlab = "Days",
ylab = "Overall survival probability",
ggtheme = theme_bw()
)
km_plot
TRF2 <- RNA_norm["ENSG00000132604.11", , drop = FALSE] |>
t() |>
as.data.frame()
colnames(TRF2) <- "TRF2_expr"
TRF2$patient <- substr(rownames(TRF2), 1, 12)
TRF2$TRF2_group <- ifelse(TRF2$TRF2_expr <= median(TRF2$TRF2_expr),
"Low", "High")
TRF2 <- TRF2[!duplicated(TRF2$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status")]
merged <- dplyr::left_join(TRF2, clin_sub,
by = c("patient" = "submitter_id"))
p1 <- ggplot(TRF2, aes(x = "", y = TRF2_expr, fill = "TRF2")) +
geom_boxplot() +
labs(fill = "TRF2 RNA (log2 CPM)", x = "", y = "log2 CPM") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p3 <- ggplot(merged, aes(x = TRF2_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "TRF2 Expression", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p3)
clin_sub <- data.stage2[, c("submitter_id", "vital_status", "days_to_death", "days_to_last_follow_up")]
merged_trf2 <- dplyr::left_join(TRF2, clin_sub,
by = c("patient" = "submitter_id"))
surv_data_trf2 <- merged_trf2 %>%
dplyr::mutate(
OS_time = ifelse(is.na(days_to_death),
days_to_last_follow_up,
days_to_death),
OS_event = ifelse(tolower(vital_status) %in% c("dead","deceased"), 1, 0)
) %>%
dplyr::filter(!is.na(OS_time) & !is.na(OS_event))
fit_trf2 <- survfit(Surv(OS_time, OS_event) ~ TRF2_group, data = surv_data_trf2)
ggsurvplot(
fit_trf2,
data = surv_data_trf2,
risk.table = TRUE,
pval = TRUE,
conf.int = TRUE,
legend.title = "TRF2",
legend.labs = levels(factor(surv_data_trf2$TRF2_group)),
palette = viridis(length(unique(surv_data_trf2$TRF2_group)), option = "cividis"),
xlab = "Days",
ylab = "Overall survival probability",
ggtheme = theme_bw()
)
tels1_trf2 <- dplyr::inner_join(TELS1, TRF2, by = "patient")
tels1_trf2$Combined_group <- dplyr::case_when(
tels1_trf2$TELS1_group == "Low" & tels1_trf2$TRF2_group == "Low" ~ "Both Low",
tels1_trf2$TELS1_group == "High" & tels1_trf2$TRF2_group == "High" ~ "Both High",
TRUE ~ "One High"
)
tels1_trf2 <- tels1_trf2[!duplicated(tels1_trf2$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status")]
merged_tels1_trf2 <- dplyr::left_join(tels1_trf2, clin_sub, by = c("patient" = "submitter_id"))
p1 <- ggplot(tels1_trf2, aes(x = Combined_group, fill = Combined_group)) +
geom_bar() +
labs(fill = "TELS1 & TRF2 Expression", x = "TELS1 & TRF2 Groups", y = "Count") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p2 <- ggplot(merged_tels1_trf2, aes(x = Combined_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "TELS1 & TRF2 Groups", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p2)
clin_sub <- data.stage2[, c("submitter_id", "vital_status", "days_to_death", "days_to_last_follow_up")]
merged_tels1_trf2 <- dplyr::left_join(tels1_trf2, clin_sub, by = c("patient" = "submitter_id"))
surv_data_tels1_trf2 <- merged_tels1_trf2 %>%
dplyr::mutate(
OS_time = ifelse(is.na(days_to_death),
days_to_last_follow_up,
days_to_death),
OS_event = ifelse(tolower(vital_status) %in% c("dead","deceased"), 1, 0)
) %>%
dplyr::filter(!is.na(OS_time) & !is.na(OS_event))
fit_tels1_trf2 <- survfit(Surv(OS_time, OS_event) ~ Combined_group, data = surv_data_tels1_trf2)
ggsurvplot(
fit_tels1_trf2,
data = surv_data_tels1_trf2,
risk.table = TRUE,
pval = TRUE,
conf.int = TRUE,
legend.title = "TELS1/TRF2",
legend.labs = levels(factor(surv_data_tels1_trf2$Combined_group)),
palette = viridis(length(unique(surv_data_tels1_trf2$Combined_group)), option = "cividis"),
xlab = "Days",
ylab = "Overall survival probability",
ggtheme = theme_bw()
)
cor.test(tels1_trf2$TELS1_expr, tels1_trf2$TRF2_expr)
##
## Pearson's product-moment correlation
##
## data: tels1_trf2$TELS1_expr and tels1_trf2$TRF2_expr
## t = 0.6089, df = 19, p-value = 0.5498
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.3119714 0.5379086
## sample estimates:
## cor
## 0.1383471
ggplot(tels1_trf2, aes(x = TELS1_expr, y = TRF2_expr)) +
geom_point(aes(color = Combined_group)) +
geom_smooth(method = "lm", se = FALSE) +
scale_color_viridis_d(option = "cividis")+
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
table(tels1_trf2$TELS1_group, tels1_trf2$TRF2_group)
##
## High Low
## High 6 4
## Low 4 7
chisq.test(table(tels1_trf2$TELS1_group, tels1_trf2$TRF2_group))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: table(tels1_trf2$TELS1_group, tels1_trf2$TRF2_group)
## X-squared = 0.41696, df = 1, p-value = 0.5185
cox_model <- coxph(Surv(OS_time, OS_event) ~ TELS1_group * TRF2_group, data = surv_data_tels1_trf2)
summary(cox_model)
## Call:
## coxph(formula = Surv(OS_time, OS_event) ~ TELS1_group * TRF2_group,
## data = surv_data_tels1_trf2)
##
## n= 21, number of events= 5
##
## coef exp(coef) se(coef) z Pr(>|z|)
## TELS1_groupLow -4.101e+01 1.552e-18 2.123e+04 -0.002 0.998
## TRF2_groupLow 4.492e-01 1.567e+00 1.010e+00 0.445 0.657
## TELS1_groupLow:TRF2_groupLow 1.951e+01 2.983e+08 1.515e+04 0.001 0.999
##
## exp(coef) exp(-coef) lower .95 upper .95
## TELS1_groupLow 1.552e-18 6.443e+17 0.0000 Inf
## TRF2_groupLow 1.567e+00 6.381e-01 0.2165 11.34
## TELS1_groupLow:TRF2_groupLow 2.983e+08 3.353e-09 0.0000 Inf
##
## Concordance= 0.894 (se = 0.065 )
## Likelihood ratio test= 8.5 on 3 df, p=0.04
## Wald test = 0.2 on 3 df, p=1
## Score (logrank) test = 6.35 on 3 df, p=0.1
if (!requireNamespace("ComplexHeatmap", quietly = TRUE)) {
BiocManager::install("ComplexHeatmap")
}
suppressPackageStartupMessages(library(ComplexHeatmap))
suppressPackageStartupMessages(library(circlize))
expr_mat <- tels1_trf2[, c("TELS1_expr", "TRF2_expr")]
rownames(expr_mat) <- tels1_trf2$patient
expr_mat <- t(scale(expr_mat))
clin_sub <- data.stage2[, c("submitter_id", "vital_status", "days_to_death", "days_to_last_follow_up")]
surv_annot <- dplyr::left_join(
data.frame(patient = colnames(expr_mat)),
clin_sub,
by = c("patient" = "submitter_id")
)
surv_annot$OS_time <- ifelse(is.na(surv_annot$days_to_death),
surv_annot$days_to_last_follow_up,
surv_annot$days_to_death)
surv_annot$OS_event <- ifelse(tolower(surv_annot$vital_status) %in% c("dead","deceased"), "Dead", "Alive")
ha <- HeatmapAnnotation(
OS_time = anno_barplot(surv_annot$OS_time, gp = gpar(fill = "grey")),
Status = surv_annot$OS_event,
col = list(Status = c("Alive" = "steelblue", "Dead" = "firebrick"))
)
Heatmap(expr_mat,
name = "Expression (z-score)",
top_annotation = ha,
show_row_names = TRUE,
show_column_names = FALSE,
column_title = "Patients (clustered by TELS1 + TRF2)")
tels1_clinical <- dplyr::left_join(
TELS1,
merged_clinical,
by = c("patient" = "submitter_id")
)
ggplot(merged_clinical, aes(x = age_at_diagnosis/365, fill = vital_status)) +
geom_histogram(binwidth = 5, position = "dodge") +
labs(title = "Age at Diagnosis by Vital Status",
x = "Age (years)", y = "Number of Patients") +
scale_fill_viridis_d(option = "cividis") +
theme_bw()
ggplot(merged_clinical, aes(x = gender.x, y = age_at_diagnosis/365, fill = gender.x)) +
geom_boxplot() +
labs(title = "Age at Diagnosis by Gender",
x = "Gender", y = "Age (years)", fill = "Gender") +
scale_fill_viridis_d(option = "cividis") +
theme_bw()
ggplot(tels1_clinical, aes(x = age_at_diagnosis/365, y = TELS1_expr, color = vital_status)) +
geom_point(size = 2) +
geom_smooth(method = "lm", se = FALSE) +
labs(title = "TELS1 Expression vs Age at Diagnosis",
x = "Age (years)", y = "TELS1 log2 CPM") +
scale_color_viridis_d(option = "cividis")+
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
trf2_clinical <- dplyr::left_join(
TRF2,
merged_clinical,
by = c("patient" = "submitter_id")
)
ggplot(trf2_clinical, aes(x = age_at_diagnosis/365, y = TRF2_expr, color = vital_status)) +
geom_point(size = 2) +
geom_smooth(method = "lm", se = FALSE) +
labs(title = "TRF2 Expression vs Age at Diagnosis",
x = "Age (years)", y = "TRF2 log2 CPM") +
scale_color_viridis_d(option = "cividis")+
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
if (!requireNamespace("ggalluvial", quietly = TRUE)) {
install.packages("ggalluvial")
}
suppressPackageStartupMessages(library(ggalluvial))
alluvial_df <- merged_tels1_trf2 %>%
dplyr::select(patient, TELS1_group, TRF2_group, vital_status) %>%
dplyr::filter(!is.na(TELS1_group), !is.na(TRF2_group), !is.na(vital_status))
ggplot(alluvial_df,
aes(axis1 = TELS1_group, axis2 = TRF2_group, axis3 = vital_status)) +
geom_alluvium(aes(fill = vital_status), width = 1/12) +
geom_stratum(width = 1/12, fill = "grey80", color = "black") +
geom_text(stat = "stratum", aes(label = paste0(after_stat(stratum), "\n(n=", after_stat(n), ")"))) +
scale_x_discrete(limits = c("TELS1", "TRF2", "Vital Status"), expand = c(.05, .05)) +
scale_fill_viridis_d(option = "cividis") +
theme_bw() +
labs(title = "Alluvial Plot of TELS1/TRF2 Groups and Vital Status")
data.stage2 <- merged_clinical[merged_clinical$primary_pathology_histological_type == "Myxofibrosarcoma", ]
DT::datatable(
data.stage2,
filter = "top",
options = list(scrollX = TRUE, pageLength = 5, autoWidth = TRUE),
rownames = FALSE
)
ggplotly(ggplot(data.stage2)+aes(x=vital_status,fill=vital_status)+geom_bar()+labs(fill="Vital Status", x="Vital Status", y= "Count")+theme_bw()+scale_fill_viridis_d(option = "cividis")+theme(panel.grid.major = element_blank(), panel.grid.minor =element_blank()))
library(edgeR)
dge <- DGEList(counts = RNA)
dge <- calcNormFactors(dge)
RNA_norm <- cpm(dge, log = TRUE)
RNA_norm <- cpm(dge, log = TRUE)
lms_patients <- data.stage2$submitter_id
RNA_norm <- RNA_norm[, substr(colnames(RNA_norm), 1, 12) %in% lms_patients]
ACTA2 <- RNA_norm["ENSG00000107796.13", , drop = FALSE] |>
t() |>
as.data.frame()
colnames(ACTA2) <- "ACTA2_expr"
ACTA2$patient <- substr(rownames(ACTA2), 1, 12)
ACTA2$ACTA2_group <- ifelse(ACTA2$ACTA2_expr <= median(ACTA2$ACTA2_expr),
"Low", "High")
ACTA2 <- ACTA2[!duplicated(ACTA2$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status")]
merged <- dplyr::left_join(ACTA2, clin_sub,
by = c("patient" = "submitter_id"))
p1 <- ggplot(ACTA2, aes(x = "", y = ACTA2_expr, fill = "ACTA2")) +
geom_boxplot() +
labs(fill = "ACTA2 RNA (log2 CPM)", x = "", y = "log2 CPM") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p3 <- ggplot(merged, aes(x = ACTA2_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "ACTA2 Expression", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p3)
DAXX <- RNA_norm["ENSG00000204209.13", , drop = FALSE] |>
t() |>
as.data.frame()
colnames(DAXX) <- "DAXX_expr"
DAXX$patient <- substr(rownames(DAXX), 1, 12)
DAXX$DAXX_group <- ifelse(DAXX$DAXX_expr <= median(DAXX$DAXX_expr),
"Low", "High")
DAXX <- DAXX[!duplicated(DAXX$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status")]
merged <- dplyr::left_join(DAXX, clin_sub,
by = c("patient" = "submitter_id"))
p1 <- ggplot(DAXX, aes(x = "", y = DAXX_expr, fill = "DAXX")) +
geom_boxplot() +
labs(fill = "DAXX RNA (log2 CPM)", x = "", y = "log2 CPM") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p3 <- ggplot(merged, aes(x = DAXX_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "DAXX Expression", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p3)
genes <- c("ENSG00000085224.23",
"ENSG00000204209.13")
atrx_daxx <- RNA_norm[genes, , drop = FALSE] |>
t() |>
as.data.frame()
colnames(atrx_daxx) <- c("ATRX_expr", "DAXX_expr")
atrx_daxx$patient <- substr(rownames(atrx_daxx), 1, 12)
atrx_daxx$ATRX_group <- ifelse(atrx_daxx$ATRX_expr <= median(atrx_daxx$ATRX_expr),
"Low", "High")
atrx_daxx$DAXX_group <- ifelse(atrx_daxx$DAXX_expr <= median(atrx_daxx$DAXX_expr),
"Low", "High")
atrx_daxx$Combined_group <- dplyr::case_when(
atrx_daxx$ATRX_group == "Low" & atrx_daxx$DAXX_group == "Low" ~ "Both Low",
atrx_daxx$ATRX_group == "High" & atrx_daxx$DAXX_group == "High" ~ "Both High",
TRUE ~ "One High"
)
atrx_daxx <- atrx_daxx[!duplicated(atrx_daxx$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status", "days_to_death", "days_to_last_follow_up")]
merged_atrx_daxx <- dplyr::left_join(atrx_daxx, clin_sub,
by = c("patient" = "submitter_id"))
p1 <- ggplot(atrx_daxx, aes(x = Combined_group, fill = Combined_group)) +
geom_bar() +
labs(fill = "ATRX & DAXX Expression", x = "ATRX & DAXX Groups", y = "Count") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p2 <- ggplot(merged_atrx_daxx, aes(x = Combined_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "ATRX & DAXX Groups", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p2)
library(survival)
library(survminer)
surv_data <- merged_atrx_daxx %>%
dplyr::mutate(
OS_time = ifelse(is.na(days_to_death),
days_to_last_follow_up,
days_to_death),
OS_event = ifelse(tolower(vital_status) %in% c("dead","deceased"), 1, 0)
) %>%
dplyr::filter(!is.na(OS_time) & !is.na(OS_event))
fit <- survfit(Surv(OS_time, OS_event) ~ Combined_group, data = surv_data)
km_plot <- ggsurvplot(
fit,
data = surv_data,
risk.table = TRUE,
pval = TRUE,
conf.int = TRUE,
legend.title = "ATRX/DAXX",
legend.labs = levels(factor(surv_data$Combined_group)),
palette = viridis(length(unique(surv_data$Combined_group)), option = "cividis"),
xlab = "Days",
ylab = "Overall survival probability",
ggtheme = theme_bw()
)
km_plot
TELS1 <- RNA_norm["ENSG00000121903.14", , drop = FALSE] |>
t() |>
as.data.frame()
colnames(TELS1) <- "TELS1_expr"
TELS1$patient <- substr(rownames(TELS1), 1, 12)
TELS1$TELS1_group <- ifelse(TELS1$TELS1_expr <= median(TELS1$TELS1_expr),
"Low", "High")
TELS1 <- TELS1[!duplicated(TELS1$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status")]
merged <- dplyr::left_join(TELS1, clin_sub,
by = c("patient" = "submitter_id"))
p1 <- ggplot(TELS1, aes(x = "", y = TELS1_expr, fill = "TELS1")) +
geom_boxplot() +
labs(fill = "TELS1 RNA (log2 CPM)", x = "", y = "log2 CPM") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p3 <- ggplot(merged, aes(x = TELS1_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "TELS1 Expression", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p3)
clin_sub <- data.stage2[, c("submitter_id", "vital_status", "days_to_death", "days_to_last_follow_up")]
merged_tels1 <- dplyr::left_join(TELS1, clin_sub, by = c("patient" = "submitter_id"))
surv_data <- merged_tels1 %>%
dplyr::mutate(
OS_time = ifelse(is.na(days_to_death),
days_to_last_follow_up,
days_to_death),
OS_event = ifelse(tolower(vital_status) %in% c("dead","deceased"), 1, 0)
) %>%
dplyr::filter(!is.na(OS_time) & !is.na(OS_event))
fit <- survfit(Surv(OS_time, OS_event) ~ TELS1_group, data = surv_data)
km_plot <- ggsurvplot(
fit,
data = surv_data,
risk.table = TRUE,
pval = TRUE,
conf.int = TRUE,
legend.title = "TELS1",
legend.labs = levels(factor(surv_data$TELS1_group)),
palette = viridis(length(unique(surv_data$TELS1_group)), option = "cividis"),
xlab = "Days",
ylab = "Overall survival probability",
ggtheme = theme_bw()
)
km_plot
TRF2 <- RNA_norm["ENSG00000132604.11", , drop = FALSE] |>
t() |>
as.data.frame()
colnames(TRF2) <- "TRF2_expr"
TRF2$patient <- substr(rownames(TRF2), 1, 12)
TRF2$TRF2_group <- ifelse(TRF2$TRF2_expr <= median(TRF2$TRF2_expr),
"Low", "High")
TRF2 <- TRF2[!duplicated(TRF2$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status")]
merged <- dplyr::left_join(TRF2, clin_sub,
by = c("patient" = "submitter_id"))
p1 <- ggplot(TRF2, aes(x = "", y = TRF2_expr, fill = "TRF2")) +
geom_boxplot() +
labs(fill = "TRF2 RNA (log2 CPM)", x = "", y = "log2 CPM") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p3 <- ggplot(merged, aes(x = TRF2_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "TRF2 Expression", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p3)
clin_sub <- data.stage2[, c("submitter_id", "vital_status", "days_to_death", "days_to_last_follow_up")]
merged_trf2 <- dplyr::left_join(TRF2, clin_sub,
by = c("patient" = "submitter_id"))
surv_data_trf2 <- merged_trf2 %>%
dplyr::mutate(
OS_time = ifelse(is.na(days_to_death),
days_to_last_follow_up,
days_to_death),
OS_event = ifelse(tolower(vital_status) %in% c("dead","deceased"), 1, 0)
) %>%
dplyr::filter(!is.na(OS_time) & !is.na(OS_event))
fit_trf2 <- survfit(Surv(OS_time, OS_event) ~ TRF2_group, data = surv_data_trf2)
ggsurvplot(
fit_trf2,
data = surv_data_trf2,
risk.table = TRUE,
pval = TRUE,
conf.int = TRUE,
legend.title = "TRF2",
legend.labs = levels(factor(surv_data_trf2$TRF2_group)),
palette = viridis(length(unique(surv_data_trf2$TRF2_group)), option = "cividis"),
xlab = "Days",
ylab = "Overall survival probability",
ggtheme = theme_bw()
)
tels1_trf2 <- dplyr::inner_join(TELS1, TRF2, by = "patient")
tels1_trf2$Combined_group <- dplyr::case_when(
tels1_trf2$TELS1_group == "Low" & tels1_trf2$TRF2_group == "Low" ~ "Both Low",
tels1_trf2$TELS1_group == "High" & tels1_trf2$TRF2_group == "High" ~ "Both High",
TRUE ~ "One High"
)
tels1_trf2 <- tels1_trf2[!duplicated(tels1_trf2$patient), ]
clin_sub <- data.stage2[, c("submitter_id", "vital_status")]
merged_tels1_trf2 <- dplyr::left_join(tels1_trf2, clin_sub, by = c("patient" = "submitter_id"))
p1 <- ggplot(tels1_trf2, aes(x = Combined_group, fill = Combined_group)) +
geom_bar() +
labs(fill = "TELS1 & TRF2 Expression", x = "TELS1 & TRF2 Groups", y = "Count") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p2 <- ggplot(merged_tels1_trf2, aes(x = Combined_group, fill = vital_status)) +
geom_bar(position = "fill") +
labs(fill = "Vital Status", x = "TELS1 & TRF2 Groups", y = "Frequency") +
theme_bw() +
scale_fill_viridis_d(option = "cividis") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p1)
ggplotly(p2)
clin_sub <- data.stage2[, c("submitter_id", "vital_status", "days_to_death", "days_to_last_follow_up")]
merged_tels1_trf2 <- dplyr::left_join(tels1_trf2, clin_sub, by = c("patient" = "submitter_id"))
surv_data_tels1_trf2 <- merged_tels1_trf2 %>%
dplyr::mutate(
OS_time = ifelse(is.na(days_to_death),
days_to_last_follow_up,
days_to_death),
OS_event = ifelse(tolower(vital_status) %in% c("dead","deceased"), 1, 0)
) %>%
dplyr::filter(!is.na(OS_time) & !is.na(OS_event))
fit_tels1_trf2 <- survfit(Surv(OS_time, OS_event) ~ Combined_group, data = surv_data_tels1_trf2)
ggsurvplot(
fit_tels1_trf2,
data = surv_data_tels1_trf2,
risk.table = TRUE,
pval = TRUE,
conf.int = TRUE,
legend.title = "TELS1/TRF2",
legend.labs = levels(factor(surv_data_tels1_trf2$Combined_group)),
palette = viridis(length(unique(surv_data_tels1_trf2$Combined_group)), option = "cividis"),
xlab = "Days",
ylab = "Overall survival probability",
ggtheme = theme_bw()
)
cor.test(tels1_trf2$TELS1_expr, tels1_trf2$TRF2_expr)
##
## Pearson's product-moment correlation
##
## data: tels1_trf2$TELS1_expr and tels1_trf2$TRF2_expr
## t = 1.1056, df = 23, p-value = 0.2803
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1870919 0.5692479
## sample estimates:
## cor
## 0.2246461
ggplot(tels1_trf2, aes(x = TELS1_expr, y = TRF2_expr)) +
geom_point(aes(color = Combined_group)) +
geom_smooth(method = "lm", se = FALSE) +
scale_color_viridis_d(option = "cividis")+
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
table(tels1_trf2$TELS1_group, tels1_trf2$TRF2_group)
##
## High Low
## High 6 6
## Low 6 7
chisq.test(table(tels1_trf2$TELS1_group, tels1_trf2$TRF2_group))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: table(tels1_trf2$TELS1_group, tels1_trf2$TRF2_group)
## X-squared = 0, df = 1, p-value = 1
cox_model <- coxph(Surv(OS_time, OS_event) ~ TELS1_group * TRF2_group, data = surv_data_tels1_trf2)
summary(cox_model)
## Call:
## coxph(formula = Surv(OS_time, OS_event) ~ TELS1_group * TRF2_group,
## data = surv_data_tels1_trf2)
##
## n= 25, number of events= 9
##
## coef exp(coef) se(coef) z Pr(>|z|)
## TELS1_groupLow -2.091e+01 8.278e-10 1.160e+04 -0.002 0.999
## TRF2_groupLow -6.846e-01 5.043e-01 7.889e-01 -0.868 0.386
## TELS1_groupLow:TRF2_groupLow 1.971e+01 3.643e+08 1.160e+04 0.002 0.999
##
## exp(coef) exp(-coef) lower .95 upper .95
## TELS1_groupLow 8.278e-10 1.208e+09 0.0000 Inf
## TRF2_groupLow 5.043e-01 1.983e+00 0.1074 2.367
## TELS1_groupLow:TRF2_groupLow 3.643e+08 2.745e-09 0.0000 Inf
##
## Concordance= 0.78 (se = 0.068 )
## Likelihood ratio test= 10.19 on 3 df, p=0.02
## Wald test = 4.09 on 3 df, p=0.3
## Score (logrank) test = 9.49 on 3 df, p=0.02
if (!requireNamespace("ComplexHeatmap", quietly = TRUE)) {
BiocManager::install("ComplexHeatmap")
}
suppressPackageStartupMessages(library(ComplexHeatmap))
suppressPackageStartupMessages(library(circlize))
expr_mat <- tels1_trf2[, c("TELS1_expr", "TRF2_expr")]
rownames(expr_mat) <- tels1_trf2$patient
expr_mat <- t(scale(expr_mat))
clin_sub <- data.stage2[, c("submitter_id", "vital_status", "days_to_death", "days_to_last_follow_up")]
surv_annot <- dplyr::left_join(
data.frame(patient = colnames(expr_mat)),
clin_sub,
by = c("patient" = "submitter_id")
)
surv_annot$OS_time <- ifelse(is.na(surv_annot$days_to_death),
surv_annot$days_to_last_follow_up,
surv_annot$days_to_death)
surv_annot$OS_event <- ifelse(tolower(surv_annot$vital_status) %in% c("dead","deceased"), "Dead", "Alive")
ha <- HeatmapAnnotation(
OS_time = anno_barplot(surv_annot$OS_time, gp = gpar(fill = "grey")),
Status = surv_annot$OS_event,
col = list(Status = c("Alive" = "steelblue", "Dead" = "firebrick"))
)
Heatmap(expr_mat,
name = "Expression (z-score)",
top_annotation = ha,
show_row_names = TRUE,
show_column_names = FALSE,
column_title = "Patients (clustered by TELS1 + TRF2)")
tels1_clinical <- dplyr::left_join(
TELS1,
merged_clinical,
by = c("patient" = "submitter_id")
)
ggplot(merged_clinical, aes(x = age_at_diagnosis/365, fill = vital_status)) +
geom_histogram(binwidth = 5, position = "dodge") +
labs(title = "Age at Diagnosis by Vital Status",
x = "Age (years)", y = "Number of Patients") +
scale_fill_viridis_d(option = "cividis") +
theme_bw()
ggplot(merged_clinical, aes(x = gender.x, y = age_at_diagnosis/365, fill = gender.x)) +
geom_boxplot() +
labs(title = "Age at Diagnosis by Gender",
x = "Gender", y = "Age (years)", fill = "Gender") +
scale_fill_viridis_d(option = "cividis") +
theme_bw()
ggplot(tels1_clinical, aes(x = age_at_diagnosis/365, y = TELS1_expr, color = vital_status)) +
geom_point(size = 2) +
geom_smooth(method = "lm", se = FALSE) +
labs(title = "TELS1 Expression vs Age at Diagnosis",
x = "Age (years)", y = "TELS1 log2 CPM") +
scale_color_viridis_d(option = "cividis")+
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
trf2_clinical <- dplyr::left_join(
TRF2,
merged_clinical,
by = c("patient" = "submitter_id")
)
ggplot(trf2_clinical, aes(x = age_at_diagnosis/365, y = TRF2_expr, color = vital_status)) +
geom_point(size = 2) +
geom_smooth(method = "lm", se = FALSE) +
labs(title = "TRF2 Expression vs Age at Diagnosis",
x = "Age (years)", y = "TRF2 log2 CPM") +
scale_color_viridis_d(option = "cividis")+
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
if (!requireNamespace("ggalluvial", quietly = TRUE)) {
install.packages("ggalluvial")
}
suppressPackageStartupMessages(library(ggalluvial))
alluvial_df <- merged_tels1_trf2 %>%
dplyr::select(patient, TELS1_group, TRF2_group, vital_status) %>%
dplyr::filter(!is.na(TELS1_group), !is.na(TRF2_group), !is.na(vital_status))
ggplot(alluvial_df,
aes(axis1 = TELS1_group, axis2 = TRF2_group, axis3 = vital_status)) +
geom_alluvium(aes(fill = vital_status), width = 1/12) +
geom_stratum(width = 1/12, fill = "grey80", color = "black") +
geom_text(stat = "stratum", aes(label = paste0(after_stat(stratum), "\n(n=", after_stat(n), ")"))) +
scale_x_discrete(limits = c("TELS1", "TRF2", "Vital Status"), expand = c(.05, .05)) +
scale_fill_viridis_d(option = "cividis") +
theme_bw() +
labs(title = "Alluvial Plot of TELS1/TRF2 Groups and Vital Status")