Files
mole_broad_spectrum_parallel/workflow/05.analyze_mce_predictions.Rmd
mm644706215 a56e60e9a3 first add
2025-10-16 17:21:48 +08:00

1517 lines
46 KiB
Plaintext

---
title: "Analysing MCE predictions"
author: "Roberto Olayo Alarcon"
date: "27/03/2024"
output: github_document
---
In this file we analyse the predictions made in the previous step. At this point a literature search has been performed in order to validate predictions
```{r setup, message=FALSE}
library(tidyverse)
library(ggrepel)
library(readxl)
library(uwot)
```
## Prepare directories.
```{r prep.directories}
PREDICTIONS_DIR <- "../data/04.new_predictions"
OUTPUT_DIR <- "../data/05.analyze_mce_predictions"
# Create output dir
if(!dir.exists(OUTPUT_DIR)){
dir.create(OUTPUT_DIR)
}
```
## Library Representation.
In this section, we perform dimensionality reduction on the MolE representation of the MedChemExpress library for which we make predictions.
```{r read.mole}
# Read Prediction Output
mole_predictions <- read_excel(file.path(PREDICTIONS_DIR, "mole_mce_predictions_litsearch.xlsx"), sheet = "mole_prediction_overview")
mole_predictions.over10 <- read_excel(file.path(PREDICTIONS_DIR, "mole_mce_predictions_litsearch.xlsx"), sheet = "mole_over10") %>%
mutate(`Reported Activity` = if_else(is.na(`Reported Activity`), "None", `Reported Activity`))
# Read the representation
mole_representation <- vroom::vroom(file.path(PREDICTIONS_DIR, "MolE_representation_medchemexpress.tsv.gz"),
show_col_types = FALSE) %>%
# Rename id column
rename("Catalog Number" = "...1") %>%
# Filter for the molecules for which we make predictions
filter(`Catalog Number` %in% mole_predictions$`Catalog Number`) %>%
# Set column to row.names
column_to_rownames("Catalog Number")
```
```{r read.fps}
# Read Prediction Output
ecfp4_predictions <- read_excel(file.path(PREDICTIONS_DIR, "ecfp4_mce_predictions_litsearch.xlsx"), sheet = "ecfp4_prediction_overview")
ecfp4_predictions.over10 <- read_excel(file.path(PREDICTIONS_DIR, "ecfp4_mce_predictions_litsearch.xlsx"), sheet = "ecfp4_over10") %>%
mutate(`Reported Activity` = if_else(is.na(`Reported Activity`), "None", `Reported Activity`))
# Read the representation
ecfp4_representation <- vroom::vroom(file.path(PREDICTIONS_DIR, "ecfp4_representation_medchemexpress.tsv.gz"),
show_col_types = FALSE) %>%
# Rename id column
rename("Catalog Number" = "...1") %>%
# Filter for the molecules for which we make predictions
filter(`Catalog Number` %in% mole_predictions$`Catalog Number`) %>%
# Set column to row.names
column_to_rownames("Catalog Number")
```
### Preprocess representation.
Remove constant and correlated features
```{r preprocess.func}
remove_constant <- function(m.df, var.cutoff){
#' Removes constant columns from a data frame based on a variance threshold.
#'
#' @param m.df A data frame. Input data frame.
#' @param var.cutoff A numeric. Variance threshold. Columns with variance below this threshold will be removed.
#'
#' @return A data frame. Data frame with constant columns removed.
#'
#' @examples
#' remove_constant(m.df = my_data_frame, var.cutoff = 0.01)
#'
#' The function calculates the variance of each dimension in the input data frame.
#' It then identifies columns with variance above the specified threshold and removes constant columns.
#' The resulting data frame without constant columns is returned.
#'
# Variance of each dimension
var.feats <- apply(m.df, 2, var)
# Keep columns above variance thresh
keep_cols <- names(var.feats[var.feats > var.cutoff])
# Filter df
m.var.df <- m.df %>%
select(all_of(as.character(keep_cols)))
return(m.var.df)
}
remove_corfeats <- function(m.df, cor.thresh){
#' Removes correlated features from a data frame based on a correlation threshold.
#'
#' @param m.df A data frame. Input data frame.
#' @param cor.thresh A numeric. Correlation threshold.
#'
#' @return A data frame. Data frame with correlated features removed.
#'
#' @examples
#' remove_corfeats(m.df = my_data_frame, cor.thresh = 0.7)
#'
#' The function calculates the correlation matrix of the input data frame.
#' It then identifies correlated columns based on the correlation threshold.
#' Columns with correlations above the threshold are removed.
#' The resulting data frame without correlated features is returned.
#'
# Correlation Matrix
cor.mat <- cor(m.df)
# Format triangle
cor.mat[lower.tri(cor.mat, diag = TRUE)] <- 0
# Find correlated columns
corr.descision <- apply(cor.mat, 2, function(x){ifelse(any(x >= cor.thresh), "remove", "remain")})
# Keep columns
keep_cols <- names(corr.descision[corr.descision == "remain"])
# Filter df
m.uncor <- m.df %>%
select(all_of(keep_cols))
return(m.uncor)
}
preprocess_mole <- function(mole_df, min.var, corr.thresh){
#' Preprocesses molecular data by filtering out constant columns and correlated features.
#'
#' @param mole_df A data frame. Input molecular data frame.
#' @param min.var A numeric. Variance threshold for removing constant columns.
#' @param corr.thresh A numeric. Correlation threshold for removing correlated features.
#'
#' @return A data frame. Preprocessed molecular data frame.
#'
#' @examples
#' preprocess_mole(mole_df = my_molecular_data_frame, min.var = 0.01, corr.thresh = 0.7)
#'
#' The function preprocesses molecular data by first removing constant columns using the specified variance threshold.
#' Then, it removes correlated features based on the correlation threshold.
#' The resulting data frame is the preprocessed molecular data with constant columns and correlated features removed.
#'
# Filter variance
mole.variable <- remove_constant(mole_df, min.var)
# Filter correlated
mole.decorrelated <- remove_corfeats(mole.variable, corr.thresh)
return(mole.decorrelated)
}
# Prepare data
mole_prepared <- preprocess_mole(mole_representation, min.var=0.01, corr.thresh=0.90)
mole_prepared %>% dim()
```
Similar procedure for ECFP4 representation
```{r preprocess.fps}
hamming <- function(X) {
D <- (1 - X) %*% t(X)
HamDist <- D + t(D)
return(HamDist / ncol(X))
}
remove_rare <- function(f.df, min_chems){
# Removes columns where a feature is not present in the majority of compounds
#
# Parameters
# ----------
# - fdf: pandas dataframe. Columns are bits of the fingerprint. Rows are compounds
# - mc: int. The minimum number of molecules a feature should be present in order for it to be preserved
# Number of chemicals a feature is present in
n.freq <- colSums(f.df)
names(n.freq) <- colnames(f.df)
# Filter popular features
common.features <- n.freq[n.freq >= min_chems]
# Alter df
f.common <- f.df %>%
select(all_of(names(common.features)))
return(f.common)
}
remove_correlated <- function(f.df, min_dist){
# Removes highly similar features
#
# Parameters
# ----------
# - fdf: pandas dataframe. Dataframe where columns are bits in the fingerprint
# - mdist: float. Is minimal distance between features in order to be different enough
#
# Returns
# -------
# pandas dataframe
# Highly correlated features are removed (one of them is preserved)
# Hamming distance
dist.matrix <- hamming(t(f.df))
# Format
dist.matrix[lower.tri(dist.matrix, diag = TRUE)] <- 1
# Find correlated columns
corr.descision <- apply(dist.matrix, 2, function(x){ifelse(any(x <= min_dist), "remove", "remain")})
# Only keep un-correlated feats
keep_cols <- names(corr.descision[corr.descision == "remain"])
# Filter df
f.uncorr <- f.df %>%
select(all_of(as.character(keep_cols)))
return(f.uncorr)}
preprocess_fps <- function(fps_df, min_compounds, sim_threshold){
# Processes a fingerprint dataframe. Removes rare and highly correlated features
#
# Parameters
# ----------
# - fps_df: pandas dataframe. Dataframe where columns are bits in the fingerprint
# - min_compounds: int. The minimum number of molecules a feature should be present in order for it to be preserved
# - sim_threshold: float. Is minimal distance between features in order to be different enough
fps_common <- remove_rare(fps_df, min_compounds)
fps.decorr <- remove_correlated(fps_common, sim_threshold)
return(fps.decorr)
}
ecfp4_prepared <- preprocess_fps(ecfp4_representation, 10, 0.01)
ecfp4_prepared %>% dim()
```
### Peform UMAP reduction.
```{r mole.umap}
set.seed(42)
mole.umap <- umap(X=mole_prepared,
n_neighbors = 25,
n_components = 2,
min_dist = 0.35,
n_threads = 20,
metric="cosine")
mole.umap <- as.data.frame(mole.umap) %>%
rownames_to_column("catalog_number")
```
```{r mole.umap.plot}
mole.umap %>%
ggplot(aes(x=V1, y=V2)) +
geom_point(alpha=0.9, color="white", fill="#C5C5C5",
size=2, shape=21, stroke=0.2) +
theme_classic() +
labs(x="UMAP 1",
y = "UMAP 2",
title = "MolE representation of MCE")
```
```{r ecfp4.umap}
# Jaccard distance
jac.dist <- dist(ecfp4_prepared, method = "binary")
set.seed(1234)
ecfp4.umap <- umap(X=jac.dist,
n_neighbors = 20,
n_components = 2,
min_dist = 0.3,
n_threads = 20)
ecfp4.umap <- as.data.frame(ecfp4.umap) %>%
rownames_to_column("catalog_number")
```
```{r ecfp4.umap.plot}
ecfp4.umap %>%
ggplot(aes(x=V1, y=V2)) +
geom_point(alpha=0.9, color="white", fill="#C5C5C5",
size=2, shape=21, stroke=0.2) +
theme_classic() +
labs(x="UMAP 1",
y = "UMAP 2",
title = "ECFP4 representation of MCE")
```
### Plotting predicted broad-spectrum.
```{r mole.umap.over10}
# Add information of broad-spectrum activity
mole.umap <- mole.umap %>%
mutate(pred_activity = if_else(catalog_number %in% mole_predictions.over10$`Catalog Number`, "Antimicrobial", "Not Antimicrobial"))
# Plot
m.umap.over10 <- ggplot(mole.umap, aes(x=V1, y=V2)) +
geom_point(data=subset(mole.umap, pred_activity == "Not Antimicrobial"), aes(color="Not Antimicrobial"), size=1.5) +
geom_point(data=subset(mole.umap, pred_activity == "Antimicrobial"), aes(color="Antimicrobial"), size=1.5) +
scale_color_manual(values=c("Antimicrobial" = alpha("#DE1F84", 0.7), "Not Antimicrobial" = alpha("#C5C5C5", 0.7))) +
theme_void() +
#coord_fixed(ratio = 0.4) +
theme(legend.position = "bottom",
axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
text=element_text(size=10),
panel.background = element_rect(fill = "transparent", color=NA),
plot.background = element_rect(fill = "transparent", colour = NA)) +
labs(x="UMAP 1",
y="UMAP 2",
color="Predicted Activity")
m.umap.over10
```
```{r annotating.litchems.mole}
chems_interst <- mole_predictions.over10 %>%
filter(ProductName %in% c("Ospemifene", "Shionone", "Bekanamycin", "Doxifluridine", "Ellagic acid")) %>%
select(`Catalog Number`, ProductName) %>%
rename("catalog_number" = "Catalog Number")
chems_interest_umap <- chems_interst %>%
left_join(mole.umap, by="catalog_number")
# Plot
m.umap.over10.annot <- ggplot(mole.umap, aes(x=V1, y=V2)) +
geom_point(data=subset(mole.umap, pred_activity == "Not Antimicrobial"), aes(color="Not Antimicrobial"), size=1.5) +
geom_point(data=subset(mole.umap, pred_activity == "Antimicrobial"), aes(color="Antimicrobial"), size=1.5) +
geom_text_repel(data = chems_interest_umap, aes(label=ProductName),
max.overlaps = Inf, size=2, min.segment.length = 0, color="black", box.padding = 0.7, point.padding = 0.2, fontface="bold") +
scale_color_manual(values=c("Antimicrobial" = alpha("#DE1F84", 0.7), "Not Antimicrobial" = alpha("#C5C5C5", 0.7))) +
theme_void() +
#coord_fixed(ratio = 0.4) +
theme(legend.position = "bottom",
axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
text=element_text(size=10),
panel.background = element_rect(fill = "transparent", color=NA),
plot.background = element_rect(fill = "transparent", colour = NA)) +
labs(x="UMAP 1",
y="UMAP 2",
color="Predicted Activity")
m.umap.over10.annot
```
```{r}
meta_info <- mole_predictions %>%
select(`Catalog Number`, nk_total, ProductName) %>%
rename("catalog_number" = "Catalog Number")
mole.umap %>%
left_join(meta_info, by="catalog_number") %>%
rename("umap1" = "V1",
"umap2" = "V2",
"pred_n_inhibited_total" = "nk_total") %>%
write_tsv("../data/05.analyze_mce_predictions/umap_mce_predictions.tsv")
```
```{r ecfp4.umap.over10}
# Add information of broad-spectrum activity
ecfp4.umap <- ecfp4.umap %>%
mutate(pred_activity = if_else(catalog_number %in% ecfp4_predictions.over10$`Catalog Number`, "Antimicrobial", "Not Antimicrobial"))
# Plot
e.umap.over10 <- ggplot(ecfp4.umap, aes(x=V1, y=V2)) +
geom_point(data=subset(ecfp4.umap, pred_activity == "Not Antimicrobial"), aes(color="Not Antimicrobial"), size=1.5) +
geom_point(data=subset(ecfp4.umap, pred_activity == "Antimicrobial"), aes(color="Antimicrobial"), size=1.5) +
scale_color_manual(values=c("Antimicrobial" = alpha("#DE1F84", 0.7), "Not Antimicrobial" = alpha("#C5C5C5", 0.7)),
labels=c("Broad Spectrum", "Not Broad Spectrum")) +
theme_void() +
#coord_fixed(ratio = 0.4) +
theme(legend.position = "bottom",
axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
text=element_text(size=10),
panel.background = element_rect(fill = "transparent", color=NA),
plot.background = element_rect(fill = "transparent", colour = NA)) +
labs(x="UMAP 1",
y="UMAP 2",
color="Predicted Activity")
e.umap.over10
```
```{r annotating.litchems.fps}
chems_interst <- mole_predictions.over10 %>%
filter(ProductName %in% c("Ospemifene", "Shionone", "Bekanamycin", "Doxifluridine", "Ellagic acid")) %>%
select(`Catalog Number`, ProductName) %>%
rename("catalog_number" = "Catalog Number")
chems_interest_umap <- chems_interst %>%
left_join(ecfp4.umap, by="catalog_number")
e.umap.over10.annot <- ggplot(ecfp4.umap, aes(x=V1, y=V2)) +
geom_point(data=subset(ecfp4.umap, pred_activity == "Not Antimicrobial"), aes(color="Not Antimicrobial"), size=1.5) +
geom_point(data=subset(ecfp4.umap, pred_activity == "Antimicrobial"), aes(color="Antimicrobial"), size=1.5) +
geom_text_repel(data = chems_interest_umap, aes(label=ProductName),
max.overlaps = Inf, size=2, min.segment.length = 0, color="black", box.padding = 0.7, point.padding = 0.2, fontface="bold") +
scale_color_manual(values=c("Antimicrobial" = alpha("#DE1F84", 0.7), "Not Antimicrobial" = alpha("#C5C5C5", 0.7)),
labels=c("Broad Spectrum", "Not Broad Spectrum")) +
theme_void() +
#coord_fixed(ratio = 0.4) +
theme(legend.position = "bottom",
axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
text=element_text(size=10),
panel.background = element_rect(fill = "transparent", color=NA),
plot.background = element_rect(fill = "transparent", colour = NA)) +
labs(x="UMAP 1",
y="UMAP 2",
color="Predicted Activity")
e.umap.over10.annot
ggsave(plot=e.umap.over10.annot,
filename = "../data/05.analyze_mce_predictions/ecfp4_mce_umap.pdf",
width = 21, height = 15, units="cm", dpi=300)
```
```{r}
meta_info <- ecfp4_predictions %>%
select(`Catalog Number`, nk_total, ProductName) %>%
rename("catalog_number" = "Catalog Number")
ecfp4.umap %>%
left_join(meta_info, by="catalog_number") %>%
rename("umap1" = "V1",
"umap2" = "V2",
"pred_n_inhibited_total" = "nk_total") %>%
write_tsv("../data/05.analyze_mce_predictions/umap_mce_predictions_ecfp4.tsv")
```
## Ranking molecule predictions.
Here we will create plots that compare the antimicrobial activity and the number of predicted inhibited strains.
```{r plot.ap.vs.nk}
# Format labels
scatter.nkill.apscore <- mole_predictions %>%
mutate(antibiotic = if_else(antibiotic == "abx", "Antibiotic", "Non-Antibiotic")) %>%
# Create plots
ggplot(aes(x=nk_total, y=apscore_total, color=antibiotic)) +
geom_point(alpha=0.5, size=1) +
scale_color_manual(breaks = c("Antibiotic", "Non-Antibiotic"),
values=c("red", "#C5C5C5")) +
geom_vline(xintercept = 10, linetype="longdash") +
theme_light() +
labs(x="Predicted number of inhibited strains",
y = "Antimicrobial Potential",
color="Compound Class") +
theme(legend.position = c(0.7, 0.3),
text=element_text(size=10),
legend.text = element_text(size=8),
legend.title = element_text(size=8))
score.vs.nkill.marginal <- ggExtra::ggMarginal(scatter.nkill.apscore, type="boxplot", groupColour = TRUE, groupFill = TRUE)
score.vs.nkill.marginal
```
```{r plot.ap.vs.nk.e}
# Format labels
scatter.nkill.apscore.e <- ecfp4_predictions %>%
mutate(antibiotic = if_else(antibiotic == "abx", "Antibiotic", "Non-Antibiotic")) %>%
# Create plots
ggplot(aes(x=nk_total, y=apscore_total, color=antibiotic)) +
geom_point(alpha=0.5, size=1) +
scale_color_manual(breaks = c("Antibiotic", "Non-Antibiotic"),
values=c("red", "#C5C5C5")) +
geom_vline(xintercept = 10, linetype="longdash") +
theme_light() +
labs(x="Predicted number of inhibited strains",
y = "Antimicrobial Potential",
color="Compound Class") +
theme(legend.position = c(0.7, 0.3),
text=element_text(size=10),
legend.text = element_text(size=8),
legend.title = element_text(size=8))
score.vs.nkill.marginal.e <- ggExtra::ggMarginal(scatter.nkill.apscore.e, type="boxplot", groupColour = TRUE, groupFill = TRUE)
score.vs.nkill.marginal.e
```
## Comparing Gram stains.
Here we compare the antimicrobial potential scores for gram negatives and gram positives.
```{r gneg.vs.gpos.labels}
# Selected chemicals
selected_chems <- c("Visomitin",
"Ebastine",
"Opicapone",
"Cetrorelix (Acetate)",
'Thymidine',
"Elvitegravir")
# Uridine derivatives
uridin.deriv <- c("Uridine",
"Uridine 5'-monophosphate",
"5-Methyluridine",
"2'-Deoxyuridine",
"Doxifluridine")
# Other interests
# Other interests
other.interest <- c("Tannic acid",
"Teniposide",
"Mequitazine",
"Dimetridazole",
"Azelnipidine",
"Luliconazole",
"Bergenin")
# Format Product Names
selected.chemicals.data <- mole_predictions.over10 %>%
mutate(ProductName = if_else(ProductName %in% selected_chems, ProductName, ""),
ProductName = if_else(ProductName == "Cetrorelix (Acetate)", "Cetrorelix", ProductName))
uridine.deriv.data <- mole_predictions.over10 %>%
mutate(ProductName = if_else(ProductName %in% uridin.deriv, ProductName, ""))
other.interest.data <- mole_predictions.over10 %>%
mutate(ProductName = if_else(ProductName %in% other.interest, ProductName, ""))
# Plot
predBS.mole <- mole_predictions.over10 %>%
# Only non-antibiotics
filter(antibiotic == "not_abx") %>%
mutate(`Reported Activity` = if_else(`Reported Activity` %in% c("Antiplasmodium", "Insecticide"), "Antiparasitic", `Reported Activity`)) %>%
ggplot(aes(x=apscore_gnegative, y=apscore_gpositive, color=`Reported Activity`)) +
# Basic aes
geom_point(size=1) +
geom_abline(linetype="longdash", alpha=0.25) +
# Add names
geom_text_repel(data = selected.chemicals.data, aes(x=apscore_gnegative, y=apscore_gpositive, label=ProductName),
max.overlaps = Inf, size=2, min.segment.length = 0, color="black", box.padding = 0.7, point.padding = 0.2, fontface="bold",
nudge_x = if_else(selected.chemicals.data$ProductName %in% c("Cetrorelix", "Ebastine"), -1, 0)) +
geom_text_repel(data = uridine.deriv.data, aes(x=apscore_gnegative, y=apscore_gpositive, label=ProductName),
max.overlaps = Inf, size=1.8, min.segment.length = 0, color="black", box.padding = 0.7, point.padding = 0.2,
nudge_x = if_else(uridine.deriv.data$ProductName %in% c("Uridine"), -1, 0)) +
geom_text_repel(data = other.interest.data, aes(x=apscore_gnegative, y=apscore_gpositive, label=ProductName),
max.overlaps = Inf, size=1.8, min.segment.length = 0, color="black", box.padding = 0.7, point.padding = 0.2) +
# Color by reported activity
scale_color_manual(breaks = c("Antibacterial", "Antiviral", "Antifungal", "Antiparasitic", "None"),
values = c("#1b9e77", "#d95f02", "#7570b3", "#e7298a", "#C5C5C5")) +
theme_light() +
theme(legend.position = "bottom",
text = element_text(size=10)) +
labs(x = latex2exp::TeX("Antimicrobial Potential $G^-$"),
y = latex2exp::TeX("Antimicrobial Potential $G^+$"))
predBS.mole
```
```{r gneg.vs.gpos.labels.e}
# Selected chemicals
selected_chems <- c("Visomitin",
"Ebastine",
"Opicapone",
"Cetrorelix (Acetate)",
'Thymidine',
"Elvitegravir")
# Uridine derivatives
uridin.deriv <- c("Uridine",
"Uridine 5'-monophosphate",
"5-Methyluridine",
"2'-Deoxyuridine",
"Doxifluridine")
# Other interests
other.interest <- c("Tannic acid",
"Teniposide",
"Dimetridazole",
"Azelnipidine",
"Luliconazole",
"Bergenin")
# Format Product Names
selected.chemicals.data <- ecfp4_predictions.over10 %>%
mutate(ProductName = if_else(ProductName %in% selected_chems, ProductName, ""),
ProductName = if_else(ProductName == "Cetrorelix (Acetate)", "Cetrorelix", ProductName))
uridine.deriv.data <- ecfp4_predictions.over10 %>%
mutate(ProductName = if_else(ProductName %in% uridin.deriv, ProductName, ""))
other.interest.data <- ecfp4_predictions.over10 %>%
mutate(ProductName = if_else(ProductName %in% other.interest, ProductName, ""))
# Plot
predBS.fps <- ecfp4_predictions.over10 %>%
# Only non-antibiotics
filter(antibiotic == "not_abx") %>%
mutate(`Reported Activity` = if_else(`Reported Activity` %in% c("Antiplasmodium", "Insecticide"), "Antiparasitic", `Reported Activity`)) %>%
ggplot(aes(x=apscore_gnegative, y=apscore_gpositive, color=`Reported Activity`)) +
# Basic aes
geom_point(size=1) +
geom_abline(linetype="longdash", alpha=0.25) +
# Add names
geom_text_repel(data = selected.chemicals.data, aes(x=apscore_gnegative, y=apscore_gpositive, label=ProductName),
max.overlaps = Inf, size=2, min.segment.length = 0, color="black", box.padding = 0.7, point.padding = 0.2, fontface="bold",
nudge_x = if_else(selected.chemicals.data$ProductName %in% c("Cetrorelix", "Ebastine"), -1, 0)) +
geom_text_repel(data = uridine.deriv.data, aes(x=apscore_gnegative, y=apscore_gpositive, label=ProductName),
max.overlaps = Inf, size=1.8, min.segment.length = 0, color="black", box.padding = 0.7, point.padding = 0.2,
nudge_x = if_else(uridine.deriv.data$ProductName %in% c("Uridine"), -1, 0)) +
geom_text_repel(data = other.interest.data, aes(x=apscore_gnegative, y=apscore_gpositive, label=ProductName),
max.overlaps = Inf, size=1.8, min.segment.length = 0, color="black", box.padding = 0.7, point.padding = 0.2) +
# Color by reported activity
scale_color_manual(breaks = c("Antibacterial", "Antiviral", "Antifungal", "Antiparasitic", "None"),
values = c("#1b9e77", "#d95f02", "#7570b3", "#e7298a", "#C5C5C5")) +
theme_light() +
theme(legend.position = "bottom",
text = element_text(size=10)) +
labs(x = latex2exp::TeX("Antimicrobial Potential $G^-$"),
y = latex2exp::TeX("Antimicrobial Potential $G^+$"))
predBS.fps
```
## Propotion with reported activity
```{r activity.counts}
# Get the activity counts
proportion.known.activity <- mole_predictions.over10 %>%
# Only consider non-antibiotics
filter(antibiotic == "not_abx") %>%
# Consolidate antiparasitic category
mutate(`Reported Activity` = if_else(`Reported Activity` %in% c("Antiplasmodium", "Insecticide"), "Antiparasitic", `Reported Activity`)) %>%
count(`Reported Activity`) %>%
mutate(`Reported Activity` = factor(`Reported Activity`, levels=c("None", "Antiparasitic", "Antifungal", "Antiviral","Antibacterial")),
x=" ")
# Gather
reported.g <- ggplot(proportion.known.activity, aes(x=n, y=x, fill=`Reported Activity`)) +
geom_bar(position="fill", stat="identity", color="black") +
scale_fill_manual(breaks = c("Antibacterial", "Antiviral", "Antifungal", "Antiparasitic", "None"),
values = c("#1b9e77", "#d95f02", "#7570b3", "#e7298a", "#C5C5C5")) +
theme_classic() +
coord_flip() +
theme(text=element_text(size=10),
legend.position = "right",
aspect.ratio = 3,
legend.text = element_text(size = 8),
legend.title = element_text(size=8)) +
labs(x="Proportion with reported activity",
y="Predicted antimicrobial",
fill = "Reported Activity")
reported.g
```
```{r activity.counts.e}
# Get the activity counts
proportion.known.activity.e <- ecfp4_predictions.over10 %>%
# Only consider non-antibiotics
filter(antibiotic == "not_abx") %>%
# Consolidate antiparasitic category
mutate(`Reported Activity` = if_else(`Reported Activity` %in% c("Antiplasmodium", "Insecticide"), "Antiparasitic", `Reported Activity`)) %>%
count(`Reported Activity`) %>%
mutate(`Reported Activity` = factor(`Reported Activity`, levels=c("None", "Antiparasitic", "Antifungal", "Antiviral","Antibacterial")),
x=" ")
# Gather
reported.g.e <- ggplot(proportion.known.activity.e, aes(x=n, y=x, fill=`Reported Activity`)) +
geom_bar(position="fill", stat="identity", color="black") +
scale_fill_manual(breaks = c("Antibacterial", "Antiviral", "Antifungal", "Antiparasitic", "None"),
values = c("#1b9e77", "#d95f02", "#7570b3", "#e7298a", "#C5C5C5")) +
theme_classic() +
coord_flip() +
theme(text=element_text(size=10),
legend.position = "right",
aspect.ratio = 3,
legend.text = element_text(size = 8),
legend.title = element_text(size=8)) +
labs(x="Proportion with reported activity",
y="Predicted antimicrobial",
fill = "Reported Activity")
reported.g.e
```
## Comparing MolE and ECFP4 predictions.
```{r global.comparison.ap}
mole_complete_comp <- mole_predictions %>%
select(`Catalog Number`, nk_total, apscore_total, antibiotic, ProductName) %>%
rename("mole_nk_total" = "nk_total",
"mole_apscore_total" = "apscore_total")
ecfp4_complete_comp <- ecfp4_predictions %>%
select(`Catalog Number`, nk_total, apscore_total) %>%
rename("ecfp4_nk_total" = "nk_total",
"ecfp4_apscore_total" = "apscore_total")
complete_pred_comparison <- ecfp4_complete_comp %>%
left_join(mole_complete_comp, by='Catalog Number') %>%
mutate(antibiotic = if_else(antibiotic == "abx", "Antibiotic", "Non-Antibiotic"))
```
```{r global.comparison.nk}
ggplot(complete_pred_comparison, aes(x=mole_nk_total, y=ecfp4_nk_total, color=antibiotic)) +
geom_point(alpha=0.5, size=1) +
geom_abline(linetype="longdash", alpha=0.5, color="blue") +
geom_vline(xintercept = 10,linetype="longdash", alpha=0.5, color="black") +
geom_hline(yintercept = 10,linetype="longdash", alpha=0.5, color="black") +
scale_color_manual(breaks = c("Antibiotic", "Non-Antibiotic"),
values=c("red", "#C5C5C5")) +
theme_light() +
labs(title="Predicted number of inhibited strains comparison",
x="MolE",
y="ECFP4",
color="Compound Class") +
theme(text=element_text(size=10),
legend.text = element_text(size=8),
legend.title = element_text(size=8))
```
Comparing predicted broad spectrum compounds.
```{r ggvenn.lib}
library(ggvenn)
```
```{r broad.spec.venn}
fsize <- 3.5
# Complete comparison of broad spectrum antimicrobials
broad.list <- list("MolE" = mole_predictions.over10$`Catalog Number`,
"ECFP4" = ecfp4_predictions.over10$`Catalog Number`)
venn.abx.comparison <- ggvenn(broad.list,
fill_color = c("#DE1F84", "#1F9DBB"),
set_name_size=fsize,
text_size=fsize)
venn.abx.comparison
# Comparison of broad spectrum antimicrobials no antibiotics
broad.list <- list("MolE" = mole_predictions.over10 %>% filter(antibiotic == "not_abx",
`Reported Activity` != "None") %>% select(`Catalog Number`) %>% unlist(),
"ECFP4" = ecfp4_predictions.over10 %>% filter(antibiotic == "not_abx",
`Reported Activity` != "None") %>% select(`Catalog Number`) %>% unlist())
venn.nonabx.comparison <- ggvenn(broad.list,
fill_color = c("#DE1F84", "#1F9DBB"),
set_name_size=fsize,
text_size=fsize)
venn.nonabx.comparison
```
Now compare results from literature search.
```{r litsearch.comparison}
proportion.known.activity <- proportion.known.activity %>%
mutate(representation = "MolE")
proportion.known.activity.e <- proportion.known.activity.e %>%
mutate(representation = "ECFP4")
proportion.comparison <- bind_rows(proportion.known.activity,
proportion.known.activity.e)
# Gather totals for n
total_count = proportion.comparison %>%
group_by(representation) %>%
summarise(total = sum(n)) %>%
mutate(tx_sum = paste("n =", total))
reported.bar.comparison <- ggplot(proportion.comparison, aes(x=n, y=representation)) +
annotate("text", y=c("ECFP4", "MolE"), x=1.04, label=total_count$tx_sum,) +
geom_bar(position="fill", stat="identity", color="black", aes(fill=`Reported Activity`)) +
scale_fill_manual(breaks = c("Antibacterial", "Antiviral", "Antifungal", "Antiparasitic", "None"),
values = c("#1b9e77", "#d95f02", "#7570b3", "#e7298a", "#C5C5C5")) +
theme_classic() +
coord_flip() +
theme(text=element_text(size=10),
legend.position = "right",
aspect.ratio = 2,
legend.text = element_text(size = 8),
legend.title = element_text(size=8)) +
labs(y="",
x="Proportion",
fill = "Reported Activity")
reported.bar.comparison
```
```{r proportion.abx.broad}
mole.o10.abx <- mole_predictions.over10 %>%
count(antibiotic) %>%
mutate(representation = "MolE")
ecfp4.o10.abx <- ecfp4_predictions.over10 %>%
count(antibiotic) %>%
mutate(representation = "ECFP4")
over10.abx <- bind_rows(mole.o10.abx, ecfp4.o10.abx) %>%
mutate(antibiotic = if_else(antibiotic == "abx", "Antibiotic", "Other"),
antibiotic = factor(antibiotic, levels=c("Other", "Antibiotic")))
abx.bar.comparison <- ggplot(over10.abx, aes(x=n, y=representation, fill=antibiotic)) +
geom_bar(position="fill", stat="identity", color="black") +
scale_fill_manual(breaks = c("Other", "Antibiotic"),
values = c("#C5C5C5", "red")) +
theme_classic() +
coord_flip() +
theme(text=element_text(size=10),
legend.position = "right",
aspect.ratio = 2,
legend.text = element_text(size = 8),
legend.title = element_text(size=8)) +
labs(y="",
x="Proportion",
fill = "Compound Class")
abx.bar.comparison
```
Comparing predicted broad spectrum.
```{r comparing.over10}
mole.litsearch <- mole_predictions.over10 %>%
filter(antibiotic == "not_abx") %>%
select(`Catalog Number`, `Reported Activity`, ProductName) %>%
# Consolidate antiparasitic category
mutate(`Reported Activity` = if_else(`Reported Activity` %in% c("Antiplasmodium", "Insecticide"), "Antiparasitic", `Reported Activity`))
ecfp4.litsearch <- ecfp4_predictions.over10 %>%
filter(antibiotic == "not_abx") %>%
select(`Catalog Number`, `Reported Activity`, ProductName) %>%
# Consolidate antiparasitic category
mutate(`Reported Activity` = if_else(`Reported Activity` %in% c("Antiplasmodium", "Insecticide"), "Antiparasitic", `Reported Activity`))
combined.litsearch <- bind_rows(mole.litsearch, ecfp4.litsearch) %>%
distinct()
mole_o10 <- mole_predictions %>%
filter(`Catalog Number` %in% combined.litsearch$`Catalog Number`) %>%
select(`Catalog Number`, nk_total, apscore_total) %>%
rename("mole_nk_total" = "nk_total",
"mole_apscore_total" = "apscore_total")
ecfp4_o10 <- ecfp4_predictions %>%
filter(`Catalog Number` %in% combined.litsearch$`Catalog Number`) %>%
select(`Catalog Number`, nk_total, apscore_total) %>%
rename("ecfp4_nk_total" = "nk_total",
"ecfp4_apscore_total" = "apscore_total")
broadS.comp <- combined.litsearch %>%
left_join(mole_o10, by="Catalog Number") %>%
left_join(ecfp4_o10, by="Catalog Number") %>%
filter(`Reported Activity` != "None")
```
```{r broadS.comp}
# Selected chemicals
selected_chems <- c("Visomitin",
"Ebastine",
"Opicapone",
"Cetrorelix (Acetate)",
'Thymidine',
"Elvitegravir")
# Other interests
other.interest <- c("Bergenin",
"Fenticonazole (Nitrate)",
"Doxifluridine",
"Broxyquinoline",
"Triclabendazole",
"Octenidine (dihydrochloride)",
"Benzalkonium (chloride)",
"Simeprevir")
# Format Product Names
selected.chemicals.data <- broadS.comp %>%
mutate(ProductName = if_else(ProductName %in% selected_chems, ProductName, ""),
ProductName = if_else(ProductName == "Cetrorelix (Acetate)", "Cetrorelix", ProductName))
other.interest.data <- broadS.comp %>%
mutate(ProductName = if_else(ProductName %in% other.interest, ProductName, ""),
ProductName = if_else(ProductName == "Fenticonazole (Nitrate)", "Fenticonazole", ProductName),
ProductName = if_else(ProductName == "Octenidine (dihydrochloride)", "Octenidine", ProductName),
ProductName = if_else(ProductName == "Benzalkonium (chloride)", "Benzalkonium", ProductName))
nk.comparison <- ggplot(broadS.comp, aes(x=mole_nk_total, y=ecfp4_nk_total, color=`Reported Activity`)) +
geom_vline(xintercept = 10, linetype="longdash", color="black", alpha=0.5) +
geom_hline(yintercept=10, linetype="longdash", color="black", alpha=0.5) +
geom_point(alpha=0.5) +
geom_abline(linetype="longdash", color="red", alpha=0.5) +
# Add names
geom_text_repel(data = selected.chemicals.data, aes(x=mole_nk_total, y=ecfp4_nk_total, label=ProductName),
max.overlaps = Inf, size=2, min.segment.length = 0, color="black", box.padding = 0.7, point.padding = 0.2, fontface="bold",
nudge_x = if_else(selected.chemicals.data$ProductName %in% c("Elvitegravir"), 1, 0)) +
geom_text_repel(data = other.interest.data, aes(x=mole_nk_total, y=ecfp4_nk_total, label=ProductName),
max.overlaps = Inf, size=2, min.segment.length = 0, color="black", box.padding = 0.7, point.padding = 0.2,
nudge_y = if_else(other.interest.data$ProductName %in% c("Benzalkonium"), 1, 0)) +
scale_color_manual(breaks = c("Antibacterial", "Antiviral", "Antifungal", "Antiparasitic", "None"),
values = c("#1b9e77", "#d95f02", "#7570b3", "#e7298a", "#C5C5C5")) +
theme_light() +
labs(x="MolE - N. inhibited strains",
y="ECFP4 - N. inhibited strains")
nk.comparison
```
## Figure panels.
```{r fig.arrange}
library(ggpubr)
```
```{r fps.fig.recreate}
top_row <- ggarrange(e.umap.over10 + theme(aspect.ratio = 0.4), reported.g.e, labels = c("a", "b"),
widths = c(2, 1),font.label = list(face="plain"))
bottom_row <- ggarrange(score.vs.nkill.marginal.e, predBS.fps + theme(legend.position = "none"),
labels = c("c", "d"), font.label = list(face="plain"))
pred.fps.panel <- ggarrange(top_row, bottom_row, ncol = 1, nrow = 2, heights = c(0.8, 1))
pred.fps.panel
ggsave(filename = "../data/05.analyze_mce_predictions/ecfp4_mce_overview.pdf",
plot=pred.fps.panel,
width = 21, height = 15, units="cm", dpi=300)
ggsave(filename = "../data/05.analyze_mce_predictions/ecfp4_mce_overview.svg",
plot=pred.fps.panel,
width = 21, height = 15, units="cm", dpi=300)
```
```{r mole.prediction.overview}
top_row <- ggarrange(m.umap.over10 + theme(aspect.ratio = 0.4), reported.g, labels = c("a", "b"),
widths = c(2, 1),font.label = list(face="plain"))
bottom_row <- ggarrange(score.vs.nkill.marginal, predBS.mole + theme(legend.position = "none"),
labels = c("c", "d"), font.label = list(face="plain"))
pred.mole.panel <- ggarrange(top_row, bottom_row, ncol = 1, nrow = 2, heights = c(0.8, 1))
pred.mole.panel
ggsave(filename = "../data/05.analyze_mce_predictions/mole_mce_overview.pdf",
plot=pred.mole.panel,
width = 21, height = 15, units="cm", dpi=300)
```
Combine complete prediction information
```{r combine.pred.info}
ecfp4_predranks <- ecfp4_predictions %>%
mutate(representation = "ECFP4")
mole_predranks <- mole_predictions %>%
mutate(representation = "MolE")
complete_predranks <- bind_rows(ecfp4_predranks, mole_predranks)
```
```{r known.abx.hist}
nk.abx.hist <- complete_predranks %>%
filter(antibiotic == "abx") %>%
ggplot(aes(x=nk_total, color=representation, color=representation)) +
geom_histogram(binwidth = 1, aes(y=after_stat(count)), position="dodge", alpha=0.1, fill="white") +
geom_density(alpha=0.2, aes(y=after_stat(count)), size=1.25, adjust=1/5) +
scale_color_manual(values=c("#1F9DBB", "#DE1F84")) +
theme_light() +
labs(x="Predicted number of inhibited strains",
y="Count",
color="Representation",
title = "Known antibiotics")
nk.abx.hist
```
```{r litsearch.comparison.}
mole.o10 <- mole_predictions.over10 %>%
filter(antibiotic == "not_abx") %>%
filter(`Reported Activity` != "None") %>%
select(`Catalog Number`) %>%
unlist()
ecfp4.o10 <- ecfp4_predictions.over10 %>%
filter(antibiotic == "not_abx") %>%
filter(`Reported Activity` != "None") %>%
select(`Catalog Number`) %>%
unlist()
ecfp4.comppreds <- complete_predranks %>%
filter(representation == "ECFP4",
`Catalog Number` %in% ecfp4.o10)
mole.comppreds <- complete_predranks %>%
filter(representation == "MolE",
`Catalog Number` %in% mole.o10)
complete_litsearch_comparison <- bind_rows(ecfp4.comppreds, mole.comppreds)
```
```{r litsearch.hist.nk}
nk.ls.hist <- ggplot(complete_litsearch_comparison, aes(x=nk_total, color=representation)) +
geom_histogram(binwidth = 1, aes(y=after_stat(count)), position="dodge", alpha=0.1, fill="white") +
geom_density(alpha=0.2, aes(y=after_stat(count)), size=1.25, adjust=1/4) +
scale_color_manual(values=c("#1F9DBB", "#DE1F84")) +
#scale_fill_manual(values=c("#1F9DBB", "#DE1F84")) +
theme_light() +
labs(x="Predicted number of inhibited strains",
y="Count",
color="Representation",
title = "Non-antibiotic antimicrobials")
nk.ls.hist
```
```{r panel.nkill.comp}
panel.comp.nk <- ggarrange(nk.abx.hist, nk.ls.hist,
nrow = 1, ncol = 2,
labels = c("a", "b"),
font.label = list(face="plain"),
common.legend = TRUE,
legend = "right")
panel.comp.nk
```
```{r literature.comparison}
panel.comp.lit <- ggarrange(reported.bar.comparison, nk.comparison,
nrow = 1, ncol = 2,
labels = c("c", "d"),
font.label = list(face="plain"),
common.legend = TRUE,
legend = "right")
panel.comp.lit
```
```{r complete.panel.comp}
complete.comp.panel <- ggarrange(panel.comp.nk,
NULL,
panel.comp.lit,
heights = c(1, 0.1, 1),
nrow = 3, ncol = 1)
ggsave(plot = complete.comp.panel,
filename = "../data/05.analyze_mce_predictions/mce_pred_comparison.pdf",
width = 21, height = 18, units="cm", dpi=300)
ggsave(plot = complete.comp.panel,
filename = "../data/05.analyze_mce_predictions/mce_pred_comparison.png",
width = 21, height = 18, units="cm", dpi=300)
complete.comp.panel
```
## Supplementary Files.
### SF8
```{r, eval=FALSE, include=FALSE}
sf8a <- complete_predranks_wide_abx <- complete_predranks %>%
filter(antibiotic=="abx") %>%
pivot_wider(id_cols = c(`Catalog Number`, ProductName, antibiotic), names_from = representation, values_from = nk_total) %>%
rename("ECFP4_predicted_n_inhibited_strains" = "ECFP4",
"MolE_predicted_n_inhibited_strains" = "MolE")
sf8bd <- broadS.comp %>%
mutate(antibiotic="not_abx") %>%
select(`Catalog Number`, ProductName, antibiotic, `Reported Activity`, mole_nk_total, ecfp4_nk_total) %>%
rename("ECFP4_predicted_n_inhibited_strains" = "ecfp4_nk_total",
"MolE_predicted_n_inhibited_strains" = "mole_nk_total")
sf8c <- proportion.comparison %>%
pivot_wider(id_cols = `Reported Activity`, names_from = representation, values_from = n)
```
```{r, eval=FALSE, include=FALSE}
library(xlsx)
```
```{r, eval=FALSE, include=FALSE}
write.xlsx2(sf8a, "../data/Supplementary Source Data/Supplementary Figure 8.xlsx",
sheetName = "SF8a",
col.names = TRUE,
row.names = TRUE,
append = FALSE,
showNA = FALSE)
write.xlsx2(sf8bd, "../data/Supplementary Source Data/Supplementary Figure 8.xlsx",
sheetName = "SF8b,d",
col.names = TRUE,
row.names = TRUE,
append = TRUE,
showNA = FALSE)
write.xlsx2(sf8c, "../data/Supplementary Source Data/Supplementary Figure 8.xlsx",
sheetName = "SF8c",
col.names = TRUE,
row.names = TRUE,
append = TRUE,
showNA = FALSE)
```
### SF13
```{r}
# Names
chem_names <- read_tsv("../data/04.new_predictions/medchemexpress_filtered.tsv.gz", show_col_types = FALSE)
# MolE scores
mole_scores <- read_excel("../data/04.new_predictions/mole_mce_predictions_litsearch.xlsx", sheet = "mole_scores") %>%
pivot_longer(cols = -chem_id, names_to = "strain", values_to = "MolE_predictive_probability") %>%
unite("pred_id", c(chem_id, strain), sep=":", remove=FALSE)
# ECFP4 scores
ecfp4_scores <- read_excel("../data/04.new_predictions/ecfp4_mce_predictions_litsearch.xlsx", sheet = "ecfp4_scores") %>%
pivot_longer(cols = -chem_id, names_to = "strain", values_to = "ECFP4_predictive_probability") %>%
unite("pred_id", c(chem_id, strain), sep=":")
# ChemDesc scores
chemDesc_scores <- read_excel("../../udl_microbiome/data/chemDesc_mce_predictions.xlsx", sheet = "chemDesc_scores") %>%
pivot_longer(cols = -chem_id, names_to = "strain", values_to = "chemDesc_predictive_probability") %>%
unite("pred_id", c(chem_id, strain), sep=":")
# Optimal thresholds
optimal_thresholds <- read_tsv("../data/03.model_evaluation/optimal_thresholds.tsv.gz", show_col_types = FALSE) %>%
filter(score_type == "optimized") %>%
select(representation, threshold)
```
```{r}
optimal_thresholds
```
```{r}
selected.predictions <- chem_names %>%
filter(ProductName %in% c("Ebastine", "Elvitegravir", "Visomitin", "Opicapone", "Thymidine", "Cetrorelix (Acetate)")) %>%
select(`Catalog Number`, ProductName) %>%
rename("chem_id" = "Catalog Number") %>%
left_join(mole_scores, by="chem_id") %>%
left_join(ecfp4_scores, by="pred_id") %>%
left_join(chemDesc_scores, by="pred_id") %>%
mutate("MolE_optimal_threshold" = optimal_thresholds %>% filter(representation == "MolE") %>% select(threshold) %>% unlist(),
"ECPF4_optimal_threshold" = optimal_thresholds %>% filter(representation == "ecfp4") %>% select(threshold) %>% unlist(),
"chemDesc_optimal_threshold" = optimal_thresholds %>% filter(representation == "chemDesc") %>% select(threshold) %>% unlist(),
"MolE_predicted_growth_inhibition" = if_else(MolE_predictive_probability >= MolE_optimal_threshold, 1, 0),
"ECFP4_predicted_growth_inhibition" = if_else(ECFP4_predictive_probability >= ECPF4_optimal_threshold, 1, 0),
"chemDesc_predicted_growth_inhibition" = if_else(chemDesc_predictive_probability >= chemDesc_optimal_threshold, 1, 0)) %>%
select(-pred_id)
```
```{r}
write.xlsx2(selected.predictions %>% filter(ProductName == "Cetrorelix (Acetate)"),
"../data/Supplementary Source Data/Supplementary Figure 13.xlsx",
sheetName = "SF13a",
col.names = TRUE,
row.names = TRUE,
append = FALSE,
showNA = TRUE)
write.xlsx2(selected.predictions %>% filter(ProductName == "Ebastine"),
"../data/Supplementary Source Data/Supplementary Figure 13.xlsx",
sheetName = "SF13b",
col.names = TRUE,
row.names = TRUE,
append = TRUE,
showNA = FALSE)
write.xlsx2(selected.predictions %>% filter(ProductName == "Elvitegravir"),
"../data/Supplementary Source Data/Supplementary Figure 13.xlsx",
sheetName = "SF13c",
col.names = TRUE,
row.names = TRUE,
append = TRUE,
showNA = FALSE)
write.xlsx2(selected.predictions %>% filter(ProductName == "Opicapone"),
"../data/Supplementary Source Data/Supplementary Figure 13.xlsx",
sheetName = "SF13d",
col.names = TRUE,
row.names = TRUE,
append = TRUE,
showNA = FALSE)
write.xlsx2(selected.predictions %>% filter(ProductName == "Thymidine"),
"../data/Supplementary Source Data/Supplementary Figure 13.xlsx",
sheetName = "SF13e",
col.names = TRUE,
row.names = TRUE,
append = TRUE,
showNA = FALSE)
write.xlsx2(selected.predictions %>% filter(ProductName == "Visomitin"),
"../data/Supplementary Source Data/Supplementary Figure 13.xlsx",
sheetName = "SF13f",
col.names = TRUE,
row.names = TRUE,
append = TRUE,
showNA = FALSE)
```
```{r}
selected.predictions
```
## Session Info
```{r session.info}
sessionInfo()
```