This vignette demonstrates the process of adapting a reference dataset, specifically a reference of Peripheral Blood Mononuclear Cells (PBMCs), to match the marker panels of different query datasets. Reference adaptation is sometimes necessary because a smaller set of markers cannot always resolve the same level of annotation granularity as a reference set with many markers. The adaptation merges populations that cyDefine fails to distinguish between when only the panel's markers are available.
This vignette demonstrates the adaptation process using a CyTOF (Z2YR) panel, a Spectral Flow Cytometry (OMIP69) panel, the small OMIP23 panel, and the POISED panel.
library(dplyr)##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(patchwork)
library(cyCombine)
library(cyDefine)# Set up seed and number of cells
seed <- 430       # passed to set.seed() and adapt_reference()
n_cells <- 30000  # passed as sample_n when plotting the reference UMAP

# Marker panels of the query datasets ----

# HIMC (FR-FCM-Z2YR) panel
himc_panel <- c(
  "CD45", "CD57", "HLA-DR", "CCR6", "CD19", "IgG", "CD4", "IgD", "CD20",
  "IgA", "CD56", "CD86", "ICOS", "TCRgd", "CD45RA", "CD123", "CD27",
  "CXCR3", "NKG2A", "anti", "CD11c", "CD14", "CD26", "CD8a", "CD33",
  "CD161", "CD127", "CCR10", "CCR7", "CCR9", "CD25", "CD3", "CXCR5",
  "CD38", "a4b7", "PD-1", "CD62L", "CLA", "CD16"
)

# OMIP-023 markers
omip23 <- c(
  "CD45", "CD3", "CD8", "CD4", "CD25", "CD127", "CD19", "CD38", "HLA-DR",
  "CD16", "CD56", "CD14", "CD69"
)

# OMIP-069 markers
omip69 <- c(
  "CD45", "CD3", "CD4", "CD8", "CD25", "TCRgd", "CD14", "CD16", "CD11c",
  "CD19", "CD20", "CD24", "CD39", "IgD", "IgG", "IgM", "CD141", "CD1c",
  "CD123", "CD2", "CD56", "CCR7", "CD27", "CD28", "CD45RA", "CD95",
  "CD127", "CD337", "CCR6", "CCR5", "CXCR5", "CXCR3", "HLADR", "CD38",
  "CD57", "PD-1", "CD159a", "CD159c", "CD314"
)

# POISED markers
poised_panel <- c(
  "CD16", "IFNg", "CD69", "CXCR3", "IL.17", "LAP", "CD27", "CD40L", "PD1",
  "CD123", "CD45RA", "CD28", "GPR15", "HLA.DR", "CD33", "CD14", "CD127",
  "CD86", "TCRgd", "CD19", "CD49b", "IL.4", "CD4", "CD8", "CD38", "LAG3",
  "OX40", "CD20", "CCR4", "IL9", "CD3", "CD11c", "CCR7", "CD57", "IL.10",
  "integrin", "CD25", "CD56", "CLA"
)
# Get and filter markers from Seurat reference
pbmc_reference <- get_reference("pbmc", path = "data", store = TRUE)
## Retrieving reference..

# Drop every name that begins with "cell" or "Rat" from the marker list
ref_markers <- get_markers(pbmc_reference)
ref_markers <- ref_markers[
  !(startsWith(ref_markers, "cell") | startsWith(ref_markers, "Rat"))
]

# Subset reference: remove the cell types that are not carried forward
reference <- dplyr::filter(
  pbmc_reference,
  !(celltype.l2 %in% c("Doublet", "Platelet", "Eryth", "HSPC"))
)

# Build the color palette from the sorted, unique celltype.l2 labels;
# the seed is set immediately before the palette is generated
set.seed(seed)
colors <- get_distinct_colors(sort(unique(pull(reference, celltype.l2))))
# UMAP plot for Seurat reference, computed on all reference markers.
# The result is used through `$plot` here and through `$data` by the
# per-panel UMAPs further down, hence `return_data = TRUE`
# (spelled out: `T` is an ordinary variable that can be reassigned).
umap_ref <- plot_umap(
  reference,
  return_data = TRUE,
  sample_n = n_cells,
  markers = ref_markers,
  add_centroids = "text",
  title = "Universal PBMC reference",
  col = "celltype.l2"
)
## Generating UMAP
umap_ref$plot
## OMIP23 - Adapt reference
# Create a dummy query data frame based on OMIP23 markers:
# a single all-zero row whose columns are named after the panel markers,
# used in place of real query data when mapping marker names
dummy_query <- data.frame(matrix(0, nrow = 1, ncol = length(omip23)))
colnames(dummy_query) <- omip23

# Map reference markers to OMIP23 panel
mapped_dummy_omip23 <- map_marker_names(
  ref_markers = ref_markers,
  using_pbmc = TRUE,
  query = dummy_query,
  query_markers = omip23
)

# Generate UMAP plot using OMIP23 markers.
# Starts from the data returned with the full-reference UMAP and restricts
# the markers to the columns that survived the mapping.
umap_ref_omip23 <- plot_umap(
  umap_ref$data,
  down_sample = FALSE,
  return_data = TRUE,
  markers = colnames(mapped_dummy_omip23),
  add_centroids = "text",
  title = "OMIP-023 marker subset",
  col = "celltype.l2"
)
## Generating UMAP
umap_ref_omip23$plot
# Adapt the Seurat reference to match OMIP23 markers
adapted_reference_omip23 <- adapt_reference(
  reference = reference,
  markers = colnames(mapped_dummy_omip23),
  # mtry: half the number of mapped panel markers, rounded down
  mtry = ncol(mapped_dummy_omip23) %/% 2,
  num.threads = 4,
  # Left disabled; the same four populations are already removed when
  # `reference` is built:
  # exclude_celltypes = c("Doublet", "Platelet", "Eryth", "HSPC"),
  using_pbmc = TRUE,
  seed = seed
)
## # ------- Population merging - Round 1 ------- #
## Running classification to identify similar populations
## Merging groups of similar populations
## Merging:
## ASDC, pDC
## B intermediate, B memory, B naive
## CD4 CTL, CD4 TCM, CD4 TEM
## CD4 Proliferating, CD4 Naive
## CD8 Proliferating, CD8 TEM
## CD8 TCM, CD8 Naive
## MAIT, dnT, gdT
## NK Proliferating, NK
## cDC1, cDC2
## # ------- Population merging - Round 2 ------- #
## Running classification to identify similar populations
##
## Reference adapted!
# Tabulate the adapted labels; the names are what custom colors
# would be keyed on (optional)
table(adapted_reference_omip23$celltype)
##
## B intermediate / B memory / B naive CD14 Mono
## 13434 42690
## CD16 Mono CD4 CTL / CD4 TCM / CD4 TEM
## 6320 20907
## CD4 Proliferating / CD4 Naive CD8 Proliferating / CD8 TEM
## 17587 11818
## CD8 TCM / CD8 Naive ILC
## 13651 132
## MAIT / dnT / gdT NK Proliferating / NK
## 6789 17721
## NK_CD56bright Plasmablast
## 943 366
## Treg cDC
## 2507 2652
## pDC
## 937
# Extend the palette to the adapted (merged) labels.
# NOTE(review): expand_colors() is reached via `:::`, i.e. it is not
# exported from cyDefine — confirm this is intended for a vignette.
colors <- cyDefine:::expand_colors(
  adapted_celltypes = adapted_reference_omip23$celltype,
  original_celltypes = reference$celltype.l2,
  colors = colors
)

# Join adapted cell types onto the OMIP23 UMAP data by cell_id and plot
plot_adapted_omip23 <- umap_ref_omip23$data |>
  dplyr::left_join(
    adapted_reference_omip23[, c("cell_id", "celltype", "celltype_original")],
    by = "cell_id"
  ) |>
  plot_embedding(
    add_centroids = "text",
    title = "Adapted reference for OMIP-023",
    highlight_labels = TRUE,
    colors = colors,
    col = "celltype"
  )

# Side by side: original labels vs. adapted labels on the same marker subset
umap_ref_omip23$plot + plot_adapted_omip23

# Generate a diagram visualizing the adapted reference structure
# (font color overrides for the two NK node labels)
fontcolor_nodes <- c(
  "NK" = "white",
  "NK Proliferating / NK" = "white"
)
plot_diagram(
  adapted_reference_omip23,
  colors = colors,
  fontcolor_nodes = fontcolor_nodes
)
# Create a dummy query data frame based on OMIP69 markers
# Single all-zero row whose columns are named after the OMIP69 markers
dummy_query <- data.frame(matrix(0, nrow = 1, ncol = length(omip69)))
colnames(dummy_query) <- omip69

# Map reference markers to OMIP69 panel
mapped_dummy_omip69 <- map_marker_names(
  ref_markers = ref_markers,
  using_pbmc = TRUE,
  query = dummy_query,
  query_markers = omip69
)
## Renaming CXCR5 to CD185
## Renaming CCR6 to CD196
## Renaming PD-1 to CD279
## Renaming TCRgd to gdTCR
## Renaming CCR5 to CD195
## Renaming HLADR to HLA-DR
## Warning in map_marker_names(ref_markers = ref_markers, using_pbmc = TRUE, : The following markers were not detected to be in the reference and were excluded from the data:
## IgG, CCR7, CXCR3, CD159a, CD159c
## Markers of the Universal PBMC reference can be found in 'pbmc_markers'. If needed, markers can be manually mapped to reference markers using the 'map_specfic_from' and 'map_specfic_to' arguments

# Generate UMAP plot using OMIP69 markers.
# Starts from the data returned with the full-reference UMAP and restricts
# the markers to the columns that survived the mapping.
umap_ref_omip69 <- plot_umap(
  umap_ref$data,
  return_data = TRUE,
  down_sample = FALSE,
  add_centroids = "text",
  title = "OMIP-069 marker subset",
  markers = colnames(mapped_dummy_omip69),
  col = "celltype.l2"
)
## Generating UMAP
umap_ref_omip69$plot
# Adapt the Seurat reference to match OMIP69 markers
adapted_reference_omip69 <- adapt_reference(
  reference = reference,
  markers = colnames(mapped_dummy_omip69),
  # mtry: half the number of mapped panel markers, rounded down
  mtry = ncol(mapped_dummy_omip69) %/% 2,
  num.threads = 4,
  # Left disabled; the same four populations are already removed when
  # `reference` is built:
  # exclude_celltypes = c("Doublet", "Platelet", "Eryth", "HSPC"),
  using_pbmc = TRUE,
  seed = seed
)
## # ------- Population merging - Round 1 ------- #
## Running classification to identify similar populations
## Merging groups of similar populations
## Merging:
## ASDC, pDC
## CD4 Proliferating, CD4 TEM, CD4 TCM
## CD8 Proliferating, CD8 TEM
## NK Proliferating, NK
## # ------- Population merging - Round 2 ------- #
## Running classification to identify similar populations
##
## Reference adapted!
# Adjust color mapping: extend the palette to the adapted (merged) labels.
# NOTE(review): expand_colors() is reached via `:::`, i.e. it is not
# exported from cyDefine — confirm this is intended for a vignette.
colors <- cyDefine:::expand_colors(
  adapted_celltypes = adapted_reference_omip69$celltype,
  original_celltypes = reference$celltype.l2,
  colors = colors
)

# Join adapted cell types onto the OMIP69 UMAP data by cell_id and plot
plot_adapted_omip69 <- umap_ref_omip69$data |>
  dplyr::left_join(
    adapted_reference_omip69[, c("cell_id", "celltype", "celltype_original")],
    by = "cell_id"
  ) |>
  plot_embedding(
    add_centroids = "text",
    highlight_labels = TRUE,
    title = "Adapted reference for OMIP-069",
    colors = colors,
    col = "celltype"
  )

# Side by side: original labels vs. adapted labels on the same marker subset
umap_ref_omip69$plot + plot_adapted_omip69
# Generate a diagram visualizing which cell types had to be merged
# Font color overrides for the two NK node labels of the diagram
fontcolor_nodes <- c(
  "NK" = "white",
  "NK Proliferating / NK" = "white"
)
plot_diagram(
  adapted_reference_omip69,
  colors = colors,
  fontcolor_nodes = fontcolor_nodes
)

# Combined figure of the reference UMAP and the OMIP23 / OMIP69 panels.
# patchwork design string: each letter names a plot area and `#` is an empty
# cell. The five plots below fill areas A-E in the order they are added.
# The string content must stay unindented.
layout <- "
##BBCC
AABBCC
AADDEE
##DDEE
"
# `&` (not `+`) so that the theme is applied to every panel
full_plot_small <- umap_ref$plot +
  umap_ref_omip23$plot + plot_adapted_omip23 +
  umap_ref_omip69$plot + plot_adapted_omip69 +
  plot_layout(design = layout) +
  patchwork::plot_annotation(tag_levels = "a") &
  theme(text = element_text(size = 12))
# full_plot
# ggsave(full_plot_small, filename = "../figs/adapt_reference_omip23_omip69.pdf", units = "cm", height = 25, width = 35)
full_plot_small
# Create a dummy query data frame based on HIMC markers
# Single all-zero row whose columns are named after the HIMC markers
dummy_query <- data.frame(matrix(0, nrow = 1, ncol = length(himc_panel)))
colnames(dummy_query) <- himc_panel

# Map reference markers to HIMC panel
mapped_dummy_himc <- map_marker_names(
  ref_markers = ref_markers,
  using_pbmc = TRUE,
  query = dummy_query,
  query_markers = himc_panel
)
## Renaming CXCR5 to CD185
## Renaming CCR6 to CD196
## Renaming PD-1 to CD279
## Renaming TCRgd to gdTCR
## Renaming CCR9 to CD199
## Renaming ICOS to CD278
## Warning in map_marker_names(ref_markers = ref_markers, using_pbmc = TRUE, : The following markers were not detected to be in the reference and were excluded from the data:
## IgG, IgA, CXCR3, NKG2A, anti, CD33, CCR7, a4b7, CD62L, CLA
## Markers of the Universal PBMC reference can be found in 'pbmc_markers'. If needed, markers can be manually mapped to reference markers using the 'map_specfic_from' and 'map_specfic_to' arguments

# Generate UMAP plot using HIMC markers.
# Starts from the data returned with the full-reference UMAP and restricts
# the markers to the columns that survived the mapping. Unlike the other
# per-panel UMAP calls, this one also passes the current palette explicitly.
umap_ref_himc <- plot_umap(
  umap_ref$data,
  down_sample = FALSE,
  return_data = TRUE,
  add_centroids = "text",
  title = "Z2YR marker subset (CyTOF)",
  colors = colors,
  markers = colnames(mapped_dummy_himc),
  col = "celltype.l2"
)
## Generating UMAP
umap_ref_himc$plot
# Adapt the Seurat reference to match HIMC markers
adapted_reference_himc <- adapt_reference(
  reference = reference,
  markers = colnames(mapped_dummy_himc),
  # mtry: half the number of mapped panel markers, rounded down
  mtry = ncol(mapped_dummy_himc) %/% 2,
  num.threads = 4,
  # Left disabled; the same four populations are already removed when
  # `reference` is built:
  # exclude_celltypes = c("Doublet", "Platelet", "Eryth", "HSPC"),
  using_pbmc = TRUE,
  seed = seed
)
## # ------- Population merging - Round 1 ------- #
## Running classification to identify similar populations
## Merging groups of similar populations
## Merging:
## ASDC, pDC
## B intermediate, B memory
## CD4 Proliferating, CD4 TEM, CD4 TCM
## CD8 Proliferating, CD8 TCM, CD8 TEM
## NK Proliferating, NK
## dnT, gdT
## # ------- Population merging - Round 2 ------- #
## Running classification to identify similar populations
##
## Reference adapted!
# Tabulate the adapted labels; the names are what custom colors
# would be keyed on (optional)
table(adapted_reference_himc$celltype)
##
## B intermediate / B memory B naive
## 5716 7718
## CD14 Mono CD16 Mono
## 42690 6320
## CD4 CTL CD4 Naive
## 1736 17479
## CD4 Proliferating / CD4 TEM / CD4 TCM CD8 Naive
## 19279 10768
## CD8 Proliferating / CD8 TCM / CD8 TEM ILC
## 14701 132
## MAIT NK Proliferating / NK
## 2784 17721
## NK_CD56bright Plasmablast
## 943 366
## Treg cDC1
## 2507 151
## cDC2 dnT / gdT
## 2501 4005
## pDC
## 937
# Adjust color mapping for specific cell types.
# Disabled manual overrides, kept as an example of setting colors by hand:
# colors["CD8 TCM / CD8 Naive"] <- colors["CD8 Naive"]
# colors["cDC"] <- colors["cDC1"]
# NOTE(review): expand_colors() is reached via `:::`, i.e. it is not
# exported from cyDefine — confirm this is intended for a vignette.
colors <- cyDefine:::expand_colors(
  adapted_celltypes = adapted_reference_himc$celltype,
  original_celltypes = reference$celltype.l2,
  colors = colors
)

# Join adapted cell types onto the HIMC UMAP data by cell_id and plot
plot_adapted_himc <- umap_ref_himc$data |>
  dplyr::left_join(
    adapted_reference_himc[, c("cell_id", "celltype", "celltype_original")],
    by = "cell_id"
  ) |>
  plot_embedding(
    add_centroids = "text",
    title = "Adapted reference for Z2YR (CyTOF)",
    highlight_labels = TRUE,
    colors = colors,
    col = "celltype"
  )

# Side by side: original labels vs. adapted labels on the same marker subset
umap_ref_himc$plot + plot_adapted_himc

# Generate a diagram visualizing the adapted reference structure
# (font color overrides for the two NK node labels)
fontcolor_nodes <- c(
  "NK" = "white",
  "NK Proliferating / NK" = "white"
)
plot_diagram(
  adapted_reference_himc,
  colors = colors,
  fontcolor_nodes = fontcolor_nodes
)
# Create a dummy query data frame based on POISED markers
# Single all-zero row whose columns are named after the POISED markers
dummy_query <- data.frame(matrix(0, nrow = 1, ncol = length(poised_panel)))
colnames(dummy_query) <- poised_panel

# Map reference markers to POISED panel.
# "integrin" is mapped by hand to the reference marker "Integrin-7" through
# map_specific_from / map_specific_to.
mapped_dummy_poised <- map_marker_names(
  ref_markers = ref_markers,
  using_pbmc = TRUE,
  query = dummy_query,
  query_markers = poised_panel,
  map_specific_from = c("integrin"),
  map_specific_to = c("Integrin-7")
)
## Mapping specified markers to reference markers:
## integrin to Integrin-7
## Renaming CCR4 to CD194
## Renaming PD1 to CD279
## Renaming TCRgd to gdTCR
## Renaming OX40 to CD134
## Renaming LAG3 to CD223
## Renaming CD40L to CD154
## Renaming HLA.DR to HLA-DR
## Warning in map_marker_names(ref_markers = ref_markers, using_pbmc = TRUE, : The following markers were not detected to be in the reference and were excluded from the data:
## IFNg, CXCR3, IL.17, LAP, GPR15, CD33, IL.4, IL9, CCR7, IL.10, CLA
## Markers of the Universal PBMC reference can be found in 'pbmc_markers'. If needed, markers can be manually mapped to reference markers using the 'map_specfic_from' and 'map_specfic_to' arguments

# Generate UMAP plot using POISED markers.
# Starts from the data returned with the full-reference UMAP and restricts
# the markers to the columns that survived the mapping.
umap_ref_poised <- plot_umap(
  umap_ref$data,
  down_sample = FALSE,
  return_data = TRUE,
  markers = colnames(mapped_dummy_poised),
  add_centroids = "text",
  title = "POISED marker subset",
  col = "celltype.l2"
)
## Generating UMAP
umap_ref_poised$plot
# Adapt the Seurat reference to match POISED markers
adapted_reference_poised <- adapt_reference(
  reference = reference,
  markers = colnames(mapped_dummy_poised),
  # mtry: half the number of mapped panel markers, rounded down
  mtry = ncol(mapped_dummy_poised) %/% 2,
  num.threads = 4,
  # Left disabled; the same four populations are already removed when
  # `reference` is built:
  # exclude_celltypes = c("Doublet", "Platelet", "Eryth", "HSPC"),
  using_pbmc = TRUE,
  seed = seed
)
## # ------- Population merging - Round 1 ------- #
## Running classification to identify similar populations
## Merging groups of similar populations
## Merging:
## ASDC, pDC
## B intermediate, B memory
## CD4 Proliferating, CD4 TEM, CD4 TCM
## CD8 Proliferating, CD8 TEM
## MAIT, gdT
## NK Proliferating, NK
## cDC1, cDC2
## # ------- Population merging - Round 2 ------- #
## Running classification to identify similar populations
## Merging groups of similar populations
## Merging:
## CD8 TCM, CD8 Proliferating / CD8 TEM
## # ------- Population merging - Round 3 ------- #
## Running classification to identify similar populations
## Merging groups of similar populations
## Merging:
## dnT, MAIT / gdT
## # ------- Population merging - Round 4 ------- #
## Running classification to identify similar populations
##
## Reference adapted!
# Tabulate the adapted labels; the names are what custom colors
# would be keyed on (optional)
table(adapted_reference_poised$celltype)
##
## B intermediate / B memory B naive
## 5716 7718
## CD14 Mono CD16 Mono
## 42690 6320
## CD4 CTL CD4 Naive
## 1736 17479
## CD4 Proliferating / CD4 TEM / CD4 TCM CD8 Naive
## 19279 10768
## CD8 TCM / CD8 Proliferating / CD8 TEM ILC
## 14701 132
## NK Proliferating / NK NK_CD56bright
## 17721 943
## Plasmablast Treg
## 366 2507
## cDC dnT / MAIT / gdT
## 2652 6789
## pDC
## 937
# Extend the palette to the adapted (merged) labels.
# NOTE(review): expand_colors() is reached via `:::`, i.e. it is not
# exported from cyDefine — confirm this is intended for a vignette.
colors <- cyDefine:::expand_colors(
  adapted_celltypes = adapted_reference_poised$celltype,
  original_celltypes = reference$celltype.l2,
  colors = colors
)

# Join adapted cell types onto the POISED UMAP data by cell_id and plot
plot_adapted_poised <- umap_ref_poised$data |>
  dplyr::left_join(
    adapted_reference_poised[, c("cell_id", "celltype", "celltype_original")],
    by = "cell_id"
  ) |>
  plot_embedding(
    add_centroids = "text",
    title = "Adapted reference for POISED",
    highlight_labels = TRUE,
    colors = colors,
    col = "celltype"
  )

# Side by side: original labels vs. adapted labels on the same marker subset
umap_ref_poised$plot + plot_adapted_poised

# Generate a diagram visualizing the adapted reference structure
# (font color overrides for the two NK node labels)
fontcolor_nodes <- c(
  "NK" = "white",
  "NK Proliferating / NK" = "white"
)
plot_diagram(
  adapted_reference_poised,
  colors = colors,
  fontcolor_nodes = fontcolor_nodes
)
Contact