Skip to contents

1. Overview

scIso provides tools to analyse isoform usage in single-cell long-read RNA-seq. Starting from an isoform count matrix and a standard Seurat object, scIso:

  • computes isoform-level metrics (Percent Inclusion, PI; heterogeneity, PHI),

  • visualises isoform usage on embeddings,

  • links isoform dynamics to pseudotime.

This vignette demonstrates a minimal end-to-end workflow using an example MPNST dataset downloaded from Zenodo.

Workflow summary

  • Load isoform counts and metadata (cell-type annotation, pseudotime).

  • Build a Seurat object and perform standard preprocessing.

  • Visualise clusters and cell-type annotation.

  • Compute and explore PI/PHI from an isoform count matrix.

  • Project isoform usage on embeddings and along pseudotime.

  • Briefly indicate how to adapt the workflow to your own data.

2. Load packages

We load scIso together with the core dependencies used in this vignette.

library(scIso)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union

3. Load example data from Zenodo

We use a pre-computed cell-type annotation, a pseudotime vector and an isoform count matrix hosted on Zenodo.

cell_types <- readRDS(gzcon(
  url("https://zenodo.org/records/17585277/files/Cell_type_annotation.rds")))

pseudotime <- readRDS(gzcon(
  url("https://zenodo.org/records/17654917/files/Pseudotime_2020_23.rds")))

if (curl::has_internet()) {
  tmp <- tempfile(fileext = ".zip")
  curl::curl_download("https://zenodo.org/records/17585277/files/iso_matrix.zip?download=1", tmp)
  unzip(tmp, exdir = tempdir())
  scmat_iso <- Seurat::Read10X(data.dir = tempdir())
}

4. Build a Seurat object

We construct a Seurat object using the isoform counts as a dedicated assay and apply a standard preprocessing pipeline (normalisation, variable feature selection, PCA, UMAP/t-SNE, neighbour graph, clustering).

assay = "ISO"
iso_assay = Seurat::CreateAssayObject(counts = scmat_iso)
seurat_obj = Seurat::CreateSeuratObject(counts = iso_assay,
                                        assay = assay,
                                        project.name = sample_name)

seurat_obj[[paste0('log_nCount_', assay)]] = log(seurat_obj[[paste0('nCount_', assay)]])

seurat_obj
#> An object of class Seurat 
#> 93670 features across 4532 samples within 1 assay 
#> Active assay: ISO (93670 features, 0 variable features)
#>  2 layers present: counts, data
head(rownames(seurat_obj@assays$ISO$counts))
#> [1] "BambuGene6100..BambuTx23926" "BambuGene1971..BambuTx30365"
#> [3] "BambuGene3230..BambuTx42042" "BambuGene8892..BambuTx58725"
#> [5] "Mup10..BambuTx68486"         "Mup12..BambuTx68510"
seurat_obj = Seurat::NormalizeData(seurat_obj,
                                   normalization.method = "LogNormalize",
                                   assay = "ISO",
                                   verbose=F)

seurat_obj = Seurat::FindVariableFeatures(seurat_obj,
                                          assay = "ISO",
                                          nfeatures = 3000,
                                          verbose=F)
seurat_obj
#> An object of class Seurat 
#> 93670 features across 4532 samples within 1 assay 
#> Active assay: ISO (93670 features, 3000 variable features)
#>  2 layers present: counts, data
common_cells <- intersect(colnames(seurat_obj), names(cell_types))
seurat_obj <- seurat_obj[,common_cells]
Seurat::DefaultAssay(seurat_obj) = "ISO"
ndims = 18
seurat_obj = Seurat::ScaleData(seurat_obj,
                               features = rownames(seurat_obj), 
                               verbose = F)

var_features = Seurat::VariableFeatures(object = seurat_obj)

seurat_obj = Seurat::RunPCA(seurat_obj, 
                            features = var_features,
                            verbose = F, 
                            reduction.name = "RNA_pca", 
                            max_dims = 100)

seurat_obj = Seurat::RunUMAP(seurat_obj,
                             reduction = "RNA_pca",
                             dims = 1:ndims,
                             seed.use = 1337L,
                             verbose = F,
                             reduction.name = paste0("RNA_pca_", ndims, "_umap"))
#> Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
#> To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
#> This message will be shown once per session

seurat_obj = Seurat::RunTSNE(seurat_obj,
                             reduction = "RNA_pca",
                             dims = 1:ndims,
                             seed.use = 1337L,
                             verbose = F,
                             reduction.name = paste0("RNA_pca_", ndims, "_tsne"))

seurat_obj
#> An object of class Seurat 
#> 93670 features across 3566 samples within 1 assay 
#> Active assay: ISO (93670 features, 3000 variable features)
#>  3 layers present: counts, data, scale.data
#>  3 dimensional reductions calculated: RNA_pca, RNA_pca_18_umap, RNA_pca_18_tsne
seurat_obj = Seurat::FindNeighbors(seurat_obj, reduction = "RNA_pca", dims = c(1:ndims))
#> Computing nearest neighbor graph
#> Computing SNN
seurat_obj = Seurat::FindClusters(seurat_obj, resolution = 0.5)
#> Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
#> 
#> Number of nodes: 3566
#> Number of edges: 128336
#> 
#> Running Louvain algorithm...
#> Maximum modularity in 10 random starts: 0.8735
#> Number of communities: 9
#> Elapsed time: 0 seconds

5. Add cell-type annotation

We now bring in the external cell-type labels and overlay them on the embedding.

cell_types_named <- levels(cell_types)[cell_types]
names(cell_types_named) <- names(cell_types)

seurat_obj$cell_types <- NA_character_
seurat_obj$cell_types[common_cells] <- cell_types_named[common_cells]
proj <- "tsne"
Seurat::DimPlot(seurat_obj, reduction = paste0("RNA_pca_18_", proj), label = T) +
                      ggplot2::labs(title = paste0(proj,"\n", "MPNST"),
                                    subtitle = paste0(ncol(seurat_obj), " cells")) +
                      Seurat::NoAxes() +
                      ggplot2::theme(aspect.ratio = 1,
                                     plot.title = ggplot2::element_text(hjust = 0.5),
                                     plot.subtitle = ggplot2::element_text(hjust = 0.5)) |

            Seurat::DimPlot(seurat_obj, reduction = paste0("RNA_pca_18_", proj), group.by = "cell_types") +
                    ggplot2::labs(title = paste0(proj,"\n", "MPNST"),
                                  subtitle = paste0(ncol(seurat_obj), " cells")) +
                    Seurat::NoAxes() +
                    ggplot2::theme(aspect.ratio = 1,
                                   plot.title = ggplot2::element_text(hjust = 0.5),
                                   plot.subtitle = ggplot2::element_text(hjust = 0.5))

#p_clusters | p_annot

6. Explore isoform PI/PHI

scIso uses PI (Percent Inclusion) and PHI (heterogeneity) to summarise isoform usage per gene. For speed, we load a pre-computed PI matrix, but the same result can be obtained with computePIMatrix() on the fly.

#pi_mtx <- scIso::computePIMatrix(seurat_obj, 
#                                 assay = "ISO",
#                                 slot = "count")
if (curl::has_internet()) {
  tmp <- tempfile(fileext = ".rds")
  curl::curl_download("https://zenodo.org/records/17585277/files/pi_mtx.rds?download=1", tmp)
  pi_mtx <- readRDS(tmp)
}
scIso::plotPIPHI(pi_mat = pi_mtx,
                 iso_mat = seurat_obj@assays$ISO$counts,
                 min_gene_count = 3000
                 )

7. Visualise isoform usage

We next contrast two isoforms of Srsf3 on the t-SNE embedding, using FeaturePlotPI(). This function computes PI for the selected isoforms and displays PI and log-ratio at the single-cell level.

p <- scIso::FeaturePlotPI(seurat_obj, 
                     name = "Srsf3", 
                     reduction = paste0("RNA_pca_18_", proj),
                     iso1 = "Srsf3..ENSMUST00000130216",
                     iso2 = "Srsf3..ENSMUST00000037776")
#> Warning: The `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0.
#>  Please use the `layer` argument instead.
#>  The deprecated feature was likely used in the scIso package.
#>   Please report the issue at <https://github.com/alihamraoui/scIso/issues>.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
#> generated.
p[[1]] | p[[3]] | p[[4]]

p[[1]] | p[[2]]

8. Restrict to tumour cells and add pseudotime

We focus on tumour cells, refine the annotation, and add the pseudotime trajectory previously inferred for this dataset.

major <- seurat_obj@meta.data %>%
            group_by(seurat_clusters) %>%
            summarise(cell_annotation=names(which.max(table(cell_types))))

idx <- match(seurat_obj@meta.data$seurat_clusters, major$seurat_clusters)
seurat_obj$cell_pop <- major$cell_annotation[idx]
seurat_obj <- subset(seurat_obj, subset = cell_pop == "tumor cells")

seurat_obj$tumor_cell_type <- ifelse(seurat_obj$seurat_clusters == 4, "Sox10+", "Sox9+")
seurat_obj$tumor_cell_type <- ifelse(seurat_obj$seurat_clusters == 4, "Sox10+", "Sox9+")
names(pseudotime) <- sub(".*_", "", names(pseudotime))

seurat_obj$pseudotime <- pseudotime
Seurat::DimPlot(seurat_obj, reduction = paste0("RNA_pca_18_", proj), group.by = "cell_pop") +
                    ggplot2::labs(title = paste0(proj,"\n", "MPNST"),
                                  subtitle = paste0(ncol(seurat_obj), " cells")) +
                    Seurat::NoAxes() +
                    ggplot2::theme(aspect.ratio = 1,
                                   plot.title = ggplot2::element_text(hjust = 0.5),
                                   plot.subtitle = ggplot2::element_text(hjust = 0.5)) |
  
  
  Seurat::DimPlot(seurat_obj, reduction = paste0("RNA_pca_18_", proj), group.by = "tumor_cell_type") +
                    ggplot2::labs(title = paste0(proj,"\n", "MPNST"),
                                  subtitle = paste0(ncol(seurat_obj), " cells")) +
                    Seurat::NoAxes() +
                    ggplot2::theme(aspect.ratio = 1,
                                   plot.title = ggplot2::element_text(hjust = 0.5),
                                   plot.subtitle = ggplot2::element_text(hjust = 0.5)) |
  
  
  Seurat::FeaturePlot(seurat_obj, features = "pseudotime",
                    reduction = paste0("RNA_pca_18_", proj)) +
  ggplot2::scale_color_gradientn(colors = rev(RColorBrewer::brewer.pal(name = "Spectral", n = 10))) +
  ggplot2::ggtitle("Pseudotime") +
  ggplot2::theme(aspect.ratio = 1,
                 plot.title = ggplot2::element_text(hjust = 0.5)) +
  Seurat::NoAxes()
#> Scale for colour is already present.
#> Adding another scale for colour, which will replace the existing scale.

9. Isoform dynamics along pseudotime

IsoformPseudotimePlot() summarises isoform usage for a given gene across pseudotime bins and optionally stratified by cell groups.

scIso::IsoformPseudotimePlot(seurat_obj, 
                             gene = "Dnaja1",
                             band_cols = "tumor_cell_type",
                             pseudotime_col = "pseudotime",
                             nbins = 80,
                             mode = "PI",
                             slot = "data")
#> Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
#>  Please use `linewidth` instead.
#>  The deprecated feature was likely used in the scIso package.
#>   Please report the issue at <https://github.com/alihamraoui/scIso/issues>.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
#> generated.
#> `geom_smooth()` using formula = 'y ~ x'
#> Warning: Removed 764 rows containing non-finite outside the scale range
#> (`stat_smooth()`).

scIso::IsoformPseudotimePlot(seurat_obj, 
                             gene = "Dnaja1",
                             band_cols = c("tumor_cell_type", "tumor_cell_type"),
                             pseudotime_col = "pseudotime", 
                             nbins = 30,
                             mode = "exp",
                             slot = "data")
#> `geom_smooth()` using formula = 'y ~ x'

10. Using scIso on your own data

  • To apply this workflow to your own dataset:

  • replace the example isoform matrix with your own (features = isoforms or isoform-level identifiers, columns = cells/barcodes);

  • construct a Seurat object with an isoform assay (e.g. “ISO”) and follow your preferred preprocessing strategy;

  • compute PI using computePIMatrix() on your isoform count matrix;

  • add any relevant metadata (cell types, clusters, trajectories) to ;

  • use plotPIPHI(), FeaturePlotPI() and IsoformPseudotimePlot() to interrogate isoform heterogeneity and dynamics.

sessionInfo()
#> R version 4.5.2 (2025-10-31)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.3 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so;  LAPACK version 3.12.0
#> 
#> locale:
#>  [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
#>  [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
#>  [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
#> [10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   
#> 
#> time zone: UTC
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] future_1.68.0 dplyr_1.1.4   scIso_0.1.0  
#> 
#> loaded via a namespace (and not attached):
#>   [1] RColorBrewer_1.1-3     jsonlite_2.0.0         magrittr_2.0.4        
#>   [4] spatstat.utils_3.2-0   farver_2.1.2           rmarkdown_2.30        
#>   [7] fs_1.6.6               ragg_1.5.0             vctrs_0.6.5           
#>  [10] ROCR_1.0-11            spatstat.explore_3.6-0 paletteer_1.6.0       
#>  [13] htmltools_0.5.8.1      curl_7.0.0             sass_0.4.10           
#>  [16] sctransform_0.4.2      parallelly_1.45.1      KernSmooth_2.23-26    
#>  [19] bslib_0.9.0            htmlwidgets_1.6.4      desc_1.4.3            
#>  [22] ica_1.0-3              plyr_1.8.9             plotly_4.11.0         
#>  [25] zoo_1.8-14             cachem_1.1.0           igraph_2.2.1          
#>  [28] mime_0.13              lifecycle_1.0.4        pkgconfig_2.0.3       
#>  [31] Matrix_1.7-4           R6_2.6.1               fastmap_1.2.0         
#>  [34] fitdistrplus_1.2-4     shiny_1.11.1           digest_0.6.39         
#>  [37] rematch2_2.1.2         patchwork_1.3.2        Seurat_5.3.1          
#>  [40] tensor_1.5.1           prismatic_1.1.2        RSpectra_0.16-2       
#>  [43] irlba_2.3.5.1          textshaping_1.0.4      labeling_0.4.3        
#>  [46] progressr_0.18.0       latex2exp_0.9.6        spatstat.sparse_3.1-0 
#>  [49] mgcv_1.9-3             httr_1.4.7             polyclip_1.10-7       
#>  [52] abind_1.4-8            compiler_4.5.2         withr_3.0.2           
#>  [55] S7_0.2.1               fastDummies_1.7.5      MASS_7.3-65           
#>  [58] tools_4.5.2            lmtest_0.9-40          otel_0.2.0            
#>  [61] httpuv_1.6.16          future.apply_1.20.0    goftest_1.2-3         
#>  [64] glue_1.8.0             nlme_3.1-168           promises_1.5.0        
#>  [67] grid_4.5.2             Rtsne_0.17             cluster_2.1.8.1       
#>  [70] reshape2_1.4.5         generics_0.1.4         gtable_0.3.6          
#>  [73] spatstat.data_3.1-9    tidyr_1.3.1            data.table_1.17.8     
#>  [76] sp_2.2-0               spatstat.geom_3.6-1    RcppAnnoy_0.0.22      
#>  [79] ggrepel_0.9.6          RANN_2.6.2             pillar_1.11.1         
#>  [82] stringr_1.6.0          spam_2.11-1            RcppHNSW_0.6.0        
#>  [85] later_1.4.4            splines_4.5.2          lattice_0.22-7        
#>  [88] survival_3.8-3         deldir_2.0-4           tidyselect_1.2.1      
#>  [91] miniUI_0.1.2           pbapply_1.7-4          knitr_1.50            
#>  [94] gridExtra_2.3          scattermore_1.2        xfun_0.54             
#>  [97] matrixStats_1.5.0      stringi_1.8.7          lazyeval_0.2.2        
#> [100] yaml_2.3.10            evaluate_1.0.5         codetools_0.2-20      
#> [103] tibble_3.3.0           cli_3.6.5              uwot_0.2.4            
#> [106] xtable_1.8-4           reticulate_1.44.1      systemfonts_1.3.1     
#> [109] jquerylib_0.1.4        Rcpp_1.1.0             globals_0.18.0        
#> [112] spatstat.random_3.4-3  png_0.1-8              spatstat.univar_3.1-5 
#> [115] parallel_4.5.2         pkgdown_2.2.0          ggplot2_4.0.1         
#> [118] dotCall64_1.2          listenv_0.10.0         viridisLite_0.4.2     
#> [121] ggthemes_5.1.0         scales_1.4.0           ggridges_0.5.7        
#> [124] crayon_1.5.3           SeuratObject_5.2.0     purrr_1.2.0           
#> [127] rlang_1.1.6            cowplot_1.2.0