Title: | Tools for managing SingleCellExperiment objects as projects |
---|---|
Description: | Tools for analyzing SingleCellExperiment objects as projects, for input into the Chevreul app downstream. Includes functions for analysis of single cell RNA sequencing data. Supported by NIH grants R01CA137124 and R01EY026661 to David Cobrinik. |
Authors: | Kevin Stachelek [aut, cre] , Bhavana Bhat [aut] |
Maintainer: | Kevin Stachelek <[email protected]> |
License: | MIT + file LICENSE |
Version: | 0.99.1 |
Built: | 2024-10-30 02:49:45 UTC |
Source: | https://github.com/whtns/chevreulProcess |
Add a Percentage of Mitochondrial Read Count Categorical Variable to the Object (based on nCount_RNA)
add_percent_mito(object, experiment = "gene")
add_percent_mito(object, experiment = "gene")
object |
A SingleCellExperiment object |
experiment |
gene |
a single cell object with cell metadata column containing mitochondrial percentage
data(small_example_dataset) add_percent_mito(small_example_dataset)
data(small_example_dataset) add_percent_mito(small_example_dataset)
Annotate Cell Cycle for Gene and Transcript SingleCellExperiment Objects
annotate_cell_cycle(object)
annotate_cell_cycle(object)
object |
A SingleCellExperiment object |
a SingleCellExperiment object
Append projects to database
append_to_project_db( new_project_path, cache_location = "~/.cache/chevreul", sqlite_db = "single-cell-projects.db", verbose = TRUE )
append_to_project_db( new_project_path, cache_location = "~/.cache/chevreul", sqlite_db = "single-cell-projects.db", verbose = TRUE )
new_project_path |
new project path |
cache_location |
Path to cache "~/.cache/chevreul" |
sqlite_db |
sqlite db |
verbose |
print messages |
a sqlite database with SingleCellExperiment objects
Create a sqlite database of bigwig files matching cell ids in objects
build_bigwig_db(bam_files, bigwig_db = "~/.cache/chevreul/bw-files.db")
build_bigwig_db(bam_files, bigwig_db = "~/.cache/chevreul/bw-files.db")
bam_files |
vector of paths to bam files |
bigwig_db |
bigwig database |
a path to a bigwig file sqlite database
cell cycle genes with paired expression represented by HGNC symbol
cc.genes.cyclone
cc.genes.cyclone
a list of dataframes with G1, G2, and S gene expression
G1 gene symbols
G2 gene symbols
S gene symbols
...
cyclone
Cluster and Reduce Dimensions of a SingleCellExperiment object
clustering_workflow( object, excluded_cells, resolution = seq(0.2, 1, by = 0.2), organism = "human", experiment_name = "default_experiment", ... )
clustering_workflow( object, excluded_cells, resolution = seq(0.2, 1, by = 0.2), organism = "human", experiment_name = "default_experiment", ... )
object |
a SingleCellExperiment object |
excluded_cells |
named list of cells to exclude |
resolution |
resolution(s) to use for clustering cells |
organism |
Organism |
experiment_name |
name of the experiment |
... |
extra args passed to sce_process |
a clustered SingleCellExperiment object
Convert SingleCellExperiment Objects from Human to Mouse
convert_human_sce_to_mouse(object, ...)
convert_human_sce_to_mouse(object, ...)
object |
Human SingleCellExperiment object |
... |
to be passed to |
a SingleCellExperiment object
Convert gene symbols between mouse and human
convert_symbols_by_species(src_genes, src_species)
convert_symbols_by_species(src_genes, src_species)
src_genes |
Source gene symbol to be converted |
src_species |
Source species |
a SingleCellExperiment object
Create a database containing chevreul projects
create_project_db( cache_location = "~/.cache/chevreul", sqlite_db = "single-cell-projects.db", verbose = TRUE )
create_project_db( cache_location = "~/.cache/chevreul", sqlite_db = "single-cell-projects.db", verbose = TRUE )
cache_location |
Path to cache "~/.cache/chevreul" |
sqlite_db |
Database to be created |
verbose |
print messages |
a sqlite database with SingleCellExperiment objects
Ensembl version used for build
ensembl_version
ensembl_version
An object of class character
of length 1.
# ensembl_version
# ensembl_version
Find all markers at a range of resolutions
find_all_markers(object, group_by = NULL, experiment = "gene", ...)
find_all_markers(object, group_by = NULL, experiment = "gene", ...)
object |
An object. |
group_by |
A metadata variable to group by. |
experiment |
Assay to use, Default "gene". |
... |
extra args passed to stash_marker_features |
a SingleCellExperiment object containing marker genes
data("small_example_dataset") find_all_markers(small_example_dataset, "gene_snn_res.1")
data("small_example_dataset") find_all_markers(small_example_dataset, "gene_snn_res.1")
convert hgnc gene symbols to ensembl transcript ids
genes_to_transcripts(symbols, organism = "human")
genes_to_transcripts(symbols, organism = "human")
symbols |
character vector of gene symbols |
organism |
mouse or human |
a vector of transcripts
genes_to_transcripts("NRL")
genes_to_transcripts("NRL")
Get cell metadata
get_colData(object)
get_colData(object)
object |
a SingleCellExperiment object |
dataframe containing object metadata
data(small_example_dataset) get_colData(small_example_dataset)
data(small_example_dataset) get_colData(small_example_dataset)
Get Feature Types
get_feature_types(object)
get_feature_types(object)
object |
a SingleCellExperiment object |
vector of feature types in an object
data(small_example_dataset) get_feature_types(small_example_dataset)
data(small_example_dataset) get_feature_types(small_example_dataset)
Get feature names
get_features(object, experiment = "gene")
get_features(object, experiment = "gene")
object |
a SingleCellExperiment object |
experiment |
"gene" or "transcript" |
variable features from a SingleCellExperiment object
data(small_example_dataset) get_features(small_example_dataset)
data(small_example_dataset) get_features(small_example_dataset)
Get object metadata
get_sce_metadata(object)
get_sce_metadata(object)
object |
a SingleCellExperiment object |
metadata from a SingleCellExperiment object
data(small_example_dataset) get_sce_metadata(small_example_dataset)
data(small_example_dataset) get_sce_metadata(small_example_dataset)
Get transcript ids in objects for one or more gene of interest
get_transcripts_from_sce(object, gene, organism = "human")
get_transcripts_from_sce(object, gene, organism = "human")
object |
A SingleCellExperiment object |
gene |
Gene of interest |
organism |
Organism |
transcripts constituting a gene of interest in a SingleCellExperiment object
Get variable features
get_variable_features(object, experiment = "gene")
get_variable_features(object, experiment = "gene")
object |
a SingleCellExperiment object |
experiment |
"gene" or "transcript" |
variable features from a SingleCellExperiment object
data(small_example_dataset) get_variable_features(small_example_dataset)
data(small_example_dataset) get_variable_features(small_example_dataset)
Human (Homo sapiens) annotations based on genome assembly GRCH38 from Ensembl.
grch38
grch38
An object of class tbl_df (inherits from tbl, data.frame) with 76062 rows and 9 columns.
Variables:
ensgene
entrez
symbol
chr
start
end
strand
biotype
description
http://ensembl.org/homo_sapiens
data("grch38") head(grch38)
data("grch38") head(grch38)
Lookup table for converting Human (Homo sapiens) Ensembl transcript IDs to gene IDs based on genome assembly GRCH38 from Ensembl.
grch38_tx2gene
grch38_tx2gene
An object of class tbl_df (inherits from tbl, data.frame) with 277081 rows and 2 columns.
Variables:
enstxp
ensgene
http://ensembl.org/homo_sapiens
data(grch38_tx2gene) head(grch38_tx2gene)
data(grch38_tx2gene) head(grch38_tx2gene)
Homologs drawn from Biomart
human_to_mouse_homologs
human_to_mouse_homologs
A data frame with 23188 rows and 2 columns
human gene symbols
mouse gene symbols
...
bioMart
Batch Correct Multiple Single Cell Objects
integrate(sce_list, organism = "human", ...)
integrate(sce_list, organism = "human", ...)
sce_list |
List of two or more SingleCellExperiment objects |
organism |
human or mouse |
... |
extra args passed to sce_reduce_dimensions |
an integrated SingleCellExperiment object
Integrate multiple objects and save to file
integration_workflow( batches, excluded_cells = NULL, resolution = seq(0.2, 1, by = 0.2), experiment_name = "default_experiment", organism = "human", ... )
integration_workflow( batches, excluded_cells = NULL, resolution = seq(0.2, 1, by = 0.2), experiment_name = "default_experiment", organism = "human", ... )
batches |
objects for all batches provided as a list. If named, the resulting integrated object will be identified with corresponding values in 'batch' metadata |
excluded_cells |
named list of cells to exclude |
resolution |
value(s) to control the clustering resolution |
experiment_name |
arbitrary name to identify experiment |
organism |
either "human" or "mouse" |
... |
extra args passed to sce_integrate |
an integrated SingleCellExperiment object
Load a tibble of bigwig file paths by cell id
load_bigwigs(object, bigwig_db = "~/.cache/chevreul/bw-files.db")
load_bigwigs(object, bigwig_db = "~/.cache/chevreul/bw-files.db")
object |
A SingleCellExperiment object |
bigwig_db |
Sqlite database of bigwig files |
a vector of bigwigs file paths
Load SingleCellExperiment Files from a single project path
load_sce_from_proj(proj_dir, ...)
load_sce_from_proj(proj_dir, ...)
proj_dir |
project directory |
... |
extra args passed to load_sce_path |
a SingleCellExperiment object
Read in Gene and Transcript SingleCellExperiment Objects
load_sce_path(proj_dir = getwd(), prefix = "unfiltered")
load_sce_path(proj_dir = getwd(), prefix = "unfiltered")
proj_dir |
path to project directory |
prefix |
default "unfiltered" |
a SingleCellExperiment object
Make Bigwig Database
make_bigwig_db( new_project = NULL, cache_location = "~/.cache/chevreul/", sqlite_db = "bw-files.db" )
make_bigwig_db( new_project = NULL, cache_location = "~/.cache/chevreul/", sqlite_db = "bw-files.db" )
new_project |
Project directory |
cache_location |
Path to cache "~/.cache/chevreul" |
sqlite_db |
sqlite db containing bw files |
a sqlite database of bigwig files for cells in a SingleCellExperiment object
Merge Small SingleCellExperiment Objects
merge_small_sces(..., k.filter = 50)
merge_small_sces(..., k.filter = 50)
... |
two or more singlecell objects |
k.filter |
minimum cell number for integration |
a SingleCellExperiment object
Retrieve Metadata from Batch
metadata_from_batch( batch, projects_dir = "/dataVolume/storage/single_cell_projects", db_path = "single-cell-projects.db" )
metadata_from_batch( batch, projects_dir = "/dataVolume/storage/single_cell_projects", db_path = "single-cell-projects.db" )
batch |
batch |
projects_dir |
path to project dir |
db_path |
path to .db file |
a tibble with cell level metadata from a SingleCellExperiment object
Propagate Metadata Changes
propagate_spreadsheet_changes(meta, object)
propagate_spreadsheet_changes(meta, object)
meta |
updated metadata |
object |
a SingleCellExperiment object |
a SingleCellExperiment object
data(small_example_dataset) new_meta <- data.frame(row.names = colnames(small_example_dataset)) new_meta$example <- "example" propagate_spreadsheet_changes(new_meta, small_example_dataset)
data(small_example_dataset) new_meta <- data.frame(row.names = colnames(small_example_dataset)) new_meta$example <- "example" propagate_spreadsheet_changes(new_meta, small_example_dataset)
Query Experiment
query_experiment(object, experiment)
query_experiment(object, experiment)
object |
a SingleCellExperiment object |
experiment |
an experiment name |
logical scalar indicating if experiment is present in object
data(small_example_dataset) query_experiment(small_example_dataset, "gene")
data(small_example_dataset) query_experiment(small_example_dataset, "gene")
Reads database of chevreul projects to a data frame
read_project_db( cache_location = "~/.cache/chevreul", sqlite_db = "single-cell-projects.db", verbose = TRUE )
read_project_db( cache_location = "~/.cache/chevreul", sqlite_db = "single-cell-projects.db", verbose = TRUE )
cache_location |
Path to cache "~/.cache/chevreul" |
sqlite_db |
sqlite db |
verbose |
print messages |
a tibble with SingleCellExperiment objects
Records miscellaneous data
record_experiment_data( object, experiment_name = "default_experiment", organism = "human" )
record_experiment_data( object, experiment_name = "default_experiment", organism = "human" )
object |
A SingleCellExperiment object |
experiment_name |
name of the experiment |
organism |
human or mouse |
a SingleCellExperiment object
data(small_example_dataset) record_experiment_data(small_example_dataset)
data(small_example_dataset) record_experiment_data(small_example_dataset)
Regress SingleCellExperiment Object by Given Set of Genes
regress_cell_cycle(object)
regress_cell_cycle(object)
object |
A SingleCellExperiment object |
a SingleCellExperiment object with features regressed
This function takes a SCE object and performs the below steps
split by batch
integrate
run integration pipeline and save
reintegrate_sce(object, suffix = "", reduction = "PCA", ...)
reintegrate_sce(object, suffix = "", reduction = "PCA", ...)
object |
A SingleCellExperiment object |
suffix |
to be appended to file saved in output dir |
reduction |
dimensional reduction to use; default is "PCA" |
... |
extra args passed to sce_integrate |
a SingleCellExperiment object
Give a new project name to a SingleCellExperiment object
rename_sce(object, new_name)
rename_sce(object, new_name)
object |
A SingleCellExperiment object |
new_name |
New name to assign |
a renamed SingleCellExperiment object
data(small_example_dataset) rename_sce(small_example_dataset, "new_name")
data(small_example_dataset) rename_sce(small_example_dataset, "new_name")
Retrieve Assay
retrieve_experiment(object, experiment)
retrieve_experiment(object, experiment)
object |
a SingleCellExperiment object |
experiment |
an experiment name |
Main or alt experiment in a SingleCellExperiment object
data(small_example_dataset) mainExpName(small_example_dataset) <- "gene" retrieve_experiment(small_example_dataset, experiment = "gene")
data(small_example_dataset) mainExpName(small_example_dataset) <- "gene" retrieve_experiment(small_example_dataset, experiment = "gene")
Save object to an rds file
save_sce(object, prefix = "unfiltered", proj_dir = getwd())
save_sce(object, prefix = "unfiltered", proj_dir = getwd())
object |
a SingleCellExperiment object |
prefix |
a prefix for saving |
proj_dir |
path to a project directory |
a path to an rds file containing a SingleCellExperiment object
Recalculate counts/features per cell for a SingleCellExperiment object
sce_calcn(object)
sce_calcn(object)
object |
A SingleCellExperiment object |
a SingleCellExperiment object with nfeatures and ngenes stored in metadata
data(small_example_dataset) sce_calcn(small_example_dataset)
data(small_example_dataset) sce_calcn(small_example_dataset)
Run Louvain Clustering at Multiple Resolutions
sce_cluster( object = object, resolution = 0.6, custom_clust = NULL, reduction = "PCA", algorithm = 1, ... )
sce_cluster( object = object, resolution = 0.6, custom_clust = NULL, reduction = "PCA", algorithm = 1, ... )
object |
A SingleCellExperiment object |
resolution |
Clustering resolution |
custom_clust |
custom cluster |
reduction |
Set dimensional reduction object |
algorithm |
1 |
... |
extra args passed to single cell packages |
a SingleCellExperiment object with louvain clusters
data(small_example_dataset) sce_cluster(small_example_dataset)
data(small_example_dataset) sce_cluster(small_example_dataset)
Run Differential Expression
sce_de( object, cluster1, cluster2, resolution = 0.2, diffex_scheme = "louvain", featureType = "gene", tests = c("t", "wilcox", "bimod") )
sce_de( object, cluster1, cluster2, resolution = 0.2, diffex_scheme = "louvain", featureType = "gene", tests = c("t", "wilcox", "bimod") )
object |
a SingleCellExperiment object |
cluster1 |
cluster 1 |
cluster2 |
cluster 2 |
resolution |
resolution |
diffex_scheme |
scheme for differential expression |
featureType |
gene or transcript |
tests |
t, wilcox, or bimod |
a dataframe with differential expression information
data("tiny_sce") sce_de(tiny_sce, colnames(tiny_sce)[1:100], colnames(tiny_sce)[101:200], diffex_scheme = "custom")
data("tiny_sce") sce_de(tiny_sce, colnames(tiny_sce)[1:100], colnames(tiny_sce)[101:200], diffex_scheme = "custom")
Run batch correction, followed by:
stashing of batches in metadata 'batch'
clustering with resolution 0.2 to 1.0 in increments of 0.2 (by default)
saving to <proj_dir>/output/sce/
sce_integrate( sce_list, resolution = seq(0.2, 1, by = 0.2), suffix = "", organism = "human", annotate_cell_cycle = FALSE, annotate_percent_mito = FALSE, reduction = "corrected", ... )
sce_integrate( sce_list, resolution = seq(0.2, 1, by = 0.2), suffix = "", organism = "human", annotate_cell_cycle = FALSE, annotate_percent_mito = FALSE, reduction = "corrected", ... )
sce_list |
List of objects to be integrated |
resolution |
Range of resolution |
suffix |
a suffix to be appended to a file save in output dir |
organism |
Default "human" |
annotate_cell_cycle |
whether to score cell cycle phases |
annotate_percent_mito |
logical scalar whether to annotate mitochondrial percentage |
reduction |
pca, umap, or tsne |
... |
extra args passed to integrate |
an integrated SingleCellExperiment object
data("small_example_dataset") small_example_dataset |> splitByCol("Mutation_Status") |> sce_integrate()
data("small_example_dataset") small_example_dataset |> splitByCol("Mutation_Status") |> sce_integrate()
Performs standard pre-processing workflow for scRNA-seq data
sce_preprocess( object, scale = TRUE, normalize = TRUE, features = NULL, legacy_settings = FALSE, ... )
sce_preprocess( object, scale = TRUE, normalize = TRUE, features = NULL, legacy_settings = FALSE, ... )
object |
Assay to use |
scale |
Perform linear transformation 'Scaling' |
normalize |
Perform normalization |
features |
Identify highly variable features |
legacy_settings |
Use legacy settings |
... |
extra args passed to scaling functions |
a preprocessed SingleCellExperiment object
data("small_example_dataset") sce_preprocess(small_example_dataset)
data("small_example_dataset") sce_preprocess(small_example_dataset)
This function allows you to preprocess, cluster and reduce dimensions for one SingleCellExperiment object.
sce_process( object, experiment = "gene", resolution = 0.6, reduction = "PCA", organism = "human", ... )
sce_process( object, experiment = "gene", resolution = 0.6, reduction = "PCA", organism = "human", ... )
object |
A SingleCellExperiment object |
experiment |
Assay of interest in SingleCellExperiment object |
resolution |
Resolution for clustering cells. Default set to 0.6. |
reduction |
Dimensional reduction object |
organism |
Organism |
... |
extra parameters passed to internal functions |
a processed SingleCellExperiment object
data(small_example_dataset) sce_process(small_example_dataset)
data(small_example_dataset) sce_process(small_example_dataset)
Run PCA, TSNE and UMAP on a SingleCellExperiment object. Perplexity must satisfy 3 * perplexity < nrow(X) - 1; see details for interpretation
sce_reduce_dimensions(object, experiment = "gene", ...)
sce_reduce_dimensions(object, experiment = "gene", ...)
object |
A SingleCellExperiment object |
experiment |
Experiment of interest to be processed |
... |
Extra parameters passed to sce_reduce_dimensions |
a SingleCellExperiment object with embeddings
Set cell metadata from a given object
set_colData(object, meta)
set_colData(object, meta)
object |
a SingleCellExperiment object |
meta |
a dataframe containing object metadata |
a SingleCellExperiment object with new colData
data(small_example_dataset) new_meta <- data.frame(row.names = colnames(small_example_dataset)) new_meta$example <- "example" set_colData(small_example_dataset, new_meta)
data(small_example_dataset) new_meta <- data.frame(row.names = colnames(small_example_dataset)) new_meta$example <- "example" set_colData(small_example_dataset, new_meta)
Set Feature Types
set_feature_type(object, feature_type)
set_feature_type(object, feature_type)
object |
a SingleCellExperiment object |
feature_type |
feature type |
a SingleCellExperiment object with assigned feature type
data(small_example_dataset) set_feature_type(small_example_dataset, "transcript")
data(small_example_dataset) set_feature_type(small_example_dataset, "transcript")
created with scuttle::mockSCE
small_example_dataset
small_example_dataset
An SCE with 200 cells and 1000 genes
scuttle::mockSCE
Split SingleCellExperiment by colData variable
splitByCol(x, f = "batch")
splitByCol(x, f = "batch")
x |
SingleCellExperiment object |
f |
colData variable as a string |
a list of SingleCellExperiment objects named by colData value
data(small_example_dataset) splitByCol(small_example_dataset, "batch")
data(small_example_dataset) splitByCol(small_example_dataset, "batch")
Marker Genes will be stored in object metadata as markers
stash_marker_features( object, group_by, experiment = "gene", top_n = 200, p_val_cutoff = 0.5 )
stash_marker_features( object, group_by, experiment = "gene", top_n = 200, p_val_cutoff = 0.5 )
object |
A SingleCellExperiment object |
group_by |
A metadata variable to group by |
experiment |
An experiment to use |
top_n |
Use top n genes, Default 200 |
p_val_cutoff |
p value cut-off, Default value is "0.5" |
a SingleCellExperiment object with marker genes
data("small_example_dataset") small_example_dataset <- find_all_markers(small_example_dataset, "gene_snn_res.1") stash_marker_features(small_example_dataset, "gene_snn_res.1")
data("small_example_dataset") small_example_dataset <- find_all_markers(small_example_dataset, "gene_snn_res.1") stash_marker_features(small_example_dataset, "gene_snn_res.1")
Subset the object using new colData
subset_by_colData(colData_path, object)
subset_by_colData(colData_path, object)
colData_path |
Path to new colData |
object |
A SingleCellExperiment object |
a SingleCellExperiment object
subset to only NRL from chevreuldata::human_gene_transcript_sce()
tiny_sce
tiny_sce
An SCE with only expression of NRL gene and NRL transcripts
chevreuldata::human_gene_transcript_sce()
Convert ensembl transcript ids to hgnc gene symbols
transcripts_to_genes(transcripts, organism = "human")
transcripts_to_genes(transcripts, organism = "human")
transcripts |
transcripts |
organism |
human or mouse |
a vector of gene symbols
NRL_transcripts_hs <- c("ENST00000359842", "ENST00000470566", "ENST00000465764") data("grch38_tx2gene") data("grch38") transcripts_to_genes(transcripts = NRL_transcripts_hs)
NRL_transcripts_hs <- c("ENST00000359842", "ENST00000470566", "ENST00000465764") data("grch38_tx2gene") data("grch38") transcripts_to_genes(transcripts = NRL_transcripts_hs)
Add new/update existing projects to the database by recursing fully
update_project_db( projects_dir = NULL, cache_location = "~/.cache/chevreul", sqlite_db = "single-cell-projects.db", verbose = TRUE )
update_project_db( projects_dir = NULL, cache_location = "~/.cache/chevreul", sqlite_db = "single-cell-projects.db", verbose = TRUE )
projects_dir |
The project directory to be updated |
cache_location |
Path to cache "~/.cache/chevreul" |
sqlite_db |
sqlite db |
verbose |
print messages |
a sqlite database with SingleCellExperiment objects