Load expression data 1 2 3 4 5 6 7 8 9 library( Seurat) library( ggplot2) data <- read.table( file = "sample.counts.tsv" , header = T , row.names = 1 ) data <- Read10X( data.dir = "/opt/data/filtered_feature_bc_matrix" )
Create seurat object 1 sobj <- CreateSeuratObject( counts = data, min.cells = 3 , min.features = 200 )
Quality control 1 2 3 4 sobj[[ "percent.mt" ] ] <- PercentageFeatureSet( sobj, pattern = "^MT-" ) qc_violin <- VlnPlot( sobj, features = c ( "nFeature_RNA" , "nCount_RNA" , "percent.mt" ) , ncol = 3 ) ggsave( "qc_violin.pdf" , qc_violin) sobj <- subset( sobj, subset = nFeature_RNA > 1000 & nFeature_RNA < 4000 & percent.mt < 40 )
Normalization 1 sobj <- NormalizeData( sobj, normalization.method = "LogNormalize" , scale.factor = 10000 )
Highly variable genes (HVGs) 1 sobj <- FindVariableFeatures( sobj, selection.method = "vst" , nfeatures = 2000 )
Scaling 1 2 all.genes <- rownames( sobj) sobj <- ScaleData( sobj, features = all.genes)
Linear dimensional reduction PCA 1 sobj <- RunPCA( sobj, features = VariableFeatures( object = sobj) )
Determine dimensionality 1 2 3 4 5 6 sobj <- JackStraw( sobj, num.replicate = 100 ) sobj <- ScoreJackStraw( sobj, dims = 1 : 20 ) plot1 <- JackStrawPlot( sobj, dims = 1 : 15 ) plot2 <- ElbowPlot( sobj) pc <- CombinePlots( plots = list ( plot1, plot2) , legend = "bottom" ) ggsave( "pc.pdf" , pc)
Clustering 1 2 sobj <- FindNeighbors( sobj, dims = 1 : 10 ) sobj <- FindClusters( sobj, resolution = 0.5 )
Non-linear dimensional reduction UMAP 1 2 3 sobj <- RunUMAP( sobj, dims = 1 : 10 ) umap <- DimPlot( sobj, reduction = "umap" ) ggsave( "umap.pdf" , umap)
tSNE 1 2 3 sobj <- RunTSNE( sobj, dims = 1 : 10 ) tsne <- DimPlot( sobj, reduction = "tsne" ) ggsave( "tsne.pdf" , tsne)
Find markers 1 2 3 4 5 6 library( magrittr) library( dplyr) cluster_markers <- FindAllMarkers( sobj, only.pos = TRUE , min.pct = 0.25 , logfc.threshold = 0.25 ) cluster_markers %>% group_by( cluster) %>% slice_max( n = 2 , order_by = avg_log2FC) -> markers
SingleR for assigning cell type 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 load( file = "HumanPrimaryCellAtlas.Rdata" ) library( SingleR) sobj_data <- GetAssayData( sobj, slot = "data" ) clusters <- sobj$ seurat_clusters cell_annotation <- SingleR( test = sobj_data, ref = ref, labels = ref$ label.main, clusters = clusters) new.clusterID <- cell_annotation$ labels names ( new.clusterID) <- levels( sobj) sobj.new <- RenameIdents( sobj, new.clusterID) umap_label <- DimPlot( sobj.new, label = T , label.size = 3 , reduction = "umap" ) ggsave( "umap_label.pdf" , umap_label) tsne_label <- DimPlot( sobj.new, label = T , label.size = 3 , reduction = "tsne" ) ggsave( "tsne_label.pdf" , tsne_label) saveRDS( sobj.new, file = "sample_label.rds" )
Installation of Seurat, SingleR and celldex:
1 2 3 4 5 install.packages( "Seurat" ) if ( ! require( SingleR) ) BiocManager:: install( "SingleR" ) install.packages( "remotes" ) remotes:: install_github( "LTLA/celldex" )
Reference: