Seurat单细胞分析流程

Load expression data

1
2
3
4
5
6
7
8
9
library(Seurat)
library(ggplot2)
# Read count data (row is gene, column is cell)
data <- read.table(file = "sample.counts.tsv", header = T, row.names = 1)
# Read 10X data (data dir contains 3 files: barcodes.tsv.gz, features.tsv.gz, matrix.mtx.gz)
# The three files must be the same name above
# If the features.tsv.gz only contains 1 column, it should add parameter like this:
# data <- Read10X(data.dir = "/opt/data/filtered_feature_bc_matrix", gene.column = 1)
data <- Read10X(data.dir = "/opt/data/filtered_feature_bc_matrix")

Create seurat object

1
sobj <- CreateSeuratObject(counts = data, min.cells = 3, min.features = 200)

Quality control

1
2
3
4
sobj[["percent.mt"]] <- PercentageFeatureSet(sobj, pattern = "^MT-")
qc_violin <- VlnPlot(sobj, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
ggsave("qc_violin.pdf", qc_violin)
sobj <- subset(sobj, subset = nFeature_RNA > 1000 & nFeature_RNA < 4000 & percent.mt < 40)

Normalization

1
sobj <- NormalizeData(sobj, normalization.method = "LogNormalize", scale.factor = 10000)

Highly variable genes (HVGs)

1
sobj <- FindVariableFeatures(sobj, selection.method = "vst", nfeatures = 2000)

Scaling

1
2
all.genes <- rownames(sobj)
sobj <- ScaleData(sobj, features = all.genes)

Linear dimensional reduction PCA

1
sobj <- RunPCA(sobj, features = VariableFeatures(object = sobj))

Determine dimensionality

1
2
3
4
5
6
sobj <- JackStraw(sobj, num.replicate = 100)
sobj <- ScoreJackStraw(sobj, dims = 1:20)
plot1 <- JackStrawPlot(sobj, dims = 1:15)
plot2 <- ElbowPlot(sobj)
pc <- CombinePlots(plots = list(plot1, plot2), legend = "bottom")
ggsave("pc.pdf", pc)

Clustering

1
2
sobj <- FindNeighbors(sobj, dims = 1:10)
sobj <- FindClusters(sobj, resolution = 0.5)

Non-linear dimensional reduction

UMAP

1
2
3
sobj <- RunUMAP(sobj, dims = 1:10)
umap <- DimPlot(sobj, reduction = "umap")
ggsave("umap.pdf", umap)

tSNE

1
2
3
sobj <- RunTSNE(sobj, dims = 1:10)
tsne <- DimPlot(sobj, reduction = "tsne")
ggsave("tsne.pdf", tsne)

Find markers

1
2
3
4
5
6
library(magrittr)
library(dplyr)
cluster_markers <- FindAllMarkers(sobj, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
cluster_markers %>%
group_by(cluster) %>%
slice_max(n = 2, order_by = avg_log2FC) -> markers

SingleR for assigning cell type

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# library(celldex)
# ref <- celldex::HumanPrimaryCellAtlasData()
# save(ref, file = 'HumanPrimaryCellAtlas.Rdata')
load(file = "HumanPrimaryCellAtlas.Rdata")
library(SingleR)
sobj_data <- GetAssayData(sobj, slot = "data")
clusters <- sobj$seurat_clusters
cell_annotation <- SingleR(test = sobj_data, ref = ref, labels = ref$label.main, clusters = clusters)
new.clusterID <- cell_annotation$labels
names(new.clusterID) <- levels(sobj)
sobj.new <- RenameIdents(sobj, new.clusterID)
umap_label <- DimPlot(sobj.new, label = T, label.size = 3, reduction = "umap")
ggsave("umap_label.pdf", umap_label)
tsne_label <- DimPlot(sobj.new, label = T, label.size = 3, reduction = "tsne")
ggsave("tsne_label.pdf", tsne_label)
saveRDS(sobj.new, file = "sample_label.rds")
# Analyze annotation result
# plot1 <- plotScoreHeatmap(cell_annotation)
# ggsave("scoreheatmap.pdf", plot1)
# plot2 <- plotDeltaDistribution(cell_annotation, ncol = 3)
# ggsave("distribution.pdf", plot2)
# label_cluster <- table(label = cell_annotation$labels, cluster = clusters)
# library(pheatmap)
# plot3 <- pheatmap(log10(label_cluster + 10))
# ggsave("heatmap.pdf", plot3)

Installation of Seurat, SingleR and celldex:

1
2
3
4
5
install.packages("Seurat")
if(!require(SingleR))
BiocManager::install("SingleR")
install.packages("remotes")
remotes::install_github("LTLA/celldex")

Reference:

Author: Giftbear
Link: https://giftbear.github.io/2023/04/13/Seurat单细胞分析流程/
Copyright Notice: All articles in this blog are licensed under CC BY-NC-SA 4.0 unless stating additionally.