-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.html
2183 lines (965 loc) · 74.9 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html>
<html class="theme-next pisces use-motion" lang="en">
<head>
  <meta charset="UTF-8">
  <!-- Document metadata for the blog index page: generator is Hexo 4.2.0 and the
       inline CONFIG below declares scheme "Pisces", version 5.1.4. -->
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <!-- No maximum-scale here: capping the scale at 1 prevents users from zooming the page. -->
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <meta name="theme-color" content="#222">
  <!-- NOTE(review): these two look like hints for transcoding proxies; confirm they are still wanted. -->
  <meta http-equiv="Cache-Control" content="no-transform">
  <meta http-equiv="Cache-Control" content="no-siteapp">

  <!-- Stylesheets: vendor libraries first, then the theme's own stylesheet. -->
  <link href="/lib/fancybox/source/jquery.fancybox.css?v=2.1.5" rel="stylesheet">
  <link href="/lib/font-awesome/css/font-awesome.min.css?v=4.6.2" rel="stylesheet">
  <link href="/css/main.css?v=5.1.4" rel="stylesheet">

  <!-- Favicons and touch icons. -->
  <link rel="apple-touch-icon" sizes="180x180" href="/images/apple-touch-icon-next.png?v=5.1.4">
  <link rel="icon" type="image/png" sizes="32x32" href="/images/favicon-32x32-next.png?v=5.1.4">
  <link rel="icon" type="image/png" sizes="16x16" href="/images/favicon-16x16-next.png?v=5.1.4">
  <link rel="mask-icon" href="/images/logo.svg?v=5.1.4" color="#222">

  <!-- Search and social metadata. -->
  <meta name="keywords" content="Hexo, NexT">
  <meta name="description" content="生物信息 记录 分享 博客">
  <meta property="og:type" content="website">
  <meta property="og:title" content="Feilijiang">
  <!-- NOTE(review): "http://yoursite.com" (here and in the canonical link below) looks like an
       unconfigured placeholder host; confirm the site URL in the generator config and rebuild. -->
  <meta property="og:url" content="http://yoursite.com/index.html">
  <meta property="og:site_name" content="Feilijiang">
  <meta property="og:description" content="生物信息 记录 分享 博客">
  <meta property="og:locale" content="en_US">
  <meta property="article:author" content="Feilijiang">
  <meta name="twitter:card" content="summary">

  <!-- Theme runtime configuration, exposed as the globals NexT and CONFIG. -->
  <script id="hexo.configurations">
    var NexT = window.NexT || {};
    var CONFIG = {
      root: '/',
      scheme: 'Pisces',
      version: '5.1.4',
      sidebar: {"position":"left","display":"post","offset":12,"b2t":true,"scrollpercent":true,"onmobile":false},
      fancybox: true,
      tabs: true,
      motion: {"enable":true,"async":false,"transition":{"post_block":"fadeIn","post_header":"slideDownIn","post_body":"slideDownIn","coll_header":"slideLeftIn","sidebar":"slideUpIn"}},
      duoshuo: {
        userId: '0',
        author: 'Author'
      },
      algolia: {
        applicationID: '',
        apiKey: '',
        indexName: '',
        hits: {"per_page":10},
        labels: {"input_placeholder":"Search for Posts","hits_empty":"We didn't find any results for the search: ${query}","hits_stats":"${hits} results found in ${time} ms"}
      }
    };
  </script>

  <link rel="canonical" href="http://yoursite.com/">
  <title>Feilijiang</title>
  <meta name="generator" content="Hexo 4.2.0">
</head>
<body itemscope itemtype="http://schema.org/WebPage" lang="en">
<div class="container sidebar-position-left
page-home">
<div class="headband"></div>
<header id="header" class="header" itemscope itemtype="http://schema.org/WPHeader">
  <!-- Site masthead: brand link, subtitle, navigation toggle button and the primary menu. -->
  <div class="header-inner">
    <div class="site-brand-wrapper">
      <div class="site-meta">
        <div class="custom-logo-site-title">
          <a href="/" class="brand" rel="start">
            <span class="logo-line-before"><i></i></span>
            <span class="site-title">Feilijiang</span>
            <span class="logo-line-after"><i></i></span>
          </a>
        </div>
        <p class="site-subtitle">New Beginnings</p>
      </div>
      <div class="site-nav-toggle">
        <!-- type="button" keeps this from acting as a submit button; the label names the
             otherwise icon-only control for assistive technology. -->
        <button type="button" aria-label="Toggle navigation">
          <span class="btn-bar"></span>
          <span class="btn-bar"></span>
          <span class="btn-bar"></span>
        </button>
      </div>
    </div>
    <!-- Menu links point at clean paths (no encoded trailing space) and the icon class
         lists contain only real class names.
         NOTE(review): the stray "%20" and "//..." tokens presumably come from the menu
         entries in the theme config; fix them there too or a rebuild will bring them back. -->
    <nav class="site-nav" aria-label="Primary">
      <ul id="menu" class="menu">
        <li class="menu-item menu-item-home">
          <a href="/" rel="section">
            <i class="menu-item-icon fa fa-fw fa-home" aria-hidden="true"></i> <br>
            Home
          </a>
        </li>
        <li class="menu-item menu-item-tags">
          <a href="/tags/" rel="section">
            <i class="menu-item-icon fa fa-fw fa-tags" aria-hidden="true"></i> <br>
            Tags
          </a>
        </li>
        <li class="menu-item menu-item-categories">
          <a href="/categories/" rel="section">
            <i class="menu-item-icon fa fa-fw fa-th" aria-hidden="true"></i> <br>
            Categories
          </a>
        </li>
        <li class="menu-item menu-item-schedule">
          <a href="/schedule/" rel="section">
            <i class="menu-item-icon fa fa-fw fa-calendar" aria-hidden="true"></i> <br>
            Schedule
          </a>
        </li>
      </ul>
    </nav>
  </div>
</header>
<main id="main" class="main">
<div class="main-inner">
<div class="content-wrap">
<div id="content" class="content">
<section id="posts" class="posts-expand">
<article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
<div class="post-block">
<link itemprop="mainEntityOfPage" href="http://yoursite.com/2021/12/10/%E5%8F%AF%E5%8F%98%E5%89%AA%E5%88%87/">
<span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
<meta itemprop="name" content="Feilijiang">
<meta itemprop="description" content="">
<meta itemprop="image" content="/images/avatar.gif">
</span>
<span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
<meta itemprop="name" content="Feilijiang">
</span>
<header class="post-header">
<h1 class="post-title" itemprop="name headline">
<a class="post-title-link" href="/2021/12/10/%E5%8F%AF%E5%8F%98%E5%89%AA%E5%88%87/" itemprop="url">可变剪切</a></h1>
<div class="post-meta">
<span class="post-time">
<span class="post-meta-item-icon">
<i class="fa fa-calendar-o"></i>
</span>
<span class="post-meta-item-text">Posted on</span>
<time title="Post created" itemprop="dateCreated datePublished" datetime="2021-12-10T10:58:08+08:00">
2021-12-10
</time>
</span>
</div>
</header>
<div class="post-body" itemprop="articleBody">
<p>参考<br><a href="https://blog.csdn.net/dikuangzhong6068/article/details/101198262?utm_medium=distribute.pc_relevant.none-task-blog-2~default~baidujs_baidulandingword~default-0.highlightwordscore&spm=1001.2101.3001.4242.1" target="_blank" rel="noopener">https://blog.csdn.net/dikuangzhong6068/article/details/101198262?utm_medium=distribute.pc_relevant.none-task-blog-2~default~baidujs_baidulandingword~default-0.highlightwordscore&spm=1001.2101.3001.4242.1</a> </p>
<h2 id="概念"><a href="#概念" class="headerlink" title="概念"></a>概念</h2><p>可变剪接(alternative splicing),在真核生物中是一种非常基本的生物学事件。即基因转录后,先产生初始RNA或称作RNA前体,然后再通过可变剪接方式,选择性的把不同的外显子进行重连,从而产生不同的剪接异构体(isoform)。这种方式,使得一个基因可产生多个不同的转录本,这些转录本分别在细胞/个体分化发育的不同阶段,在不同的组织中有各自特异的表达和功能,从而极大地丰富了编码RNA和非编码RNA种类和数量,进而增加了转录组和蛋白质组的复杂性。</p>
<h2 id="形式"><a href="#形式" class="headerlink" title="形式"></a>形式</h2><p>1、外显子跳跃,英文Exon Skipping, 或者叫做cassette exon;</p>
<p>2、内含子保留,英文Intron Retention;</p>
<p>3、互斥外显子,英文Mutually Exclusive Exons;</p>
<p>4、外显子5’端的选择性剪接,Alternative 5’ splice site,A5SS</p>
<p>5、外显子3’端的选择性剪接,Alternative 3’ splice site,A3SS<br><img src="https://gitee.com/feilijiang/img/raw/master/aHR0cHM6Ly91cGxvYWQtaW1hZ2VzLmppYW5zaHUuaW8vdXBsb2FkX2ltYWdlcy8xOTQzMTY2OC1kN2M5MGVjNWYzYzMyNTU2LnBuZw.png" alt=""></p>
<h2 id="单细胞领域工具"><a href="#单细胞领域工具" class="headerlink" title="单细胞领域工具"></a>单细胞领域工具</h2><p>STARsolo: mapping, demultiplexing and gene quantification for single cell RNA-seq.</p>
<h3 id="别人的总结"><a href="#别人的总结" class="headerlink" title="别人的总结"></a>别人的总结</h3><p><img src="https://gitee.com/feilijiang/img/raw/master/20211210110232.png" alt=""></p>
<p><img src="https://gitee.com/feilijiang/img/raw/master/20211210110309.png" alt=""></p>
<h3 id="自己的搜索"><a href="#自己的搜索" class="headerlink" title="自己的搜索"></a>自己的搜索</h3><ul>
<li><p>scVelo - [Python] - scVelo is a scalable toolkit for RNA velocity analysis in single cells. It generalizes the concept of RNA velocity by relaxing previously made assumptions with a dynamical model. It allows to identify putative driver genes, infer a latent time, estimate reaction rates of transcription, splicing and degradation, and detect competing kinetics. 怎么去看呢,是否有具体的信息</p>
</li>
<li><p>SingleSplice - [R, perl, C++] - A tool for detecting biological variation in alternative splicing within a population of single cells. See Welch et al. 2016. 需要ERCC作为参照</p>
</li>
<li><p>rMATS - [Python] - RNA-Seq Multivariate Analysis of Transcript Splicing. 2014年发表的工具,后面有速度的提升版本,目测不能用于3’数据。</p>
</li>
<li><p>outrigger - [Python] - Outrigger is a program to calculate alternative splicing scores of RNA-Seq data based on junction reads and a de novo, custom annotation created with a graph database, especially made for single-cell analyses.作为Expedition的一个部分,专门为单细胞设计。文献里面是C1的数据来做的。</p>
</li>
<li><p>ICGS - [Python] - Iterative Clustering and Guide-gene Selection (Olsson et al. Nature 2016). Identify discrete, transitional and mixed-lineage states from diverse single-cell transcriptomics platforms. Integrated FASTQ pseudoalignment/quantification (Kallisto), differential expression, cell-type prediction and optional cell cycle exclusion analyses. Specialized methods for processing BAM and 10X Genomics sparse matrix files. Associated single-cell splicing PSI methods (MultIPath-PSI). A part of the AltAnalyze toolkit along with accompanying visualization methods (e.g., heatmap, t-SNE, SashimiPlots, network graphs). Easy-to-use graphical user and commandline interfaces.</p>
</li>
<li><p>flotilla - [Python] - Reproducible machine learning analysis of gene expression and alternative splicing data</p>
</li>
<li><p>Sierra: discovery of differential transcript usage from polyA-captured single-cell RNA-seq data,可以针对3’端的数据</p>
</li>
<li><p>BRIE:不适用于3‘端测序数据</p>
</li>
<li><p>LeafCutter:不是单细胞的方法</p>
</li>
</ul>
<ul>
<li>SpliZ:RNA splicing programs define tissue compartments and cell types at single-cell resolution</li>
</ul>
<h2 id="分析目的"><a href="#分析目的" class="headerlink" title="分析目的"></a>分析目的</h2><h3 id="xbp1两种形式的功能研究。"><a href="#xbp1两种形式的功能研究。" class="headerlink" title="xbp1两种形式的功能研究。"></a>xbp1两种形式的功能研究。</h3><h3 id="xbp1的两种剪切体的形式:Xbp1属于外显子3‘端的选择性剪切"><a href="#xbp1的两种剪切体的形式:Xbp1属于外显子3‘端的选择性剪切" class="headerlink" title="xbp1的两种剪切体的形式:Xbp1属于外显子3‘端的选择性剪切"></a>xbp1的两种剪切体的形式:Xbp1属于外显子3‘端的选择性剪切</h3><h3 id="定量两种isoform,并且得到两个group(ko和wt)中分别的表达量。"><a href="#定量两种isoform,并且得到两个group(ko和wt)中分别的表达量。" class="headerlink" title="定量两种isoform,并且得到两个group(ko和wt)中分别的表达量。"></a>定量两种isoform,并且得到两个group(ko和wt)中分别的表达量。</h3><p>问题<br>没有搞清楚scVelo得到的文件是否有两个isoform的定量信息,得到的信息为unspliced和spliced的两个矩阵。没有不同isoform的格式。<br>如果用其他方法的话,Sierra,ISOP,ICGS 哪个能用</p>
<p>ICGS:更名为AltAnalyze,适用于单细胞数据,可以用于3’端实验的分析,似乎使用率很高。没有搞懂内容<br><a href="https://altanalyze.readthedocs.io/en/latest/" target="_blank" rel="noopener">https://altanalyze.readthedocs.io/en/latest/</a></p>
<p>review 里面的东西:<br>ISOP:10x数据可以用,是R包,下游处理差异表达的,输入是什么?输入就是一个isoform乘以sample的矩阵,上游是利用cufflinks对比对后的bam产生两个矩阵。需要全长序列<br><a href="https://academic.oup.com/bioinformatics/article/34/14/2392/4911530" target="_blank" rel="noopener">https://academic.oup.com/bioinformatics/article/34/14/2392/4911530</a><br><a href="https://github.com/nghiavtr/ISOP" target="_blank" rel="noopener">https://github.com/nghiavtr/ISOP</a><br><img src="https://gitee.com/feilijiang/img/raw/master/20211210185044.png" alt=""></p>
<p>Sierra:GB的方法,discovery of differential transcript usage from polyA-captured single-cell RNA-seq data<br><a href="https://genomebiology.biomedcentral.com/articles/10.1186/s13059-020-02071-7" target="_blank" rel="noopener">https://genomebiology.biomedcentral.com/articles/10.1186/s13059-020-02071-7</a><br><a href="https://github.com/VCCRI/Sierra" target="_blank" rel="noopener">https://github.com/VCCRI/Sierra</a></p>
<p>得到一个序列比对位置peak的矩阵,鉴定used polyadenylated sites in scRNA-seq data。<br><img src="https://gitee.com/feilijiang/img/raw/master/20211210164001.png" alt=""></p>
<p>查看单个基因的isoform,需要从bam里面提取这个基因,进行查看。</p>
<p>三个方法的输入输出,可以做哪些分析。</p>
<p> The 10X data were not equipped for alternative splicing analysis due to the 3′-bias (Figure 6C, Figure S8C). Nevertheless, 10X still detected non-negligible number of junctions, even though they only accounted for approximately 50% of those junctions detected by Smart-seq2. Although Smart-seq2 data were clearly much more suitable for alternative splicing studies [41], [42], the limited number of splicing junctions detected by 10X might be suitable for certain analyses that rely on junction-based characterization, such as the RNA velocity analysis [43].</p>
<p>有10x的isoform的鉴定<br> “Isoform specificity in the mouse primary motor cortex”<br> <a href="https://github.com/pachterlab/BYVSTZP_2020/blob/master/analysis/notebooks/10xv3/final-10x_isoform.ipynb" target="_blank" rel="noopener">https://github.com/pachterlab/BYVSTZP_2020/blob/master/analysis/notebooks/10xv3/final-10x_isoform.ipynb</a></p>
<p>Modular, efficient and constant-memory single-cell RNA-seq preprocessing<br><a href="https://www.nature.com/articles/s41587-021-00870-2" target="_blank" rel="noopener">https://www.nature.com/articles/s41587-021-00870-2</a></p>
<p>A discriminative learning approach to differential expression analysis for single-cell RNA-seq<br> <a href="https://github.com/pachterlab/NYMP_2018/blob/master/10x_example-logR/10x_example_logR-TCC_notebook.ipynb" target="_blank" rel="noopener">https://github.com/pachterlab/NYMP_2018/blob/master/10x_example-logR/10x_example_logR-TCC_notebook.ipynb</a><br> 产生TCC matrix:<a href="https://github.com/pachterlab/scRNA-Seq-TCC-prep" target="_blank" rel="noopener">https://github.com/pachterlab/scRNA-Seq-TCC-prep</a></p>
<p><a href="https://www.nature.com/articles/s41592-018-0303-9#code-availability" target="_blank" rel="noopener">https://www.nature.com/articles/s41592-018-0303-9#code-availability</a></p>
<p>kallisto<br><a href="https://www.kallistobus.tools/kb_usage/kb_usage/" target="_blank" rel="noopener">https://www.kallistobus.tools/kb_usage/kb_usage/</a><br>使用bustools可以生成TCC,但是需要bus file的输入,由fastq文件生成。</p>
<p>AltAnalyze<br><a href="http://altanalyze.blogspot.com/2016/08/bye-bye-bed-files-welcome-bam.html" target="_blank" rel="noopener">http://altanalyze.blogspot.com/2016/08/bye-bye-bed-files-welcome-bam.html</a></p>
<p><a href="https://altanalyze.readthedocs.io/en/latest/RunningAltAnalyze/#selecting-the-rna-seq-analysis-method" target="_blank" rel="noopener">https://altanalyze.readthedocs.io/en/latest/RunningAltAnalyze/#selecting-the-rna-seq-analysis-method</a></p>
</div>
<footer class="post-footer">
<div class="post-eof"></div>
</footer>
</div>
</article>
<article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
<div class="post-block">
<link itemprop="mainEntityOfPage" href="http://yoursite.com/2021/12/10/%E7%AA%97%E5%8F%A3/">
<span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
<meta itemprop="name" content="Feilijiang">
<meta itemprop="description" content="">
<meta itemprop="image" content="/images/avatar.gif">
</span>
<span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
<meta itemprop="name" content="Feilijiang">
</span>
<header class="post-header">
<h1 class="post-title" itemprop="name headline">
<a class="post-title-link" href="/2021/12/10/%E7%AA%97%E5%8F%A3/" itemprop="url">窗口</a></h1>
<div class="post-meta">
<span class="post-time">
<span class="post-meta-item-icon">
<i class="fa fa-calendar-o"></i>
</span>
<span class="post-meta-item-text">Posted on</span>
<time title="Post created" itemprop="dateCreated datePublished" datetime="2021-12-10T09:54:37+08:00">
2021-12-10
</time>
</span>
</div>
</header>
<div class="post-body" itemprop="articleBody">
<h2 id="优秀播客节目"><a href="#优秀播客节目" class="headerlink" title="优秀播客节目"></a>优秀播客节目</h2><p>到现场去<br>忽左忽右<br>反潮流俱乐部<br>跳岛<br>What’s next | 科技早知道:一个不错的科技和商业播客<br>声动活泼<br>Justpod<br>Evolving for the Next Billion</p>
<h2 id="知识和思维的收获"><a href="#知识和思维的收获" class="headerlink" title="知识和思维的收获"></a>知识和思维的收获</h2><h3 id="无人驾驶"><a href="#无人驾驶" class="headerlink" title="无人驾驶"></a>无人驾驶</h3><p>无人驾驶商业化,取代出租车。不同的商业思维,从成本出发,从应用场景出发。<br>特斯拉,辅助驾驶,成本取决于现有买车的生意。<br>文远知行,无人商业驾驶,取代出租车,成本取决于运营的生意。<br>算力决定AI实现程度。</p>
</div>
<footer class="post-footer">
<div class="post-eof"></div>
</footer>
</div>
</article>
<article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  <!-- Index entry for the post "sc_spatial_technology"; the post body is a single image. -->
  <div class="post-block">
    <link itemprop="mainEntityOfPage" href="http://yoursite.com/2021/12/09/sc-spatial-technology/">
    <!-- Hidden schema.org microdata: author and publisher. -->
    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="Feilijiang">
      <meta itemprop="description" content="">
      <meta itemprop="image" content="/images/avatar.gif">
    </span>
    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="Feilijiang">
    </span>
    <header class="post-header">
      <h1 class="post-title" itemprop="name headline">
        <a class="post-title-link" href="/2021/12/09/sc-spatial-technology/" itemprop="url">sc_spatial_technology</a></h1>
      <div class="post-meta">
        <span class="post-time">
          <span class="post-meta-item-icon">
            <i class="fa fa-calendar-o" aria-hidden="true"></i>
          </span>
          <span class="post-meta-item-text">Posted on</span>
          <time title="Post created" itemprop="dateCreated datePublished" datetime="2021-12-09T10:56:28+08:00">
            2021-12-09
          </time>
        </span>
      </div>
    </header>
    <div class="post-body" itemprop="articleBody">
      <!-- The image is the post's only content, so it needs a text alternative.
           TODO: replace this placeholder alt with a description of what the figure shows. -->
      <p><img src="https://gitee.com/feilijiang/img/raw/master/20211209105510.png" alt="sc_spatial_technology figure"></p>
    </div>
    <footer class="post-footer">
      <div class="post-eof"></div>
    </footer>
  </div>
</article>
<article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
<div class="post-block">
<link itemprop="mainEntityOfPage" href="http://yoursite.com/2021/12/08/scATAC-seq%E5%85%A5%E9%97%A8/">
<span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
<meta itemprop="name" content="Feilijiang">
<meta itemprop="description" content="">
<meta itemprop="image" content="/images/avatar.gif">
</span>
<span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
<meta itemprop="name" content="Feilijiang">
</span>
<header class="post-header">
<h1 class="post-title" itemprop="name headline">
<a class="post-title-link" href="/2021/12/08/scATAC-seq%E5%85%A5%E9%97%A8/" itemprop="url">scATAC-seq入门</a></h1>
<div class="post-meta">
<span class="post-time">
<span class="post-meta-item-icon">
<i class="fa fa-calendar-o"></i>
</span>
<span class="post-meta-item-text">Posted on</span>
<time title="Post created" itemprop="dateCreated datePublished" datetime="2021-12-08T10:17:47+08:00">
2021-12-08
</time>
</span>
</div>
</header>
<div class="post-body" itemprop="articleBody">
<h2 id="Introduction"><a href="#Introduction" class="headerlink" title="Introduction"></a>Introduction</h2><h3 id="基本概念:"><a href="#基本概念:" class="headerlink" title="基本概念:"></a>基本概念:</h3><p>nucleosomes: 由八个组蛋白构成,包裹147bp的DNA序列。</p>
<p>Promoter:TSS附近区域</p>
<p>Enhancer:promoter上游直到1Mb的位置。</p>
<p>转座酶:搬运一段DNA序列到另外一个位置,由转座酶实现插入到DNA序列,需要插入位点染色质是开放的。如果两个相邻的Tn5转座酶切割,长度40bp。文献了解:Rapid, low-input, low-bias construction of shotgun fragment libraries by high-density in vitro transposition</p>
<p>CTCF:由11个锌指蛋白组成,转录结合位点由锌指蛋白,CTCF与自身结合形成同源二聚体,和cohesin一起作用,导致结合的DNA形成环状。</p>
<blockquote>
<p>DNA-loop:It is currently believed that the DNA loops are formed by the “loop extrusion” mechanism, whereby the cohesin ring is actively being translocated along the DNA until it meets CTCF. CTCF has to be in a proper orientation to stop cohesin. </p>
</blockquote>
<blockquote>
<p>CTCF的结合可以被CpG methylation给破坏,另一方面,CTCF结合可能为DNA甲基化的扩散设置边界。</p>
</blockquote>
<blockquote>
<p><strong>与核小体的关系</strong>:CTCF binding sites act as nucleosome positioning anchors so that, when used to align various genomic signals, multiple flanking nucleosomes can be readily identified.所以used as a positive control for assessing if the ATAC-Seq experiment is good quality.Good ATAC-Seq data would have accessible regions both within and outside of TSS, for example, at some CTCF binding sites.</p>
</blockquote>
<p>异染色质:是染色质的紧密排列形式,可以沉默基因转录。 异染色质构成端粒、中心周围区域和富含重复序列的区域。 常染色质凝缩较少,含有活性最强的转录基因。active and </p>
<h3 id="基本原理"><a href="#基本原理" class="headerlink" title="基本原理"></a>基本原理</h3><p>DNA被核小体(nucleosomes)包裹。当DNA被转录的时候,DNA会被打开,核小体会松散。很多因子,比如染色质结构,核小体的位置,组蛋白修饰都会对DNA的organization和accessibility起重要作用。这些因子也会对基因的激活和抑制起到重要作用。ATAC-seq能够检测染色质的开放区域,来作为研究基因调控机制的一种方法。当TF结合enhancer并且连接promoter区域的时候,基因转录或者停止转录。</p>
<h3 id="技术历史"><a href="#技术历史" class="headerlink" title="技术历史"></a>技术历史</h3><ol>
<li>2013年 Nature Methods ATAC-seq第一篇<blockquote>
<p>Buenrostro, Jason D., Paul G. Giresi, Lisa C. Zaba, Howard Y. Chang, and William J. Greenleaf. “Transposition of native chromatin for fast and sensitive epigenomic profiling of open chromatin, DNA-binding proteins and nucleosome position.” Nature methods 10, no. 12 (2013): 1213-1218.</p>
</blockquote>
</li>
<li>2015年 Nature scATAC-seq第一篇<blockquote>
<p>Buenrostro, Jason D., Beijing Wu, Ulrike M. Litzenburger, Dave Ruff, Michael L. Gonzales, Michael P. Snyder, Howard Y. Chang, and William J. Greenleaf. “Single-cell chromatin accessibility reveals principles of regulatory variation.” Nature 523, no. 7561 (2015): 486-490.</p>
</blockquote>
</li>
<li></li>
</ol>
<h3 id="技术原理和实验过程"><a href="#技术原理和实验过程" class="headerlink" title="技术原理和实验过程"></a>技术原理和实验过程</h3><h4 id="转座酶和转座子的区别和共性-转座酶切割的原理??"><a href="#转座酶和转座子的区别和共性-转座酶切割的原理??" class="headerlink" title="转座酶和转座子的区别和共性,转座酶切割的原理??"></a>转座酶和转座子的区别和共性,转座酶切割的原理??</h4><h4 id="技术原理"><a href="#技术原理" class="headerlink" title="技术原理"></a>技术原理</h4><h5 id="bulk"><a href="#bulk" class="headerlink" title="bulk"></a>bulk</h5><p><img src="https://gitee.com/feilijiang/img/raw/master/20211208114701.png" alt=""></p>
<h5 id="single-cell-greenleaf,renbin-10x"><a href="#single-cell-greenleaf,renbin-10x" class="headerlink" title="single cell: greenleaf,renbin, 10x,"></a>single cell: greenleaf,renbin, 10x,</h5><h4 id="实验过程"><a href="#实验过程" class="headerlink" title="实验过程"></a>实验过程</h4><p>一、提取细胞核<br>二、细胞核完整性观察及计数<br>三、开放染色质片段化后建库<br>四、片段分选去除非目的区域的DNA</p>
<h4 id="技术要点"><a href="#技术要点" class="headerlink" title="技术要点"></a>技术要点</h4><blockquote>
<p>双端测序的必要性:</p>
</blockquote>
<h4 id="技术优化"><a href="#技术优化" class="headerlink" title="技术优化"></a>技术优化</h4><h3 id="数据分析流程"><a href="#数据分析流程" class="headerlink" title="数据分析流程"></a>数据分析流程</h3><h4 id="bulk-1"><a href="#bulk-1" class="headerlink" title="bulk"></a>bulk</h4><p><img src="https://gitee.com/feilijiang/img/raw/master/20211208212649.png" alt=""></p>
<p><strong>1. Input</strong></p>
<ul>
<li>fastq files(fastqsanger.gz), </li>
<li>bed files(encodepeak) ,</li>
<li>genome: hg38,bed files chrom, start,end, name , score </li>
</ul>
<blockquote>
<p>bed格式;</p>
</blockquote>
<p><strong>2. Quality Control</strong></p>
<ul>
<li>Purpose:fastq质控</li>
<li>Tool: fastqc</li>
<li>Input:R1.fastq,R2.fastq</li>
<li>注意点:<ul>
<li>总reads数量</li>
<li>per base sequence content: 由于转座酶的strong sequence bias,文献: “Insertion site preference of Mu, Tn5, and Tn7 transposons”</li>
<li>sequence duplication levels:PCR duplicates,后续去除</li>
<li>overrepresented sequences:adapter序列,需要用cutadapt移除</li>
</ul>
</li>
</ul>
<p><strong>3. Trimming Reads</strong></p>
<ul>
<li>Purpose:用cutadapt移除adapter序列,用fastqc再次检查序列中adapter的含量</li>
<li>Tool: cutadapt,fastqc</li>
<li>Input:<ul>
<li>adapter的序列,</li>
<li>R1.fastq,R2.fastq</li>
</ul>
</li>
<li>Output:<ul>
<li>R1_cutAda.fastq,R2_cutAda.fastq及报告</li>
</ul>
</li>
</ul>
<p><strong>4. Mapping</strong></p>
<ul>
<li>Purpose:Mapping Reads to Reference Genome</li>
<li>Tool: Bowtie2</li>
<li>Input:<ul>
<li>R1_cutAda.fastq,R2_cutAda.fastq</li>
<li>genome.bed</li>
</ul>
</li>
<li>Output:BAM 文件<ul>
<li>插入片段长度:POS-MPOS+CIGAR</li>
<li>POS: leftmost position of where this alignment maps to the reference</li>
<li>MPOS: leftmost position of where the next alignment in this group maps to the reference, MPOS or PNEXT.</li>
<li>CIGAR:string indicating alignment information that allows the storing of clipped</li>
</ul>
</li>
<li>Parameters:<ul>
<li>fragment length:500-1000</li>
<li>end_to_end: 不需要剪掉末端</li>
<li>mode: --very-sensitive</li>
</ul>
</li>
</ul>
<ul>
<li>注意点:<ul>
<li>dovetailing:cutadapt识别至少三个碱基以上的短序列,因此cutadapt之后的序列可能包含 1-2 个接头碱基并超出其配对起始位点。需要加入比对中。</li>
<li>结果参数:unique mapping rate,multi-mapping多的可能原因:1. 使用–very-sensitive的模式,即使第二次命中的质量比第一次低得多,Bowtie2 也会将读取视为多重映射。另一个原因是我们读取了线粒体基因组的映射。线粒体基因组有很多具有相似序列的区域。</li>
</ul>
</li>
</ul>
<p><strong>5. Filtering Mapped Reads</strong></p>
<ul>
<li><ol>
<li>Filter Uninformative Reads<ul>
<li>Purpose:线粒体基因组nucleosome-free,并且Tn5可及。过滤线粒体reads,低比对质量reads,not properly paired</li>
<li>Input: aligned.bam</li>
<li>Tool: FilterBAM</li>
<li>Parameters: isProperPair:Yes, reference:!chrM, mapQuality:>=30</li>
<li>Output: aligned_filter.bam</li>
<li>注意点:如果想保留比对到重复区域的reads, mapQuality可以降低</li>
<li>线粒体reads的数量:使用Samtools idxstats对aligned.bam统计得到: chromosome name, chromosome length, number of reads mapping to the chromosome, number of unaligned mate whose mate is mapping to the chromosome<br><img src="https://gitee.com/feilijiang/img/raw/master/20211208163842.png" alt=""></li>
</ul>
</li>
</ol>
</li>
<li><ol start="2">
<li>Filter Duplicate Reads<ul>
<li>Purpose:由于Tn5的插入随机,所以两个比对序列完全一样的序列被认为是PCR duplicates</li>
<li>Tool: Picard MarkDuplicates</li>
<li>Parameter:默认值标记duplicates,需要设置“If true do not write duplicates to the output file instead of writing them with appropriate flags“为yes才是去除。</li>
<li>Output:bam文件</li>
<li>Duplicates的数量在metric的文件夹里面,UNPAIRED_READS_EXAMINED, READ_PAIR_DUPLICATES</li>
</ul>
</li>
</ol>
</li>
<li><ol start="3">
<li>Check Insert Sizes<ul>
<li>Purpose:Paired-end histogram of insert size frequency,插入片段长度是ATAC-seq一个很重要的指标。</li>
<li>Tools: CollectInsertSizeMetrics (picard)</li>
<li>结果解读: The first peak (50bp) corresponds to where the Tn5 transposase inserted into nucleosome-free regions. The second peak (a bit less than 200bp) corresponds to where Tn5 inserted around a single nucleosome. The third one (around 400bp) is where Tn5 inserted around two adjacent nucleosomes and the fourth one (around 600bp) is where Tn5 inserted around three adjacent nucleosomes.<br><img src="https://gitee.com/feilijiang/img/raw/master/20211208172519.png" alt=""></li>
</ul>
</li>
</ol>
</li>
</ul>
<p><strong>6. Peak calling</strong></p>
<ul>
<li><p>Call peaks</p>
<ul>
<li>Purpose:将片段信息数字化</li>
<li>Input</li>
<li>Output</li>
<li>Tool:MACS(使用的更加广泛)和Genrich(适用于ATAC,但是more reads,less peaks)</li>
<li>Mode: 1.第一个是仅选择片段长度低于 100bp 对应于无核小体区域的配对,并使用峰值调用,就像您对 ChIP-seq 所做的那样,连接配偶之间的信号。 这种方法的缺点是只有在有双端数据时才能使用它,并且会错过只有一个 Tn5 绑定的小开放区域。2.第二个是使用所有读取来更加详尽。 在这种方法中,将每个读取的信号重新集中在 5’ 末端(读取起始位点)非常重要,因为这是 Tn5 切割的地方。 实际上,您希望峰位于核小体周围,而不是直接位于核小体上。</li>
<li>Parameters:</li>
<li>注意点:<img src="https://gitee.com/feilijiang/img/raw/master/20211208175230.png" alt=""><br><img src="https://gitee.com/feilijiang/img/raw/master/20211208175501.png" alt=""></li>
</ul>
</li>
<li><p>Using MACS2</p>
<ul>
<li><ol>
<li>Convert bam to bed</li>
</ol>
</li>
<li><ul>
<li>bedtools: Bam to Bed<br>Input: markdup.bam<br>Output: bed file<br><img src="https://gitee.com/feilijiang/img/raw/master/20211208175526.png" alt=""></li>
</ul>
</li>
<li><ol start="2">
<li>Call peaks with MACS2</li>
</ol>
</li>
<li><ul>
<li>单端测序需要设置偏移模型,双端测序不需要。在chip_seq等数据中科学家发现在真实的结合位点两侧,正负链的测序深度分布如下图所示,对应峰值的中心距离peak中心有一定的偏移。MACS首先通过一个模型来评估真实的peak中心和测序峰值的偏移距离,给定参数bandwidth和mfold, 采用一个大小为2倍bandwidth的滑动窗口,比较该窗口内真实测序深度的分布与随机测序的差异,如果二者的差异倍数超过了阈值mfold,则认为该窗口是一个peak区域。识别到初始的peak区域之后,随机挑选1000个高可信度的peak区域,分别计算正链和负链的测序深度分布。通过这种方式识别到正负链峰值之间的距离,定义为d。在后续peak calling时,会在初始计算结果的基础上向3’端偏移d/2的距离。</li>
</ul>
</li>
<li><p>-泊松模型,lambda可调<br><img src="https://gitee.com/feilijiang/img/raw/master/20211208200632.png" alt=""></p>
</li>
<li><ul>
<li>Input: bed file</li>
<li><ul>
<li>Parameters:</li>
</ul>
</li>
</ul>
</li>
<li><ul>
<li><ul>
<li>We call peaks with MACS2. In order to get the coverage centered on the 5’ extended 100bp each side we will use --shift -100 and --extsize 200:</li>
</ul>
</li>
</ul>
</li>
<li><ul>
<li><ul>
<li>“How many duplicate tags at the exact same location are allowed?”: all<br><img src="https://gitee.com/feilijiang/img/raw/master/20211208181000.png" alt=""></li>
</ul>
</li>
</ul>
</li>
</ul>
</li>
</ul>
<p><a href="https://blog.csdn.net/u012110870/article/details/102804191" target="_blank" rel="noopener">https://blog.csdn.net/u012110870/article/details/102804191</a><br><a href="https://zhuanlan.zhihu.com/p/90180058" target="_blank" rel="noopener">https://zhuanlan.zhihu.com/p/90180058</a></p>
<p><strong>7. Visualisation of Coverage</strong></p>
<ul>
<li><p>1.Prepare the Datasets</p>
<ul>
<li><p>Extract CTCF peaks on chr22 in intergenic regions<br>ATAC的数据期待会在TSS区域附近富集。同时只有好的ATAC的数据才会测到在intergenic CTCF区域富集。事实上,CTCF 蛋白能够定位核小体并创建一个大约 120bp 的核小体耗尽区。 这比 TSS 周围的 200bp 无核小体区域小,也可能不存在于所有细胞中。</p>
</li>
<li><ul>
<li>提取chr22上面的CTCF peaks:</li>
</ul>
</li>
<li><ul>
<li>Filter data on any column using simple expressions </li>
</ul>
</li>
<li><ul>
<li>bedtools Intersect intervals find overlapping intervals in various ways</li>
</ul>
</li>
<li><p>Convert bedgraph from MACS2 to bigwig</p>
</li>
<li><ul>
<li>由于MACS2生成的bedgraph文件较大且难以可视化,转化为bigwig的二进制文件。</li>
</ul>
</li>
</ul>
</li>
<li><p>2.Create heatmap of coverage at TSS with deepTools<br>分别做TSS和CTCF附近的富集图</p>
<ul>
<li>Generate computeMatrix</li>
<li><ul>
<li>Purpose: 确认特殊区域的覆盖度</li>
</ul>
</li>
<li><ul>
<li>Tools:computeMatrix:evaluate the coverage at each locus we are interested in.</li>
</ul>
</li>
<li><ul>
<li>Input:bigwig文件</li>
</ul>
</li>
<li><ul>
<li>Output:compute的matrix</li>
</ul>
</li>
<li>Plot with plotHeatmap</li>
<li><ul>
<li>热图解读:每一行是一条transcript(非reads),覆盖度no-max,red-blue,TSS放中间,TSS 2kb的被展示。热图上方的是TSS附近的平均信号。一个bigwig文件一张热图。coverage并非对称,一般在左边高,因为左边一般是激活基因的promoter的可及。</li>
</ul>
</li>
<li><ul>
<li>如果是CTCF的区域,需要intergenic CTCF peaks chr22的reference来进行computeMatrix一步,然后再plotHeatmap。结果更加对称。</li>
</ul>
</li>
</ul>
</li>
</ul>
<ul>
<li>3.Visualise Regions with pyGenomeTracks<ul>
<li>对于一个特殊区域的可视化,比如几个基因,可以用igv,UCSC browser或者pyGenomeTracks</li>
<li>Tool:pyGenomeTracks</li>
<li>结果;如果peak没有和TSS或者CTCF重合的区域,可能是enhancer,但是需要进一步确认。</li>
</ul>
</li>
</ul>
<h4 id="single-cell"><a href="#single-cell" class="headerlink" title="single-cell"></a>single-cell</h4><p>snaptools是任斌实验室开放的scATAC-seq的工具,包含上游分析和下游分析两块。<br>网址:<a href="https://github.com/r3fang/SnapTools" target="_blank" rel="noopener">https://github.com/r3fang/SnapTools</a></p>
<p><a href="https://github.com/r3fang/SnapATAC/wiki/FAQs" target="_blank" rel="noopener">https://github.com/r3fang/SnapATAC/wiki/FAQs</a></p>
<ol>
<li>Index Reference Genome<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre></td><td class="code"><pre><span class="line"> $ which bwa</span><br><span class="line">/opt/biotools/bwa/bin/bwa</span><br><span class="line">$ snaptools index-genome \</span><br><span class="line"> --input-fasta=mm10.fa \</span><br><span class="line"> --output-prefix=mm10 \</span><br><span class="line"> --aligner=bwa \</span><br><span class="line"> --path-to-aligner=/opt/biotools/bwa/bin/ \</span><br><span class="line"> --num-threads=5</span><br></pre></td></tr></table></figure>
</li>
</ol>
<ol start="2">
<li>Alignment<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br></pre></td><td class="code"><pre><span class="line">$ snaptools align-paired-end \</span><br><span class="line"> --input-reference=mm10.fa \</span><br><span class="line"> --input-fastq1=demo.R1.fastq.gz \</span><br><span class="line"> --input-fastq2=demo.R2.fastq.gz \</span><br><span class="line"> --output-bam=demo.bam \</span><br><span class="line"> --aligner=bwa \</span><br><span class="line"> --path-to-aligner=/opt/biotools/bwa/bin/ \</span><br><span class="line"> --read-fastq-command=zcat \</span><br><span class="line"> --min-cov=0 \</span><br><span class="line"> --num-threads=5 \</span><br><span class="line"> --if-sort=True \</span><br><span class="line"> --tmp-folder=./ \</span><br><span class="line"> --overwrite=TRUE</span><br></pre></td></tr></table></figure>