-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathOped_Reanalysis.bib
173 lines (169 loc) · 17.5 KB
/
Oped_Reanalysis.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
Automatically generated by Mendeley Desktop 1.17.13
Any changes to this file will be lost if it is regenerated by Mendeley.
BibTeX export options can be customized via Preferences -> BibTeX in Mendeley Desktop
% Huang and Madan (1999): the CAP3 DNA sequence assembly program, Genome Research 9(9).
% The title carries no trailing period; bibliography styles add their own punctuation.
@article{Huang1999,
  abstract = {We describe the third generation of the CAP sequence assembly program. The CAP3 program includes a number of improvements and new features. The program has a capability to clip 5' and 3' low-quality regions of reads. It uses base quality values in computation of overlaps between reads, construction of multiple sequence alignments of reads, and generation of consensus sequences. The program also uses forward-reverse constraints to correct assembly errors and link contigs. Results of CAP3 on four BAC data sets are presented. The performance of CAP3 was compared with that of PHRAP on a number of BAC data sets. PHRAP often produces longer contigs than CAP3 whereas CAP3 often produces fewer errors in consensus sequences than PHRAP. It is easier to construct scaffolds with CAP3 than with PHRAP on low-pass data with forward-reverse constraints.},
  author   = {Huang, X and Madan, A},
  issn     = {1088-9051},
  journal  = {Genome Research},
  month    = sep,
  number   = {9},
  pages    = {868--877},
  pmid     = {10508846},
  title    = {{CAP3: A DNA sequence assembly program}},
  url      = {http://www.ncbi.nlm.nih.gov/pubmed/10508846},
  volume   = {9},
  year     = {1999}
}
% Johnson, Alexander and Brown (2018): bioRxiv preprint on re-assembling the MMETSP transcriptomes.
% The full-text PDF link is kept in pdf-url, a field name styles ignore, because eprint is
% reserved for an archive identifier and eprint-aware styles would print a URL there verbatim.
@article{Johnson2018,
  author    = {Johnson, Lisa Kristine and Alexander, Harriet and Brown, C. Titus},
  title     = {Re-assembly, quality evaluation, and annotation of 678 microbial eukaryotic reference transcriptomes},
  year      = {2018},
  doi       = {10.1101/323576},
  publisher = {Cold Spring Harbor Laboratory},
  abstract  = {Background: De novo transcriptome assemblies are required prior to analyzing RNAseq data from a species without an existing reference genome or transcriptome. Despite the prevalence of transcriptomic studies, the effects of using different workflows, or {\textquoteright}pipelines{\textquoteright}, on the resulting assemblies are poorly understood. Here, a pipeline was programmatically automated and used to assemble and annotate raw transcriptomic short read data collected by the Marine Microbial Eukaryotic Transcriptome Sequencing Project (MMETSP). Transcriptome assemblies generated through this pipeline were evaluated and compared against assemblies that were previously generated with a pipeline developed by the National Center for Genome Research (NCGR). Findings: New transcriptome assemblies contained 70\% of the previous contigs as well as new content. On average, 7.8\% of the annotated contigs in the new assemblies were novel gene names not found in the previous assemblies. Taxonomic trends were observed in the assembly metrics, with assemblies from the Dinoflagellata and Ciliophora phyla showing a higher percentage of open reading frames and number of contigs than transcriptomes from other phyla. Conclusions: Given current bioinformatics approaches, there is no single {\textquoteright}best{\textquoteright} reference transcriptome for a particular set of raw data. As the optimum transcriptome is a moving target, improving (or not) with new tools and approaches, automated and programmable pipelines are invaluable for managing the computationally-intensive tasks required for re-processing large sets of samples with revised pipelines. Moreover, automated and programmable pipelines facilitate the comparison of diverse sets of data by ensuring a common evaluation workflow was applied to all samples. 
Thus, re-assembling existing data with new tools using automated and programmable pipelines may yield more accurate identification of taxon-specific trends across samples in addition to novel and useful products for the community.},
  url       = {https://www.biorxiv.org/content/early/2018/05/17/323576},
  pdf-url   = {https://www.biorxiv.org/content/early/2018/05/17/323576.full.pdf},
  journal   = {bioRxiv}
}
% Shumway, Cochrane and Sugawara: announcement of the Sequence Read Archive,
% Nucleic Acids Research 38, pages D870--D871.
% NOTE(review): the citation key keeps its 2009 suffix so existing \cite commands still resolve;
% year is set to 2010 to agree with month = jan and with the 2010 in the attached file name.
% NOTE(review): number follows the PubMed issue label; confirm against the journal's own listing.
@article{Shumway2009,
  abstract  = {Next generation sequencing platforms are producing biological sequencing data in unprecedented amounts. The partners of the International Nucleotide Sequencing Database Collaboration, which includes the National Center for Biotechnology Information (NCBI), the European Bioinformatics Institute (EBI), and the DNA Data Bank of Japan (DDBJ), have established the Sequence Read Archive (SRA) to provide the scientific community with an archival destination for next generation data sets. The SRA is now accessible at http://www.ncbi.nlm.nih.gov/Traces/sra from NCBI, at http://www.ebi.ac.uk/ena from EBI and at http://www.ddbj.nig.ac.jp/sub/trace{\_}sra-e.html from DDBJ. Users of these resources can obtain data sets deposited in any of the three SRA instances. Links and submission instructions are provided.},
  author    = {Shumway, Martin and Cochrane, Guy and Sugawara, Hideaki},
  doi       = {10.1093/nar/gkp1078},
  eissn     = {1362-4962},
  file      = {:Users/halexand/Library/Application Support/Mendeley Desktop/Downloaded/Shumway, Cochrane, Sugawara - 2010 - Archiving next generation sequencing data.pdf:pdf},
  issn      = {0305-1048},
  journal   = {Nucleic Acids Research},
  month     = jan,
  number    = {Database issue},
  pages     = {D870--D871},
  pmid      = {19965774},
  publisher = {Oxford University Press},
  title     = {{Archiving next generation sequencing data}},
  url       = {http://www.ncbi.nlm.nih.gov/pubmed/19965774},
  volume    = {38},
  year      = {2010}
}
% Callahan et al. (2013): Bio2RDF Release 2, a chapter in "The Semantic Web: Semantics and Big Data".
% Publisher name and publisher city are stored in separate fields so each style can place them.
@incollection{Callahan2013,
  address   = {Berlin, Heidelberg},
  author    = {Callahan, Alison and Cruz-Toledo, Jos{\'{e}} and Ansell, Peter and Dumontier, Michel},
  booktitle = {The Semantic Web: Semantics and Big Data},
  doi       = {10.1007/978-3-642-38288-8_14},
  pages     = {200--212},
  publisher = {Springer},
  title     = {{Bio2RDF Release 2: Improved Coverage, Interoperability and Provenance of Life Science Linked Data}},
  url       = {http://link.springer.com/10.1007/978-3-642-38288-8{\_}14},
  year      = {2013}
}
% Bechhofer et al. (2013): the case for Research Objects on top of linked data,
% Future Generation Computer Systems 29(2).
@article{Bechhofer2013,
  author   = {Bechhofer, Sean and Buchan, Iain and {De Roure}, David and Missier, Paolo and Ainsworth, John and Bhagat, Jiten and Couch, Philip and Cruickshank, Don and Delderfield, Mark and Dunlop, Ian and Gamble, Matthew and Michaelides, Danius and Owen, Stuart and Newman, David and Sufi, Shoaib and Goble, Carole},
  title    = {{Why linked data is not enough for scientists}},
  journal  = {Future Generation Computer Systems},
  volume   = {29},
  number   = {2},
  pages    = {599--611},
  year     = {2013},
  doi      = {10.1016/j.future.2011.08.004},
  issn     = {0167739X},
  abstract = {Scientific data represents a significant portion of the linked open data cloud and scientists stand to benefit from the data fusion capability this will afford. Publishing linked data into the cloud, however, does not ensure the required reusability. Publishing has requirements of provenance, quality, credit, attribution and methods to provide the reproducibility that enables validation of results. In this paper we make the case for a scientific data publication model on top of linked data and introduce the notion of Research Objects as first class citizens for sharing and publishing.},
  file     = {:Users/halexand/Library/Application Support/Mendeley Desktop/Downloaded/Bechhofer et al. - 2013 - Why linked data is not enough for scientists.pdf:pdf}
}
% Simpson et al. (2009): ABySS, a parallel assembler for short-read data, Genome Research 19(6).
% Initials are space-separated so BibTeX parses each one as its own name token.
@article{Simpson2009,
  abstract = {Widespread adoption of massively parallel deoxyribonucleic acid (DNA) sequencing instruments has prompted the recent development of de novo short read assembly algorithms. A common shortcoming of the available tools is their inability to efficiently assemble vast amounts of data generated from large-scale sequencing projects, such as the sequencing of individual human genomes to catalog natural genetic variation. To address this limitation, we developed ABySS (Assembly By Short Sequences), a parallelized sequence assembler. As a demonstration of the capability of our software, we assembled 3.5 billion paired-end reads from the genome of an African male publicly released by Illumina, Inc. Approximately 2.76 million contigs {\textgreater} or =100 base pairs (bp) in length were created with an N50 size of 1499 bp, representing 68{\%} of the reference human genome. Analysis of these contigs identified polymorphic and novel sequences not present in the human reference assembly, which were validated by alignment to alternate human assemblies and to other primate genomes.},
  author   = {Simpson, J. T. and Wong, K. and Jackman, S. D. and Schein, J. E. and Jones, S. J. M. and Birol, I.},
  doi      = {10.1101/gr.089532.108},
  issn     = {1088-9051},
  journal  = {Genome Research},
  month    = jun,
  number   = {6},
  pages    = {1117--1123},
  pmid     = {19251739},
  title    = {{ABySS: A parallel assembler for short read sequence data}},
  url      = {http://www.ncbi.nlm.nih.gov/pubmed/19251739},
  volume   = {19},
  year     = {2009}
}
% Keeling et al. (2014): community page describing the MMETSP transcriptome resource, PLoS Biology 12(6).
% This is a journal article with no arXiv preprint, so the entry carries no eprint or archive fields.
@article{Keeling2014,
  abstract = {Current sampling of genomic sequence data from eukaryotes is relatively poor, biased, and inadequate to address important questions about their biology, evolution, and ecology; this Community Page describes a resource of 700 transcriptomes from marine microbial eukaryotes to help understand their role in the world's oceans.},
  author   = {Keeling, Patrick J. and Burki, Fabien and Wilcox, Heather M. and Allam, Bassem and Allen, Eric E. and Amaral-Zettler, Linda A. and Armbrust, E. Virginia and Archibald, John M. and Bharti, Arvind K. and Bell, Callum J. and Beszteri, Bank and Bidle, Kay D. and Cameron, Connor T. and Campbell, Lisa and Caron, David A. and Cattolico, Rose Ann and Collier, Jackie L. and Coyne, Kathryn and Davy, Simon K. and Deschamps, Phillipe and Dyhrman, Sonya T. and Edvardsen, Bente and Gates, Ruth D. and Gobler, Christopher J. and Greenwood, Spencer J. and Guida, Stephanie M. and Jacobi, Jennifer L. and Jakobsen, Kjetill S. and James, Erick R. and Jenkins, Bethany and John, Uwe and Johnson, Matthew D. and Juhl, Andrew R. and Kamp, Anja and Katz, Laura A. and Kiene, Ronald and Kudryavtsev, Alexander and Leander, Brian S. and Lin, Senjie and Lovejoy, Connie and Lynn, Denis and Marchetti, Adrian and McManus, George and Nedelcu, Aurora M. and Menden-Deuer, Susanne and Miceli, Cristina and Mock, Thomas and Montresor, Marina and Moran, Mary Ann and Murray, Shauna and Nadathur, Govind and Nagai, Satoshi and Ngam, Peter B. and Palenik, Brian and Pawlowski, Jan and Petroni, Giulio and Piganeau, Gwenael and Posewitz, Matthew C. and Rengefors, Karin and Romano, Giovanna and Rumpho, Mary E. and Rynearson, Tatiana and Schilling, Kelly B. and Schroeder, Declan C. and Simpson, Alastair G B and Slamovits, Claudio H. and Smith, David R. and Smith, G. Jason and Smith, Sarah R. and Sosik, Heidi M. and Stief, Peter and Theriot, Edward and Twary, Scott N. and Umale, Pooja E. and Vaulot, Daniel and Wawrik, Boris and Wheeler, Glen L. and Wilson, William H. and Xu, Yan and Zingone, Adriana and Worden, Alexandra Z.},
  doi      = {10.1371/journal.pbio.1001889},
  eissn    = {1545-7885},
  issn     = {1544-9173},
  journal  = {PLoS Biology},
  month    = jun,
  number   = {6},
  pages    = {e1001889},
  pmid     = {24959919},
  title    = {{The Marine Microbial Eukaryote Transcriptome Sequencing Project (MMETSP): Illuminating the functional diversity of eukaryotic life in the oceans through transcriptome sequencing}},
  url      = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=4068987},
  volume   = {12},
  year     = {2014}
}
% Piwowar and Vision (2013): data reuse and the open data citation advantage, PeerJ 1, e175.
% month uses the predefined macro so each style can expand it in its own form.
@article{Piwowar2013,
  author    = {Piwowar, Heather A. and Vision, Todd J.},
  doi       = {10.7717/peerj.175},
  file      = {:Users/halexand/Library/Application Support/Mendeley Desktop/Downloaded/Piwowar, Vision - 2013 - Data reuse and the open data citation advantage.pdf:pdf},
  issn      = {2167-8359},
  journal   = {PeerJ},
  keywords  = {Bibliometrics,Data archiving,Data repositories,Data reuse,Gene expression microarray,Incentives,Information science,Open data},
  month     = oct,
  pages     = {e175},
  publisher = {PeerJ Inc.},
  title     = {{Data reuse and the open data citation advantage}},
  url       = {https://peerj.com/articles/175},
  volume    = {1},
  year      = {2013}
}
% Kodama, Shumway and Leinonen (2012): content and structure of the Sequence Read Archive,
% Nucleic Acids Research 40(D1).
% The page range is written in full, and the journal's two ISSNs sit in issn and eissn.
@article{Kodama2012,
  abstract  = {New generation sequencing platforms are producing data with significantly higher throughput and lower cost. A portion of this capacity is devoted to individual and community scientific projects. As these projects reach publication, raw sequencing datasets are submitted into the primary next-generation sequence data archive, the Sequence Read Archive (SRA). Archiving experimental data is the key to the progress of reproducible science. The SRA was established as a public repository for next-generation sequence data as a part of the International Nucleotide Sequence Database Collaboration (INSDC). INSDC is composed of the National Center for Biotechnology Information (NCBI), the European Bioinformatics Institute (EBI) and the DNA Data Bank of Japan (DDBJ). The SRA is accessible at www.ncbi.nlm.nih.gov/sra from NCBI, at www.ebi.ac.uk/ena from EBI and at trace.ddbj.nig.ac.jp from DDBJ. In this article, we present the content and structure of the SRA and report on updated metadata structures, submission file formats and supported sequencing platforms. We also briefly outline our various responses to the challenge of explosive data growth.},
  author    = {Kodama, Yuichi and Shumway, Martin and Leinonen, Rasko},
  doi       = {10.1093/nar/gkr854},
  eissn     = {1362-4962},
  file      = {:Users/halexand/Library/Application Support/Mendeley Desktop/Downloaded/Kodama et al. - 2012 - The Sequence Read Archive explosive growth of sequencing data.pdf:pdf},
  issn      = {0305-1048},
  journal   = {Nucleic Acids Research},
  month     = jan,
  number    = {D1},
  pages     = {D54--D56},
  pmid      = {22009675},
  publisher = {Oxford University Press},
  title     = {{The sequence read archive: Explosive growth of sequencing data}},
  url       = {http://www.ncbi.nlm.nih.gov/pubmed/22009675},
  volume    = {40},
  year      = {2012}
}
% Caron et al. (2016): review of transcriptomic studies of marine protists,
% Nature Reviews Microbiology 15(1).
% publisher holds the publisher name only, without the copyright notice.
@article{Caron2016,
  abstract  = {Protists, which are single-celled eukaryotes, critically influence the ecology and chemistry of marine ecosystems, but genome-based studies of these organisms have lagged behind those of other microorganisms. However, recent transcriptomic studies of cultured species, complemented by meta-omics analyses of natural communities, have increased the amount of genetic information available for poorly represented branches on the tree of eukaryotic life. This information is providing insights into the adaptations and interactions between protists and other microorganisms and macroorganisms, but many of the genes sequenced show no similarity to sequences currently available in public databases. A better understanding of these newly discovered genes will lead to a deeper appreciation of the functional diversity and metabolic processes in the ocean. In this Review, we summarize recent developments in our understanding of the ecology, physiology and evolution of protists, derived from transcriptomic studies of cultured strains and natural communities, and discuss how these novel large-scale genetic datasets will be used in the future.},
  author    = {Caron, David A. and Alexander, Harriet and Allen, Andrew E. and Archibald, John M. and Armbrust, E. Virginia and Bachy, Charles and Bell, Callum J. and Bharti, Arvind and Dyhrman, Sonya T. and Guida, Stephanie M. and Heidelberg, Karla B. and Kaye, Jonathan Z. and Metzner, Julia and Smith, Sarah R. and Worden, Alexandra Z.},
  doi       = {10.1038/nrmicro.2016.160},
  issn      = {1740-1526},
  journal   = {Nature Reviews Microbiology},
  month     = nov,
  number    = {1},
  pages     = {6--20},
  publisher = {Nature Publishing Group},
  title     = {{Probing the evolution, ecology and physiology of marine protists using transcriptomics}},
  url       = {http://www.nature.com/doifinder/10.1038/nrmicro.2016.160},
  volume    = {15},
  year      = {2016}
}
% Grabherr et al. (2011): the Trinity method for de novo transcriptome assembly,
% Nature Biotechnology 29(7).
% publisher names the journal's publisher rather than the host of the author manuscript;
% the title carries no trailing period because bibliography styles add their own punctuation.
@article{Grabherr2011,
  abstract  = {Massively parallel sequencing of cDNA has enabled deep and efficient probing of transcriptomes. Current approaches for transcript reconstruction from such data often rely on aligning reads to a reference genome, and are thus unsuitable for samples with a partial or missing reference genome. Here we present the Trinity method for de novo assembly of full-length transcripts and evaluate it on samples from fission yeast, mouse and whitefly, whose reference genome is not yet available. By efficiently constructing and analyzing sets of de Bruijn graphs, Trinity fully reconstructs a large fraction of transcripts, including alternatively spliced isoforms and transcripts from recently duplicated genes. Compared with other de novo transcriptome assemblers, Trinity recovers more full-length transcripts across a broad range of expression levels, with a sensitivity similar to methods that rely on genome alignments. Our approach provides a unified solution for transcriptome reconstruction in any sample, especially in the absence of a reference genome.},
  author    = {Grabherr, Manfred G and Haas, Brian J and Yassour, Moran and Levin, Joshua Z and Thompson, Dawn A and Amit, Ido and Adiconis, Xian and Fan, Lin and Raychowdhury, Raktima and Zeng, Qiandong and Chen, Zehua and Mauceli, Evan and Hacohen, Nir and Gnirke, Andreas and Rhind, Nicholas and di Palma, Federica and Birren, Bruce W and Nusbaum, Chad and Lindblad-Toh, Kerstin and Friedman, Nir and Regev, Aviv},
  doi       = {10.1038/nbt.1883},
  file      = {:Users/halexand/Library/Application Support/Mendeley Desktop/Downloaded/Grabherr et al. - 2011 - Full-length transcriptome assembly from RNA-Seq data without a reference genome(2).pdf:pdf},
  issn      = {1546-1696},
  journal   = {Nature Biotechnology},
  month     = may,
  number    = {7},
  pages     = {644--652},
  pmid      = {21572440},
  publisher = {Nature Publishing Group},
  title     = {{Full-length transcriptome assembly from RNA-Seq data without a reference genome}},
  url       = {http://www.ncbi.nlm.nih.gov/pubmed/21572440},
  volume    = {29},
  year      = {2011}
}