% Zhang, Sun & Cole (2014), SAT-Assembler paper, PLoS Computational Biology 10(8).
% The abstract field holds the article's "Author Summary" section.
@article{10.1371/journal.pcbi.1003737,
  author    = {Zhang, Yuan and Sun, Yanni and Cole, James R.},
  title     = {A Scalable and Accurate Targeted Gene Assembly Tool ({SAT-Assembler}) for Next-Generation Sequencing Data},
  journal   = {PLoS Computational Biology},
  publisher = {Public Library of Science},
  year      = {2014},
  month     = aug,
  volume    = {10},
  number    = {8},
  pages     = {e1003737},
  doi       = {10.1371/journal.pcbi.1003737},
  url       = {http://dx.doi.org/10.1371/journal.pcbi.1003737},
  abstract  = {Author Summary
    Next-generation sequencing (NGS) provides an efficient and affordable way to sequence the genomes or transcriptomes of a large amount of organisms. With fast accumulation of the sequencing data from various NGS projects, the bottleneck is to efficiently mine useful knowledge from the data. As NGS platforms usually generate short and fragmented sequences (reads), one key step to annotate NGS data is to assemble short reads into longer contigs, which are then used to recover functional elements such as protein-coding genes. Short read assembly remains one of the most difficult computational problems in genomics. In particular, the performance of existing assembly tools is not satisfactory on complicated NGS data sets. They cannot reliably separate genes of high similarity, recover under-represented genes, and incur high computational time and memory usage. Hence, we propose a targeted gene assembly tool, SAT-Assembler, to assemble genes of interest directly from NGS data with low memory usage and high accuracy. Our experimental results on a transcriptomic data set and two microbial community data sets showed that SAT-Assembler used less memory and recovered more target genes with better accuracy than existing tools.},
}