%PDF-1.3 % 1 0 obj << /Type /Pages /Count 7 /Kids [ 3 0 R 4 0 R 5 0 R 6 0 R 7 0 R 8 0 R 9 0 R ] >> endobj 2 0 obj << /Producer (PyPDF2) /Title (The\040Genome\040Analysis\040Toolkit\072\040A\040MapReduce\040framework\040for\040analyzing\040next\055generation\040DNA\040sequencing\040data) /Author (Aaron\040McKenna\054\040Matthew\040Hanna\054\040Eric\040Banks\054\040Andrey\040Sivachenko\054\040Kristian\040Cibulskis\054\040Andrew\040Kernytsky\054\040Kiran\040V\056\040Garimella\054\040David\040Altshuler\054\040Stacey\040Gabriel\054\040Mark\040J\056\040Daly\054\040Mark\040A\056\040DePristo) /Subject (Next\055generation\040DNA\040sequencing\040\050NGS\051\040projects\054\040such\040as\040the\0401000\040Genomes\040Project\054\040are\040already\040revolutionizing\040our\040understanding\040of\040genetic\040variation\040among\040individuals\056\040However\054\040the\040massive\040data\040sets\040generated\040by\040NGS\204the\0401000\040Genome\040pilot\040alone\040includes\040nearly\040five\040terabases\204make\040writing\040feature\055rich\054\040efficient\054\040and\040robust\040analysis\040tools\040difficult\040for\040even\040computationally\040sophisticated\040individuals\056\040Indeed\054\040many\040professionals\040are\040limited\040in\040the\040scope\040and\040the\040ease\040with\040which\040they\040can\040answer\040scientific\040questions\040by\040the\040complexity\040of\040accessing\040and\040manipulating\040the\040data\040produced\040by\040these\040machines\056\040Here\054\040we\040discuss\040our\040Genome\040Analysis\040Toolkit\040\050GATK\051\054\040a\040structured\040programming\040framework\040designed\040to\040ease\040the\040development\040of\040efficient\040and\040robust\040analysis\040tools\040for\040next\055generation\040DNA\040sequencers\040using\040the\040functional\040programming\040philosophy\040of\040MapReduce\056\040The\040GATK\040provides\040a\040small\040but\040rich\040set\040of\040data\040access\040patterns\040that\040encompass\040the\040majority\040of\040analysis\040tool\040needs\056\040Separating\040specific\040analysis\040calculations\040from\040common\040data\040management\040infrastructure\040enables\040us\040to\040optimize\040the\040GATK\040framework\040for\040correctness\054\040stability\054\040and\040CPU\040and\040memory\040efficiency\040and\040to\040enable\040distributed\040and\040shared\040memory\040parallelization\056\040We\040highlight\040the\040capabilities\040of\040the\040GATK\040by\040describing\040the\040implementation\040and\040application\040of\040robust\054\040scale\055tolerant\040tools\040like\040coverage\040calculators\040and\040single\040nucleotide\040polymorphism\040\050SNP\051\040calling\056\040We\040conclude\040that\040the\040GATK\040programming\040framework\040enables\040developers\040and\040analysts\040to\040quickly\040and\040easily\040write\040efficient\040and\040robust\040NGS\040tools\054\040many\040of\040which\040have\040already\040been\040incorporated\040into\040large\055scale\040sequencing\040projects\040like\040the\0401000\040Genomes\040Project\040and\040The\040Cancer\040Genome\040Atlas\056) /Keywords (01\040Sep\0402010\054\040Genome\040Research\054\040Variant\040Call\040Format\054\040Software\040framework\054\040Broad\040Institute) >> endobj 3 0 obj << /CropBox [ 0 0 612 792 ] /Parent 1 0 R /Contents [ 11 0 R 12 0 R 13 0 R 14 0 R 15 0 R 16 0 R 17 0 R 18 0 R ] /Rotate 0 /MediaBox [ 0 0 612 792 ] /Thumb 19 0 R /Resources 22 0 R /Type /Page >> endobj 4 0 obj << /CropBox [ 0 0 612 792 ] /Parent 1 0 R /Contents 54 0 R /Rotate 0 /MediaBox [ 0 0 612 792 ] /Thumb 55 0 R /Resources 56 0 R /Type /Page >> endobj 5 0 obj << /CropBox [ 0 0 612 792 ] /Parent 1 0 R /Contents 66 0 R /Rotate 0 /MediaBox [ 0 0 612 792 ] /Thumb 67 0 R /Resources 68 0 R /Type /Page >> endobj 6 0 obj << /CropBox [ 0 0 612 792 ] /Parent 1 0 R /Contents 72 0 R /Rotate 0 /MediaBox [ 0 0 612 792 ] /Thumb 73 0 R /Resources 74 0 R /Type /Page >> endobj 7 0 obj << /CropBox [ 0 0 612 792 ] /Parent 1 0 R /Contents 77 0 R /Rotate 0 /MediaBox [ 0 0 612 792 ] /Thumb 78 0 R /Resources 79 0 R /Type /Page >> endobj 8 0 obj << /CropBox [ 0 0 612 792 ] /Parent 1 0 R /Contents 106 0 R /Rotate 0 /MediaBox [ 0 0 612 792 ] /Thumb 107 0 R /Resources 108 0 R /Type /Page >> endobj 9 0 obj << /CropBox [ 0 0 612 792 ] /Parent 1 0 R /Contents 115 0 R /Rotate 0 /MediaBox [ 0 0 612 792 ] /Thumb 116 0 R /Resources 117 0 R /Type /Page >> endobj 10 0 obj << /Type /Catalog /Pages 1 0 R >> endobj 11 0 obj << /Filter /FlateDecode /Length 874 >> stream HUMo8W,''nQOE8ZTKI_Cvj )ͼ73|mQ ;ZXKQh!1VaUA"`]C1ñ]hBR(l/śd^VR9_g>iyQnf!EYzn+V,#2-f߯Da"3T>sN8ڇn3h\7}Tv]n?<-Vh>"XASL,UE 6d-0C] M+|tCӅZA\#t ևP7~x_O)朱ÈxՍlz0XGAH% 44A 3BfR#ƥ8#K)͉LIulD!Kg[:s@dZ4?'B5zaM !L*$?u\֛CBEWͧmۯd$kqjxj:JύyϓR 4 DHKFy1KC koi4J5p_R.cS'Jh ?l['52sDMjg4wFDvRAb&4=1I Kz|K''WfMs·_0q^qIݔ8(Q<D