Skip to content

Instantly share code, notes, and snippets.

@ElDeveloper
Created September 8, 2016 21:52
Show Gist options
  • Save ElDeveloper/13289d380c36c6d7efb6e43bfba263fb to your computer and use it in GitHub Desktop.
Save ElDeveloper/13289d380c36c6d7efb6e43bfba263fb to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false,
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [],
"source": [
"import pandas as pd, numpy as np\n",
"from emperor import Emperor, nbinstall\n",
"from skbio import OrdinationResults\n",
"\n",
"from emperor.qiime_backports.parse import parse_mapping_file\n",
"from emperor.qiime_backports.format import format_mapping_file\n",
"\n",
"from skbio.io.util import open_file\n",
"\n",
"nbinstall()\n",
"\n",
"def load_mf(fn):\n",
"    \"\"\"Load a mapping file into a DataFrame indexed by sample identifier.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    fn\n",
"        Mapping file to read; handed unchanged to ``open_file``.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pd.DataFrame\n",
"        Parsed rows with the parsed header as column names, indexed by the\n",
"        ``SampleID`` column.\n",
"    \"\"\"\n",
"    with open_file(fn) as f:\n",
"        # the third value returned by the parser is not used here\n",
"        mapping_data, header, _ = parse_mapping_file(f)\n",
"        # set_index returns a new frame, so no in-place mutation is needed\n",
"        return pd.DataFrame(mapping_data, columns=header).set_index('SampleID')\n",
"\n",
"def write_mf(f, _df):\n",
"    \"\"\"Serialize a sample-indexed DataFrame to disk as a mapping file.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    f\n",
"        Destination handed to the built-in ``open`` in write mode.\n",
"    _df : pd.DataFrame\n",
"        Table to write; its index values are emitted under ``SampleID``.\n",
"    \"\"\"\n",
"    with open(f, 'w') as out:\n",
"        header = ['SampleID'] + _df.columns.tolist()\n",
"        # itertuples() yields the index first, lining up with SampleID\n",
"        rows = list(_df.itertuples())\n",
"        contents = format_mapping_file(header, rows)\n",
"        out.write(contents+'\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We are going to load data from [Fierer et al. 2010](http://www.pnas.org/content/107/14/6477.full). The data was retrieved from study [232](https://qiita.ucsd.edu/study/description/232) in [Qiita](https://qiita.ucsd.edu); note that you need to be logged in to access the study.\n",
"\n",
"Specifically, here we will reproduce *Figure 1 A*."
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false,
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [],
"source": [
"# Inputs for the keyboard dataset: sample metadata and ordination results\n",
"mapping_path = 'keyboard/mapping-file.txt'\n",
"ordination_path = 'keyboard/unweighted-unifrac.even1000.txt'\n",
"\n",
"mf = load_mf(mapping_path)\n",
"res = OrdinationResults.read(ordination_path)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Original data"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"<script type=\"text/javascript\">\n",
"\n",
"if ($(\"#emperor-css\").length == 0){{\n",
" $(\"head\").append([\n",
"\n",
" '<link id=\"emperor-css\" rel=\"stylesheet\" type=\"text/css\" href=\"https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/css/emperor.css\">',\n",
" '<link rel=\"stylesheet\" type=\"text/css\" href=\"https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/css/jquery-ui.min.css\">',\n",
" '<link rel=\"stylesheet\" type=\"text/css\" href=\"https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/css/slick.grid.min.css\">',\n",
" '<link rel=\"stylesheet\" type=\"text/css\" href=\"https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/css/spectrum.min.css\">',\n",
" '<link rel=\"stylesheet\" type=\"text/css\" href=\"https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/css/chosen.min.css\">',\n",
" '<link rel=\"stylesheet\" type=\"text/css\" href=\"https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/css/jquery.contextMenu.min.css\">'\n",
" ]);\n",
"}}\n",
"</script>\n",
"\n",
"<div id='emperor-notebook-0x7de46144' style=\"position: relative; width:100%; height:500px;\">\n",
" <div class='loading' style=\"position: absolute;top: 50%;left: 50%;margin-left: -229px; margin-top: -59px; z-index: 10000;height:118px;width:458px;padding:0px\"><img src='https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/img/emperor.png' alt='Emperor resources missing. Expected them to be found in https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files'></div>\n",
"</div>\n",
"</div>\n",
"\n",
"<script type=\"text/javascript\">\n",
"requirejs.config({\n",
"// the left side is the module name, and the right side is the path\n",
"// relative to the baseUrl attribute, do NOT include the .js extension\n",
"'paths': {\n",
" /* jQuery */\n",
" 'jquery': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/jquery-2.1.4.min',\n",
" 'jqueryui': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/jquery-ui.min',\n",
" 'jquery_drag': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/jquery.event.drag-2.2.min',\n",
"\n",
" /* jQuery plugins */\n",
" 'chosen': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/chosen.jquery.min',\n",
" 'spectrum': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/spectrum.min',\n",
" 'position': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/jquery.ui.position.min',\n",
" 'contextmenu': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/jquery.contextMenu.min',\n",
"\n",
" /* other libraries */\n",
" 'underscore': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/underscore-min',\n",
" 'chroma': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/chroma.min',\n",
" 'filesaver': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/FileSaver.min',\n",
" 'blob': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/Blob',\n",
" 'd3': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/d3.min',\n",
"\n",
"\n",
" /* THREE.js and plugins */\n",
" 'three': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/three.min',\n",
" 'orbitcontrols': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/three.js-plugins/OrbitControls',\n",
"\n",
" /* SlickGrid */\n",
" 'slickcore': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/slick.core.min',\n",
" 'slickgrid': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/slick.grid.min',\n",
" 'slickformatters': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/slick.editors.min',\n",
" 'slickeditors': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/slick.formatters.min',\n",
"\n",
" /* Emperor's objects */\n",
" 'util': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/js/util',\n",
" 'model': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/js/model',\n",
" 'view': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/js/view',\n",
" 'controller': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/js/controller',\n",
" 'draw': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/js/draw',\n",
" 'scene3d': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/js/sceneplotview3d',\n",
" 'viewcontroller': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/js/view-controller',\n",
" 'colorviewcontroller': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/js/color-view-controller',\n",
" 'visibilitycontroller': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/js/visibility-controller',\n",
" 'scaleviewcontroller': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/js/scale-view-controller',\n",
" 'shapecontroller': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/js/shape-controller',\n",
" 'axescontroller': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/js/axes-controller',\n",
" 'shape-editor': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/js/shape-editor',\n",
" 'color-editor': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/js/color-editor',\n",
" 'scale-editor': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/js/scale-editor',\n",
" 'shapes': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/js/shapes'\n",
"},\n",
"/*\n",
" Libraries that are not AMD compatible need shim to declare their\n",
" dependencies.\n",
" */\n",
"'shim': {\n",
" 'jquery_drag': {\n",
" 'deps': ['jquery', 'jqueryui']\n",
" },\n",
" 'chosen': {\n",
" 'deps': ['jquery'],\n",
" 'exports': 'jQuery.fn.chosen'\n",
" },\n",
" 'contextmenu' : {\n",
" 'deps': ['jquery', 'jqueryui', 'position']\n",
" },\n",
" 'filesaver' : {\n",
" 'deps': ['blob']\n",
" },\n",
" 'orbitcontrols': {\n",
" 'deps': ['three']\n",
" },\n",
"'slickcore': ['jqueryui'],\n",
"'slickgrid': ['slickcore', 'jquery_drag', 'slickformatters',\n",
" 'slickeditors']\n",
"}\n",
"});\n",
"\n",
"requirejs(\n",
"[\"jquery\", \"model\", \"controller\"],\n",
"function($, model, EmperorController) {\n",
" var DecompositionModel = model.DecompositionModel;\n",
"\n",
" var div = $('#emperor-notebook-0x7de46144');\n",
"\n",
" var ids = ['232.M3Rkey217', '232.M9Thmr217', '232.M9Pinr217', '232.M3Rinr217', '232.M2Enter217', '232.M9Ekey217', '232.M3Midr217', '232.M3Tkey217', '232.M2Rsft217', '232.M2Okey217', '232.M3Ckey217', '232.M9Akey217', '232.M9Hkey217', '232.M9Pinl217', '232.M2Kkey217', '232.M9Midl217', '232.M9Rinl217', '232.M9Ckey217', '232.M2Midr217', '232.M2Lsft217', '232.M9Mkey217', '232.M9Enter217', '232.M9Bkey217', '232.M9Fkey217', '232.M2Mkey217', '232.M3Wkey217', '232.M9Gkey217', '232.M3Midl217', '232.M3Pinl217', '232.M9Vkey217', '232.M9Wkey217', '232.M2Tkey217', '232.M3Indl217', '232.M9Dkey217', '232.M2Indl217', '232.M2Hkey217', '232.M2Gkey217', '232.M2Jkey217', '232.M9Nkey217', '232.M2Ukey217', '232.M2Fkey217', '232.M3Lkey217', '232.M9Pkey217', '232.M2Ykey217', '232.M2Pinr217', '232.M2Ikey217', '232.M2Ekey217', '232.M9Midr217', '232.M3Space217', '232.M9Thml217', '232.M9Rinr217', '232.M3Mkey217', '232.M3Rsft217', '232.M2Lkey217', '232.M2Space217', '232.M2Vkey217', '232.M3Gkey217', '232.M9Indr217', '232.M3Xkey217', '232.M3Ykey217', '232.M3Vkey217', '232.M2Bkey217', '232.M3Pinr217', '232.M3Bkey217', '232.M2Akey217', '232.M3Indr217', '232.M3Thmr217', '232.M2Skey217', '232.M2Wkey217', '232.M2Rinr217', '232.M2Ckey217', '232.M3Ekey217', '232.M2Zkey217', '232.M2Thml217', '232.M2Rinl217', '232.M3Rinl217', '232.M2Midl217'];\n",
" var coords = [[-0.15847722181174662, -0.17073904878616603, -0.09906305119186608, 0.026134878685267784, 0.06376486684566962], [-0.16364986560249942, 0.16054696199780816, -0.1705790982632183, -0.05197318273194283, -0.011532543354058317], [-0.15600437955361796, 0.17817363880030943, -0.11243701625055075, 0.05400515113780437, -0.07158538372733535], [-0.08793208903635144, -0.21150279045474904, -0.0912521129842712, 0.027392142207187206, -0.1168078281339687], [0.2713433332622112, -0.012638234636494947, -0.029044902189380023, -0.08524856180846827, -0.09552021304468544], [-0.19604375771857635, 0.06943428720639651, 0.1644037396465877, -0.15274462212262158, 0.002485811221681098], [-0.12313236193983815, -0.20396253696135883, 0.0068688949686654035, 0.03245281602572589, -0.12229887273827975], [-0.18617241853623093, -0.06957738801372167, -0.008329883603523441, 0.03351511241361556, 0.08363160533038863], [0.2912046485818605, 0.036720642571267476, 0.01914945502564694, 0.04546272290755183, 0.019002166146798685], [0.18037191232760622, -0.044302059622163215, -0.035074634665129216, -0.13607877379871836, 0.007843650881171676], [-0.20651854409191342, -0.10754641605410008, 0.218867181776635, 0.02748408650815357, 0.08474828402397482], [-0.1628901111693042, 0.09491828490066438, 0.2510642890940943, -0.08062018459183551, -0.04732163718460894], [-0.22156513545344897, 0.1910730168085156, 0.0942922444528773, -0.02781978153366029, -0.05051561679811557], [-0.15632485239781546, 0.13674859553161825, -0.17788276770383105, 0.002239855654937378, -0.042446773920532456], [0.24636297782644334, 0.03938363206047184, -0.022578688915348408, -0.07504443813875299, -0.057047076790432825], [-0.19274640579634994, 0.13352480685670834, -0.09951928886955352, 0.0014905430633917359, 0.01639911206400835], [-0.15637700640926644, 0.04472580043499717, -0.1601341137824791, -0.06156126491462396, 0.027351972560853456], [-0.23708319050357407, 0.12539159443825545, 0.052639079765370723, -0.019720310439054978, 
0.018834714982491636], [0.17669949269481314, 0.01684731901755309, -0.1141809834460892, 0.019364099652335504, -0.016504939651248298], [0.252505593864388, 0.014555331870780876, -0.025572762449383855, -0.06381731788702742, 0.020238559338512034], [-0.2299868224010091, 0.10912856284019634, -0.016786017429864828, 0.014259995035462603, 0.02049310954238161], [-0.23353442534598845, 0.12200627629078384, 0.12488628056351879, -0.04265237632833217, -0.026108207472737288], [-0.24399001271503873, 0.13994050162090302, 0.039048592190188754, -0.01659881901675978, 0.040857596629480776], [-0.23225540782105822, 0.12148858347242074, 0.04784053875093525, -0.07110629856735963, 0.11927669267344856], [0.1205169559886714, 0.008842728496562641, -0.05284919402119804, -0.06258363782930602, 0.09020816091377616], [-0.1656239356143644, -0.1322681822839339, -0.04854077028296568, 0.08945793910806446, 0.17584122282114784], [-0.20134237310614408, 0.12379371927269059, 0.14562061802939283, -0.06358779738900454, -0.07735334049926629], [-0.14648910732595083, -0.053203021985261646, -0.08316485021554348, 0.05247722284738077, -0.07717368947137826], [-0.03714165590970512, -0.2161950319359749, -0.06638760744028739, 0.06828540018783485, -0.06963083698415126], [-0.1902254991440429, 0.12060992474464073, 0.054394178262302646, -0.03495618135599851, -0.016446698495977338], [-0.23056565771535997, 0.16378140716513584, 0.050111870526429354, 0.008056507785298485, -0.004244207422354574], [0.2932617322976969, 0.005779119528765214, 0.02359815212959581, -0.022609295700845376, 0.0614909769277533], [-0.18496001307898863, -0.06631101902657956, -0.10321713059567886, 0.055866628535363685, 0.08244778557528952], [-0.22248369184406577, 0.13745374526238285, 0.08215796787844744, -0.07994396370228411, 0.016623006896299374], [0.20619448134159513, 0.020972261528253572, -0.025616711183721733, 0.015561603161059088, 0.036383835996830566], [0.22216075959328765, -0.021379940144554573, -0.07139324497432455, -0.15480664661929971, 
0.09426207644864631], [0.3045123893465498, 0.010779208180364952, -0.10609847305401054, -0.03372041772852855, 0.026999691378741877], [0.22450087194943907, -0.06920080239473857, -0.04154374584593726, -0.21041213670486475, 0.09770947788503156], [-0.25564311281172614, 0.10930395417653681, 0.17174199676593258, -0.06852373874093186, -0.05921030657274124], [0.170973405832797, -0.017337262074115616, -0.055907798564627464, -0.0806158408279561, -0.007761858828594522], [0.3510265980241152, -0.0035754568645139026, -0.03463408369370149, -0.057047878090560354, -0.03779423682265439], [-0.12640116000168067, -0.22190603417648808, 0.022473611719251222, -0.022616459823228185, 0.056162563013940826], [-0.2356419201628681, 0.0330190991009863, 0.27903880986898916, -0.07191905264474356, -0.050245651086384596], [0.24366473711297643, -0.08079562088851543, -0.006921522626784851, -0.13821744177813852, 0.07250627410032398], [0.18011171788687969, 0.015402979870742106, -0.008802803373167857, -0.059246777985413876, 0.04331361203637086], [0.2456872562763719, -0.03990324822298844, 0.08812051379482759, -0.006819929388620808, -0.10991740062911624], [0.3682977847298556, 0.0478649521580663, 0.09361564634579024, 0.13641616128871284, -0.09010041988752744], [-0.1938926607580852, 0.18795460031719471, -0.21344090611199154, 0.038255206386736886, -0.03350014110061574], [-0.19168900554321885, -0.22152353374792114, 0.07143975501040269, -0.019697446299004825, -0.031371503561151474], [-0.23930798006082651, 0.2504407788032832, -0.07460899236970182, 0.03406020007750262, -0.02438830654821116], [-0.16309261567340014, 0.21115256111857691, -0.2262520932666264, 0.046969186685865895, -0.038170277484061876], [-0.14948783606594587, -0.21192638374223463, 0.13819037969707026, -0.04756247990395532, 0.06062861033979177], [-0.1293675874962787, -0.12928819044216355, -0.010541797652542373, 0.04687797406468326, -0.1384684923654932], [0.1853715242266962, -0.04394076376577986, 0.0013957413384741062, -0.08713319689971091, 
0.11051971956688506], [0.35223305849195774, -0.02617411803865043, -0.0035699117340603414, -0.09720392816850307, -0.08807674165704805], [0.3657428302603664, 0.02398432130768629, -0.009965214758798897, -0.02440104170499919, -0.009946510063913857], [-0.1306513653138075, -0.12255623486647205, -0.06423217213467512, 0.016758606503725536, 0.16845698607059986], [-0.19927266681288283, 0.20920906228393868, -0.1697924444420701, 0.028979324365290042, -0.046242263775499094], [-0.12496946972727718, -0.22187532285360786, -0.06751711214893302, -0.004956783891719452, -0.05859899402568845], [-0.16029002821260424, -0.16435051448056975, 0.004489617087366096, -0.04065527088625365, -0.07342478249812615], [-0.1999446986079944, -0.11197931719829385, 0.05614128595392682, 0.056712510058803456, 0.12544482964194237], [0.3155976418651947, 0.06181751810464016, -0.03983658891207111, -0.019480623503277786, 0.025521482991186763], [-0.13673609373804937, -0.23186848427230336, -0.024311609113230385, 0.041388191695546674, -0.17416805194930957], [-0.18141765530423826, -0.1059983575557922, -0.01229614981853032, 0.08254603932784547, 0.1392329897856232], [0.3317327175964213, 0.031494188522948004, 0.003731619268066509, 0.02465980933022621, -3.0439093436323844e-05], [-0.10621826956644516, -0.25131819509746434, -0.014896748387920242, 0.04345134189355944, -0.038575710784660336], [-0.14129602772271763, -0.1756429547567575, -0.07199362277576048, 0.09748286091573695, -0.11137468562433617], [0.2676657252799645, 0.022724293292963778, 0.081082933900486, 0.13633659095053513, 0.04413583207885528], [0.34420030621006364, 0.01839022700771831, 0.06662309051527394, -0.022038885202044545, -0.0381067810278206], [0.1904330375606542, 0.021547297298635325, -0.047426260379625176, -0.00995748336814545, 0.06487085248938376], [0.38895777685685196, 0.0808088261048264, 0.10129752373641526, 0.11044866831795262, -0.07980610441760053], [-0.19985586324704674, -0.12768336870987695, 0.045215825749683164, 0.0645140247955815, 
0.023251974368653117], [0.3051909705017922, -0.01371994822117235, 0.047361317623258695, -0.04412772222520342, -0.07246043769180281], [0.299890280724788, 0.08977878843297697, 0.09116669471839534, 0.1892764639789811, 0.0022454014974880943], [0.22198729527657515, 0.12214532788707587, 0.11021906264009938, 0.2757058013168845, 0.09878294657818378], [-0.16985599804002258, -0.08194430923815636, -0.038144881316898305, 0.10707573904793212, 0.011993693727467282], [0.24014814312048188, 0.12047536482638979, 0.10805125414547544, 0.31840658432316493, 0.06031581781385022]];\n",
" var pct_var = [23.330988966275164, 7.445584042747666, 4.615896035685128, 3.4824013326861176, 2.5304874508573096];\n",
" var md_headers = ['SampleID', 'BarcodeSequence', 'LinkerPrimerSequence', 'center_name', 'center_project_name', 'emp_status', 'experiment_design_description', 'key_seq', 'library_construction_protocol', 'linker', 'platform', 'region', 'run_center', 'run_date', 'run_prefix', 'samp_size', 'sample_center', 'sequencing_meth', 'study_center', 'target_gene', 'target_subfragment', 'age', 'age_unit', 'altitude', 'anonymized_name', 'assigned_from_geo', 'body_habitat', 'body_product', 'body_site', 'collection_timestamp', 'country', 'depth', 'dna_extracted', 'elevation', 'env_biome', 'env_feature', 'env_matter', 'has_physical_specimen', 'host_subject_id', 'host_taxid', 'latitude', 'longitude', 'physical_specimen_remaining', 'project_name', 'required_sample_info_status', 'sample_type', 'sex', 'taxon_id', 'title', 'Description'];\n",
" var metadata = [['232.M3Rkey217', 'AGTCCATAGCTG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', '16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_i
n_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M3Rkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M3', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Rkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Rkey'], ['232.M9Thmr217', 'ACTACGTGTGGT', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '25', 'years', '0', 'M9Thmr217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M9', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M9Pinr217', 'ACTCAGATACTC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '25', 'years', '0', 'M9Pinr217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M9', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M3Rinr217', 'AGGTGTGATCGC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '33', 'years', '0', 'M3Rinr217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M3', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M2Enter217', 'ACGGTGAGTGTC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Enter217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Ente', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Ente'], ['232.M9Ekey217', 'ACTCTTCTAGAG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M9Ekey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M9', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Ekey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Ekey'], ['232.M3Midr217', 'AGGCTACACGAC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '33', 'years', '0', 'M3Midr217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M3', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M3Tkey217', 'AGTCTACTCTGA', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M3Tkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M3', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Tkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Tkey'], ['232.M2Rsft217', 'ACGCTCATGGAT', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Rsft217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Right_shift', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Right_shift'], ['232.M2Okey217', 'ACAGTTGCGCGA', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Okey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Okey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Okey'], ['232.M3Ckey217', 'ATAGGCGATCTC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M3Ckey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M3', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Ckey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Ckey'], ['232.M9Akey217', 'AGACCGTCAGAC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M9Akey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M9', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Akey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Akey'], ['232.M9Hkey217', 'AGAGTAGCTAAG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M9Hkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M9', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Hkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Hkey'], ['232.M9Pinl217', 'ACTACAGCCTAT', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '25', 'years', '0', 'M9Pinl217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M9', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M2Kkey217', 'ACCTCGATCAGA', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Kkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Kkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Kkey'], ['232.M9Midl217', 'ACGTGCCGTAGA', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '25', 'years', '0', 'M9Midl217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M9', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M9Rinl217', 'ACGTTAGCACAC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '25', 'years', '0', 'M9Rinl217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M9', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M9Ckey217', 'AGCACACCTACA', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M9Ckey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M9', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Ckey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Ckey'], ['232.M2Midr217', 'ACACACTATGGC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '36', 'years', '0', 'M2Midr217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M2', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M2Lsft217', 'ACGGATCGTCAG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Lsft217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Left_shift', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Left_shift'], ['232.M9Mkey217', 'AGCATATGAGAG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M9Mkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M9', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Mkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Mkey'], ['232.M9Enter217', 'AGCGAGCTATCT', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M9Enter217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M9', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Ente', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Ente'], ['232.M9Bkey217', 'AGCAGCACTTGT', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M9Bkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M9', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Bkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Bkey'], ['232.M9Fkey217', 'AGAGAGCAAGTG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M9Fkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M9', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Fkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Fkey'], ['232.M2Mkey217', 'ACGCTATCTGGA', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Mkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Mkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Mkey'], ['232.M3Wkey217', 'AGTAGTATCCTC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M3Wkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M3', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Wkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Wkey'], ['232.M9Gkey217', 'AGAGCAAGAGCA', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M9Gkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M9', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Gkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Gkey'], ['232.M3Midl217', 'AGCTCCATACAG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '33', 'years', '0', 'M3Midl217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M3', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M3Pinl217', 'AGCTGACTAGTC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '33', 'years', '0', 'M3Pinl217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M3', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M9Vkey217', 'AGCACGAGCCTA', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M9Vkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M9', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Vkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Vkey'], ['232.M9Wkey217', 'ACTCGCACAGGA', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M9Wkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M9', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Wkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Wkey'], ['232.M2Tkey217', 'ACAGAGTCGGCT', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Tkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Tkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Tkey'], ['232.M3Indl217', 'AGCTATCCACGA', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '33', 'years', '0', 'M3Indl217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M3', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M9Dkey217', 'AGACTGCGTACT', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M9Dkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M9', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Dkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Dkey'], ['232.M2Indl217', 'AACTCGTCGATG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '36', 'years', '0', 'M2Indl217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M2', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M2Hkey217', 'ACCAGCGACTAG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Hkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Hkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Hkey'], ['232.M2Gkey217', 'ACCAGACGATGC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Gkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Gkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Gkey'], ['232.M2Jkey217', 'ACCGCAGAGTCA', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Jkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Jkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Jkey'], ['232.M9Nkey217', 'AGCAGTCGCGAT', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M9Nkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M9', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Nkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Nkey'], ['232.M2Ukey217', 'ACAGCTAGCTTG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Ukey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Ukey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Ukey'], ['232.M2Fkey217', 'ACCACATACATC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Fkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Fkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Fkey'], ['232.M3Lkey217', 'ATACTATTGCGC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M3Lkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M3', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Lkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Lkey'], ['232.M9Pkey217', 'AGAACACGTCTC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M9Pkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M9', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Pkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Pkey'], ['232.M2Ykey217', 'ACAGCAGTGGTC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Ykey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Ykey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Ykey'], ['232.M2Pinr217', 'ACACGAGCCACA', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '36', 'years', '0', 'M2Pinr217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M2', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M2Ikey217', 'ACAGTGCTTCAT', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Ikey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Ikey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Ikey'], ['232.M2Ekey217', 'ACACTGTTCATG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Ekey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Ekey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Ekey'], ['232.M9Midr217', 'ACTATTGTCACG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '25', 'years', '0', 'M9Midr217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M9', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M3Space217', 'ATCGATCTGTGG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M3Space217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M3', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Space_bar', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Space_bar'], ['232.M9Thml217', 'ACGTCTGTAGCA', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '25', 'years', '0', 'M9Thml217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M9', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M9Rinr217', 'ACTCACGGTATG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '25', 'years', '0', 'M9Rinr217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M9', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M3Mkey217', 'ATCACTAGTCAC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M3Mkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M3', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Mkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Mkey'], ['232.M3Rsft217', 'ATCCGATCACAG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M3Rsft217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M3', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Right_shift', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Right_shift'], ['232.M2Lkey217', 'ACCTGTCTCTCT', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Lkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Lkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Lkey'], ['232.M2Space217', 'ACGTACTCAGTG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Space217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Space_bar', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Space_bar'], ['232.M2Vkey217', 'ACGCAACTGCTA', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Vkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Vkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Vkey'], ['232.M3Gkey217', 'ATAATCTCGTCG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M3Gkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M3', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Gkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Gkey'], ['232.M9Indr217', 'ACTAGCTCCATA', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '25', 'years', '0', 'M9Indr217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M9', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M3Xkey217', 'ATAGCTCCATAC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M3Xkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M3', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Xkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Xkey'], ['232.M3Ykey217', 'AGTCTCGCATAT', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M3Ykey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M3', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Ykey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Ykey'], ['232.M3Vkey217', 'ATATCGCTACTG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M3Vkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M3', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Vkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Vkey'], ['232.M2Bkey217', 'ACGCGATACTGG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Bkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Bkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Bkey'], ['232.M3Pinr217', 'AGTACGCTCGAG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '33', 'years', '0', 'M3Pinr217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M3', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M3Bkey217', 'ATATGCCAGTGC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M3Bkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M3', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Bkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Bkey'], ['232.M2Akey217', 'ACATGATCGTTC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Akey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Akey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Akey'], ['232.M3Indr217', 'AGGACGCACTGT', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '33', 'years', '0', 'M3Indr217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M3', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M3Thmr217', 'AGCTTGACAGCT', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '33', 'years', '0', 'M3Thmr217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M3', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M2Skey217', 'ACATGTCACGTG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Skey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Skey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Skey'], ['232.M2Wkey217', 'ACACTAGATCCG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Wkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Wkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Wkey'], ['232.M2Rinr217', 'ACACATGTCTAC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '36', 'years', '0', 'M2Rinr217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M2', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M2Ckey217', 'ACGATGCGACCA', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Ckey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Ckey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Ckey'], ['232.M3Ekey217', 'AGTCACATCACT', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M3Ekey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M3', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Ekey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Ekey'], ['232.M2Zkey217', 'ACGACGTCTTAG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Zkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Zkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Zkey'], ['232.M2Thml217', 'AACGCACGCTAG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '36', 'years', '0', 'M2Thml217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M2', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M2Rinl217', 'AAGAGATGTCGA', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '36', 'years', '0', 'M2Rinl217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M2', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M3Rinl217', 'AGCTCTCAGAGG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '33', 'years', '0', 'M3Rinl217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M3', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M2Midl217', 'AACTGTGCGTAC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '36', 'years', '0', 'M2Midl217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M2', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip']];\n",
" var axesNames = [0, 1, 2, 3, 4];\n",
"\n",
" var dm, ec;\n",
"\n",
" function init() {\n",
" // Initialize the DecompositionModel\n",
" dm = new DecompositionModel(name, ids, coords, pct_var,\n",
" md_headers, metadata, axesNames);\n",
" // Initialize the EmperorController\n",
" ec = new EmperorController(dm, 'emperor-notebook-0x7de46144');\n",
" }\n",
"\n",
" function animate() {\n",
" requestAnimationFrame(animate);\n",
" ec.render();\n",
" }\n",
" $(window).resize(function() {\n",
" ec.resize(div.innerWidth(), div.innerHeight());\n",
" });\n",
"\n",
" $(function(){\n",
" init();\n",
" animate();\n",
"\n",
" });\n",
"\n",
"}); // END REQUIRE.JS block\n",
"</script>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<emperor.core.Emperor at 0x1142d6828>"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# remote=True loads Emperor's JS/CSS from the remote (GitHub-backed) CDN; set remote=False to use the local copies installed by nbinstall()\n",
"Emperor(res, mf, remote=True)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# keep the plot object in x instead of displaying it (remote=True: assets from the remote CDN; remote=False: local copies installed by nbinstall())\n",
"x = Emperor(res, mf, remote=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Using scikit-learn's scale"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"<script type=\"text/javascript\">\n",
"\n",
"if ($(\"#emperor-css\").length == 0){{\n",
" $(\"head\").append([\n",
"\n",
" '<link id=\"emperor-css\" rel=\"stylesheet\" type=\"text/css\" href=\"https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/css/emperor.css\">',\n",
" '<link rel=\"stylesheet\" type=\"text/css\" href=\"https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/css/jquery-ui.min.css\">',\n",
" '<link rel=\"stylesheet\" type=\"text/css\" href=\"https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/css/slick.grid.min.css\">',\n",
" '<link rel=\"stylesheet\" type=\"text/css\" href=\"https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/css/spectrum.min.css\">',\n",
" '<link rel=\"stylesheet\" type=\"text/css\" href=\"https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/css/chosen.min.css\">',\n",
" '<link rel=\"stylesheet\" type=\"text/css\" href=\"https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/css/jquery.contextMenu.min.css\">'\n",
" ]);\n",
"}}\n",
"</script>\n",
"\n",
"<div id='emperor-notebook-0x47105cf9' style=\"position: relative; width:100%; height:500px;\">\n",
" <div class='loading' style=\"position: absolute;top: 50%;left: 50%;margin-left: -229px; margin-top: -59px; z-index: 10000;height:118px;width:458px;padding:0px\"><img src='https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/img/emperor.png' alt='Emperor resources missing. Expected them to be found in https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files'></div>\n",
"</div>\n",
"</div>\n",
"\n",
"<script type=\"text/javascript\">\n",
"requirejs.config({\n",
"// the left side is the module name, and the right side is the path\n",
"// relative to the baseUrl attribute, do NOT include the .js extension\n",
"'paths': {\n",
" /* jQuery */\n",
" 'jquery': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/jquery-2.1.4.min',\n",
" 'jqueryui': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/jquery-ui.min',\n",
" 'jquery_drag': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/jquery.event.drag-2.2.min',\n",
"\n",
" /* jQuery plugins */\n",
" 'chosen': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/chosen.jquery.min',\n",
" 'spectrum': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/spectrum.min',\n",
" 'position': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/jquery.ui.position.min',\n",
" 'contextmenu': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/jquery.contextMenu.min',\n",
"\n",
" /* other libraries */\n",
" 'underscore': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/underscore-min',\n",
" 'chroma': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/chroma.min',\n",
" 'filesaver': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/FileSaver.min',\n",
" 'blob': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/Blob',\n",
" 'd3': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/d3.min',\n",
"\n",
"\n",
" /* THREE.js and plugins */\n",
" 'three': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/three.min',\n",
" 'orbitcontrols': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/three.js-plugins/OrbitControls',\n",
"\n",
" /* SlickGrid */\n",
" 'slickcore': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/slick.core.min',\n",
" 'slickgrid': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/slick.grid.min',\n",
" 'slickformatters': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/slick.editors.min',\n",
" 'slickeditors': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/vendor/js/slick.formatters.min',\n",
"\n",
" /* Emperor's objects */\n",
" 'util': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/js/util',\n",
" 'model': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/js/model',\n",
" 'view': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/js/view',\n",
" 'controller': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/js/controller',\n",
" 'draw': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/js/draw',\n",
" 'scene3d': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/js/sceneplotview3d',\n",
" 'viewcontroller': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/js/view-controller',\n",
" 'colorviewcontroller': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/js/color-view-controller',\n",
" 'visibilitycontroller': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/js/visibility-controller',\n",
" 'scaleviewcontroller': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/js/scale-view-controller',\n",
" 'shapecontroller': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/js/shape-controller',\n",
" 'axescontroller': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/js/axes-controller',\n",
" 'shape-editor': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/js/shape-editor',\n",
" 'color-editor': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/js/color-editor',\n",
" 'scale-editor': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/js/scale-editor',\n",
" 'shapes': 'https://cdn.rawgit.com/biocore/emperor/new-api/emperor/support_files/js/shapes'\n",
"},\n",
"/*\n",
" Libraries that are not AMD compatible need shim to declare their\n",
" dependencies.\n",
" */\n",
"'shim': {\n",
" 'jquery_drag': {\n",
" 'deps': ['jquery', 'jqueryui']\n",
" },\n",
" 'chosen': {\n",
" 'deps': ['jquery'],\n",
" 'exports': 'jQuery.fn.chosen'\n",
" },\n",
" 'contextmenu' : {\n",
" 'deps': ['jquery', 'jqueryui', 'position']\n",
" },\n",
" 'filesaver' : {\n",
" 'deps': ['blob']\n",
" },\n",
" 'orbitcontrols': {\n",
" 'deps': ['three']\n",
" },\n",
"'slickcore': ['jqueryui'],\n",
"'slickgrid': ['slickcore', 'jquery_drag', 'slickformatters',\n",
" 'slickeditors']\n",
"}\n",
"});\n",
"\n",
"requirejs(\n",
"[\"jquery\", \"model\", \"controller\"],\n",
"function($, model, EmperorController) {\n",
" var DecompositionModel = model.DecompositionModel;\n",
"\n",
" var div = $('#emperor-notebook-0x47105cf9');\n",
"\n",
" var ids = ['232.M3Rkey217', '232.M9Thmr217', '232.M9Pinr217', '232.M3Rinr217', '232.M2Enter217', '232.M9Ekey217', '232.M3Midr217', '232.M3Tkey217', '232.M2Rsft217', '232.M2Okey217', '232.M3Ckey217', '232.M9Akey217', '232.M9Hkey217', '232.M9Pinl217', '232.M2Kkey217', '232.M9Midl217', '232.M9Rinl217', '232.M9Ckey217', '232.M2Midr217', '232.M2Lsft217', '232.M9Mkey217', '232.M9Enter217', '232.M9Bkey217', '232.M9Fkey217', '232.M2Mkey217', '232.M3Wkey217', '232.M9Gkey217', '232.M3Midl217', '232.M3Pinl217', '232.M9Vkey217', '232.M9Wkey217', '232.M2Tkey217', '232.M3Indl217', '232.M9Dkey217', '232.M2Indl217', '232.M2Hkey217', '232.M2Gkey217', '232.M2Jkey217', '232.M9Nkey217', '232.M2Ukey217', '232.M2Fkey217', '232.M3Lkey217', '232.M9Pkey217', '232.M2Ykey217', '232.M2Pinr217', '232.M2Ikey217', '232.M2Ekey217', '232.M9Midr217', '232.M3Space217', '232.M9Thml217', '232.M9Rinr217', '232.M3Mkey217', '232.M3Rsft217', '232.M2Lkey217', '232.M2Space217', '232.M2Vkey217', '232.M3Gkey217', '232.M9Indr217', '232.M3Xkey217', '232.M3Ykey217', '232.M3Vkey217', '232.M2Bkey217', '232.M3Pinr217', '232.M3Bkey217', '232.M2Akey217', '232.M3Indr217', '232.M3Thmr217', '232.M2Skey217', '232.M2Wkey217', '232.M2Rinr217', '232.M2Ckey217', '232.M3Ekey217', '232.M2Zkey217', '232.M2Thml217', '232.M2Rinl217', '232.M3Rinl217', '232.M2Midl217'];\n",
" var coords = [[-0.7102684523641861, -1.3545833175372726, -0.9981732826628609, 0.30318194066140414, 0.867763968700346], [-0.7334513783259592, 1.273722900236411, -1.7187790646311907, -0.6029234186536566, -0.15694419333373175], [-0.6991855861734335, 1.4135667292259517, -1.1329312418033501, 0.6264956009496093, -0.974191898408838], [-0.3940969438951249, -1.6779884484503336, -0.9194691670764038, 0.3177670320669821, -1.5896155599622994], [1.216115522295237, -0.10026729048466419, -0.29266053300582817, -0.9889399036030109, -1.299916447145971], [-0.8786353950338603, 0.550867114368609, 1.656555279801023, -1.771939006046885, 0.03382893304531921], [-0.5518586908019181, -1.6181667400317723, 0.06921195497865033, 0.37647420755595057, -1.6643421436416812], [-0.8343937007155747, -0.5520024256386833, -0.08393308262456615, 0.38879755079073436, 1.1381266415238749], [1.3051306219589716, 0.2913286104726042, 0.19295261103086533, 0.527397763152158, 0.2585968719947106], [0.8083968002107014, -0.3514768960197358, -0.35341696829107216, -1.5786041029674287, 0.10674275591634427], [-0.9255816389241865, -0.8532352855629256, 2.20533660807624, 0.3188336469893407, 1.1533233098907167], [-0.7300462857879072, 0.7530481525448657, 2.5297592047623754, -0.9352476556476854, -0.6439911775884596], [-0.9930179495657834, 1.515905839000464, 0.9501019607490704, -0.3227279321141803, -0.6874574398496618], [-0.7006218919621816, 1.0849150649525632, -1.7923718685405, 0.02598381237744658, -0.577650088801354], [1.1041577393909747, 0.3124558286602319, -0.2275060556049921, -0.8705652957010744, -0.7763428390469732], [-0.8638572141702289, 1.0593386640686173, -1.0027703978832443, 0.017291288933865636, 0.2231724031082492], [-0.700855637535043, 0.35483870598708245, -1.6135339271044946, -0.7141515363875619, 0.3722278024767969], [-1.0625672817546545, 0.9948126289387091, 0.5303987956517385, -0.2287686911180621, 0.25631806088616915], [0.7919376284811875, 0.13366067954956853, -1.1505037013579764, 0.22463640954512212, 
-0.224612590654215], [1.1316879190398899, 0.11547686292941263, -0.25767476302965847, -0.7403232483991343, 0.2754227122416918], [-1.0307625445691255, 0.865787479437599, -0.16913828030949973, 0.165425407966811, 0.27888683764601463], [-1.0466623087404479, 0.9679547102626811, 1.2583717857448562, -0.49479587737033376, -0.35530163949104393], [-1.0935225058985951, 1.1102385206613143, 0.393459125081385, -0.19255731862361622, 0.5560232767139215], [-1.0409299657097377, 0.9638475182620875, 0.4820480192649447, -0.8248802625991779, 1.623213868826456], [0.5401368779381995, 0.07015508513202962, -0.5325159365428179, -0.7260117408325778, 1.2276257380597653], [-0.742298830572765, -1.0493690484781053, -0.48910365099182435, 1.037771474395303, 2.392989822212161], [-0.9023829046631706, 0.9821356516557208, 1.467293895887274, -0.7376606582700523, -1.0526869272123482], [-0.6565397245050779, -0.42209398808389864, -0.837980766219628, 0.608770618566461, -1.0502420904761793], [-0.16646270145097922, -1.715215035323763, -0.6689309005681632, 0.7921559689296974, -0.9475928428038008], [-0.8525589313885201, 0.9568765502092552, 0.5480834157699511, -0.4055149070807732, -0.22381999782779316], [-1.0333567878187082, 1.2993838459679192, 0.504934278743974, 0.09346083808980465, -0.05775861315171834], [1.3143501277688747, 0.045849493477754084, 0.23777831080888728, -0.262282837816801, 0.8368190324498788], [-0.8289599018521878, -0.5260882075944844, -1.0400303729206102, 0.6480899743800667, 1.1220162631959942], [-0.9971347659669924, 1.0905094738967989, 0.8278352777092064, -0.9274030444643344, 0.22621934537993296], [0.9241292437754953, 0.16638651672928176, -0.25811759667848233, 0.1805249262102779, 0.4951407174983593], [0.9956874375285173, -0.16962089490160934, -0.7193684107099925, -1.7958598589470607, 1.2827947050369748], [1.364773694504928, 0.0855184310864379, -1.0690631861189277, -0.39117922872209326, 0.3674336747414176], [1.00617543045621, -0.5490147283267589, -0.4186006454095131, -2.4409204539699587, 
1.3297097367797894], [-1.1457497551921942, 0.8671789723616631, 1.7304965940419277, -0.7949208543509231, -0.805781822518076], [0.7662742630644318, -0.1375477147397079, -0.5633348675241521, -0.9351972651033637, -0.10562966339904477], [1.5732426128306427, -0.028366412110625776, -0.3489779145365232, -0.6617932533148125, -0.5143345946068509], [-0.5665088980300198, -1.7605241102917615, 0.22644728294960342, -0.2623659463918709, 0.764305658966028], [-1.0561077486897221, 0.2619618717585945, 2.811634423593612, -0.8343087493899213, -0.6837835277251109], [1.0920646749508576, -0.6410039236116243, -0.06974223868818578, -1.6034140711421236, 0.9867241206862982], [0.8072306521635825, 0.12220180281899429, -0.08869828895752929, -0.6873019516895116, 0.5894467241717573], [1.1011292682068414, -0.31657827979412095, 0.8879147317415075, -0.07911570786748522, -1.49584504002811], [1.6506491884365124, 0.37974362718418564, 0.9432844627468483, 1.5825180218037556, -1.226159510885548], [-0.8689945375557536, 1.4911654238095213, -2.150660688773893, 0.44378578727711415, -0.455897061054807], [-0.8591181227554825, -1.7574894869693807, 0.7198370523965785, -0.22850345191663177, -0.42692883685007793], [-1.0725384171493852, 1.9869086972766155, -0.7517707352420864, 0.39512092951225214, -0.3318958342903459], [-0.7309538771696633, 1.6752098525786592, -2.2797480183392085, 0.5448737429470377, -0.5194520605858115], [-0.669979526117365, -1.6813490880036637, 1.3924257659648756, -0.5517563380953755, 0.8250832492535418], [-0.5798039308527275, -1.0257268455728124, -0.10622064070715642, 0.5438156159958476, -1.8843914277411218], [0.8308042261195584, -0.34861050228478435, 0.014063686680461936, -1.0108029216518997, 1.504041884116669], [1.5786497672439819, -0.20765620927648012, -0.03597093438503185, -1.1276300891593967, -1.1986196579928334], [1.6391983090220519, 0.19028313532737684, -0.10041091010779818, -0.2830682807971767, -0.13536016735783354], [-0.585557608720193, -0.9723178873874215, -0.6472124302738919, 
0.19441095953678322, 2.292499145990924], [-0.8931068265684313, 1.6597908191602924, -1.710852629724053, 0.33617939864697427, -0.6293021897565594], [-0.5600923012229824, -1.7602804575015605, -0.6803119493976438, -0.05750198337740262, -0.7974625861079889], [-0.7183931480274061, -1.3038989424315326, 0.04523801530518033, -0.4716281285957381, -0.9992239271156047], [-0.8961187608867931, -0.88840435778692, 0.5656875194062948, 0.657902761534524, 1.7071548739605549], [1.414455945748813, 0.49043835813432635, -0.40139909124581746, -0.2259881635627122, 0.3473170173969339], [-0.6128283457481077, -1.8395626711688935, -0.2449672040523827, 0.4801304964845864, -2.370219957427933], [-0.813083645658216, -0.8409535361254791, -0.12389774068946216, 0.957588848936812, 1.8947953280105105], [1.4867706616264726, 0.2498637697460093, 0.037600322316336844, 0.2860701570155988, -0.00041423984445646217], [-0.4760525523811835, -1.9938698083000328, -0.150101738846844, 0.5040644082674665, -0.5249695254120474], [-0.6332652086432718, -1.3934891757212082, -0.7254179021569752, 1.1308658941783822, -1.515676954898099], [1.1996330972496023, 0.1802865180303557, 0.817003083648501, 1.581594953063383, 0.6006361606515372], [1.5426483124843622, 0.14590156667428414, 0.6713036612609027, -0.25566569740139283, -0.5185879493660751], [0.8534890839258211, 0.17094864746085128, -0.4778736919365252, -0.11551341668785989, 0.8828151173812935], [1.7432438242216426, 0.6411086891344883, 1.0206881433296586, 1.281277866538585, -1.0860661257796431], [-0.8957206156289814, -1.0129947566825945, 0.45560104068931606, 0.748405511001206, 0.3164317053636477], [1.3678149819620624, -0.10884922406444278, 0.47721931955926084, -0.5119108690849273, -0.9861003417036557], [1.3440581752655911, 0.7122732025544263, 0.9186084805760101, 2.195732620829953, 0.030557242744690225], [0.9949099994187166, 0.9690578965223751, 1.1105828282475203, 3.1983703043542233, 1.3443183684530884], [-0.7612662278745145, -0.650117211324863, -0.3843532067988625, 
1.2421496481101248, 0.16321990122732083], [1.0763038877280142, 0.9558089993369285, 1.0887396839547783, 3.6937277313203314, 0.8208265151440236]];\n",
" var pct_var = [23.330988966275164, 7.445584042747666, 4.615896035685128, 3.4824013326861176, 2.5304874508573096];\n",
" var md_headers = ['SampleID', 'BarcodeSequence', 'LinkerPrimerSequence', 'center_name', 'center_project_name', 'emp_status', 'experiment_design_description', 'key_seq', 'library_construction_protocol', 'linker', 'platform', 'region', 'run_center', 'run_date', 'run_prefix', 'samp_size', 'sample_center', 'sequencing_meth', 'study_center', 'target_gene', 'target_subfragment', 'age', 'age_unit', 'altitude', 'anonymized_name', 'assigned_from_geo', 'body_habitat', 'body_product', 'body_site', 'collection_timestamp', 'country', 'depth', 'dna_extracted', 'elevation', 'env_biome', 'env_feature', 'env_matter', 'has_physical_specimen', 'host_subject_id', 'host_taxid', 'latitude', 'longitude', 'physical_specimen_remaining', 'project_name', 'required_sample_info_status', 'sample_type', 'sex', 'taxon_id', 'title', 'Description'];\n",
" var metadata = [['232.M3Rkey217', 'AGTCCATAGCTG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', '16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_i
n_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M3Rkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M3', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Rkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Rkey'], ['232.M9Thmr217', 'ACTACGTGTGGT', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '25', 'years', '0', 'M9Thmr217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M9', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M9Pinr217', 'ACTCAGATACTC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '25', 'years', '0', 'M9Pinr217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M9', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M3Rinr217', 'AGGTGTGATCGC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '33', 'years', '0', 'M3Rinr217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M3', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M2Enter217', 'ACGGTGAGTGTC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Enter217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Ente', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Ente'], ['232.M9Ekey217', 'ACTCTTCTAGAG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M9Ekey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M9', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Ekey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Ekey'], ['232.M3Midr217', 'AGGCTACACGAC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '33', 'years', '0', 'M3Midr217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M3', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M3Tkey217', 'AGTCTACTCTGA', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M3Tkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M3', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Tkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Tkey'], ['232.M2Rsft217', 'ACGCTCATGGAT', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Rsft217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Right_shift', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Right_shift'], ['232.M2Okey217', 'ACAGTTGCGCGA', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Okey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Okey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Okey'], ['232.M3Ckey217', 'ATAGGCGATCTC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M3Ckey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M3', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Ckey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Ckey'], ['232.M9Akey217', 'AGACCGTCAGAC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M9Akey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M9', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Akey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Akey'], ['232.M9Hkey217', 'AGAGTAGCTAAG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M9Hkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M9', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Hkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Hkey'], ['232.M9Pinl217', 'ACTACAGCCTAT', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '25', 'years', '0', 'M9Pinl217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M9', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M2Kkey217', 'ACCTCGATCAGA', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Kkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Kkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Kkey'], ['232.M9Midl217', 'ACGTGCCGTAGA', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '25', 'years', '0', 'M9Midl217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M9', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M9Rinl217', 'ACGTTAGCACAC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '25', 'years', '0', 'M9Rinl217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M9', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M9Ckey217', 'AGCACACCTACA', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M9Ckey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M9', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Ckey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Ckey'], ['232.M2Midr217', 'ACACACTATGGC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '36', 'years', '0', 'M2Midr217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M2', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M2Lsft217', 'ACGGATCGTCAG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Lsft217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Left_shift', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Left_shift'], ['232.M9Mkey217', 'AGCATATGAGAG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M9Mkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M9', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Mkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Mkey'], ['232.M9Enter217', 'AGCGAGCTATCT', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M9Enter217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M9', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Ente', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Ente'], ['232.M9Bkey217', 'AGCAGCACTTGT', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M9Bkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M9', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Bkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Bkey'], ['232.M9Fkey217', 'AGAGAGCAAGTG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M9Fkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M9', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Fkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Fkey'], ['232.M2Mkey217', 'ACGCTATCTGGA', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Mkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Mkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Mkey'], ['232.M3Wkey217', 'AGTAGTATCCTC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M3Wkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M3', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Wkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Wkey'], ['232.M9Gkey217', 'AGAGCAAGAGCA', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M9Gkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M9', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Gkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Gkey'], ['232.M3Midl217', 'AGCTCCATACAG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '33', 'years', '0', 'M3Midl217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M3', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M3Pinl217', 'AGCTGACTAGTC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '33', 'years', '0', 'M3Pinl217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M3', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M9Vkey217', 'AGCACGAGCCTA', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M9Vkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M9', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Vkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Vkey'], ['232.M9Wkey217', 'ACTCGCACAGGA', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M9Wkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M9', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Wkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Wkey'], ['232.M2Tkey217', 'ACAGAGTCGGCT', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Tkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Tkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Tkey'], ['232.M3Indl217', 'AGCTATCCACGA', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '33', 'years', '0', 'M3Indl217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M3', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M9Dkey217', 'AGACTGCGTACT', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M9Dkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M9', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Dkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Dkey'], ['232.M2Indl217', 'AACTCGTCGATG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '36', 'years', '0', 'M2Indl217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M2', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M2Hkey217', 'ACCAGCGACTAG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Hkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Hkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Hkey'], ['232.M2Gkey217', 'ACCAGACGATGC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Gkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Gkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Gkey'], ['232.M2Jkey217', 'ACCGCAGAGTCA', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Jkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Jkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Jkey'], ['232.M9Nkey217', 'AGCAGTCGCGAT', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M9Nkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M9', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Nkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Nkey'], ['232.M2Ukey217', 'ACAGCTAGCTTG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Ukey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Ukey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Ukey'], ['232.M2Fkey217', 'ACCACATACATC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Fkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Fkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Fkey'], ['232.M3Lkey217', 'ATACTATTGCGC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M3Lkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M3', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Lkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Lkey'], ['232.M9Pkey217', 'AGAACACGTCTC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M9Pkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M9', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Pkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Pkey'], ['232.M2Ykey217', 'ACAGCAGTGGTC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Ykey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Ykey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Ykey'], ['232.M2Pinr217', 'ACACGAGCCACA', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '36', 'years', '0', 'M2Pinr217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M2', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M2Ikey217', 'ACAGTGCTTCAT', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Ikey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Ikey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Ikey'], ['232.M2Ekey217', 'ACACTGTTCATG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Ekey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Ekey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Ekey'], ['232.M9Midr217', 'ACTATTGTCACG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '25', 'years', '0', 'M9Midr217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M9', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M3Space217', 'ATCGATCTGTGG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M3Space217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M3', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Space_bar', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Space_bar'], ['232.M9Thml217', 'ACGTCTGTAGCA', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '25', 'years', '0', 'M9Thml217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M9', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M9Rinr217', 'ACTCACGGTATG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '25', 'years', '0', 'M9Rinr217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M9', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M3Mkey217', 'ATCACTAGTCAC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M3Mkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M3', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Mkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Mkey'], ['232.M3Rsft217', 'ATCCGATCACAG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M3Rsft217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M3', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Right_shift', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Right_shift'], ['232.M2Lkey217', 'ACCTGTCTCTCT', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Lkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Lkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Lkey'], ['232.M2Space217', 'ACGTACTCAGTG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Space217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Space_bar', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Space_bar'], ['232.M2Vkey217', 'ACGCAACTGCTA', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Vkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Vkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Vkey'], ['232.M3Gkey217', 'ATAATCTCGTCG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M3Gkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M3', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Gkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Gkey'], ['232.M9Indr217', 'ACTAGCTCCATA', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '25', 'years', '0', 'M9Indr217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M9', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M3Xkey217', 'ATAGCTCCATAC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M3Xkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M3', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Xkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Xkey'], ['232.M3Ykey217', 'AGTCTCGCATAT', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M3Ykey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M3', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Ykey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Ykey'], ['232.M3Vkey217', 'ATATCGCTACTG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M3Vkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M3', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Vkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Vkey'], ['232.M2Bkey217', 'ACGCGATACTGG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Bkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Bkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Bkey'], ['232.M3Pinr217', 'AGTACGCTCGAG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '33', 'years', '0', 'M3Pinr217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M3', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M3Bkey217', 'ATATGCCAGTGC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M3Bkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M3', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Bkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Bkey'], ['232.M2Akey217', 'ACATGATCGTTC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Akey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Akey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Akey'], ['232.M3Indr217', 'AGGACGCACTGT', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '33', 'years', '0', 'M3Indr217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M3', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M3Thmr217', 'AGCTTGACAGCT', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '33', 'years', '0', 'M3Thmr217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M3', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M2Skey217', 'ACATGTCACGTG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Skey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Skey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Skey'], ['232.M2Wkey217', 'ACACTAGATCCG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Wkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Wkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Wkey'], ['232.M2Rinr217', 'ACACATGTCTAC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '36', 'years', '0', 'M2Rinr217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M2', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M2Ckey217', 'ACGATGCGACCA', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Ckey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Ckey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Ckey'], ['232.M3Ekey217', 'AGTCACATCACT', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M3Ekey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M3', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Ekey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Ekey'], ['232.M2Zkey217', 'ACGACGTCTTAG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', 'unknown', 'years', '0', 'M2Zkey217', 'False', 'unknown', 'unknown', 'unknown', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:surface', 'ENVO:surface', 'ENVO:surface', 'True', 'M2', '36244', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'Zkey', 'male', '408169', 'Forensic_identification_using_skin_bacterial_communities', 'Zkey'], ['232.M2Thml217', 'AACGCACGCTAG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '36', 'years', '0', 'M2Thml217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M2', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M2Rinl217', 'AAGAGATGTCGA', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '36', 'years', '0', 'M2Rinl217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M2', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M3Rinl217', 'AGCTCTCAGAGG', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '33', 'years', '0', 'M3Rinl217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M3', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip'], ['232.M2Midl217', 'AACTGTGCGTAC', 'CATGCTGCCTCCCGTAGGAGT', 'CCME', 'Forensic_identification_using_skin_bacterial_communities', 'EMP', 'Forensic_identification_using_skin_bacterial_communities', 'TCAG', 
'16S_rRNA_gene_sequences_were_processed_according_to_the_methods_described_in_our_previous_publications_(Fierer_et_al.,_2008;_Hamady_et_al.,_2008)._Briefly,_sequences_<200_or_>300gnt_or_with_average_quality_scores_of_<25_were_removed_from_the_dataset,_as_were_those_with_uncorrectable_barcodes,_ambiguous_bases,_or_if_the_bacterial_16S_rRNA_gene-specific_primer_was_absent._Sequences_were_then_assigned_to_the_specific_subsamples_based_on_their_unique_12nt_barcode_and_then_grouped_into_phylotypes_at_the_97%_level_of_sequence_identity_using_cd-hit_(Li_&_Godzik,_2006)_with_a_minimum_coverage_of_97%._We_chose_to_group_the_phylotypes_at_97%_identity_because_this_matches_the_limits_of_resolution_of_pyrosequencing_(Kunin_et_al.,_2010)_and_because_the_branch_length_so_omitted_contributes_little_to_the_tree_and_therefore_to_phylogenetic_estimates_of___diversity_(Hamady_et_al.,_2009)._A_representative_for_each_phylotype_was_chosen_by_selecting_the_most_abundant_sequence_in_the_phylotype,_with_ties_being_broken_by_choosing_the_longest_sequence._A_phylogenetic_tree_of_the_representative_sequences_was_constructed_using_the_Kimura_2-parameter_model_in_Fast_Tree_(Price_et_al.,_2009)_after_sequences_were_aligned_with_NAST_(minimum_150nt_at_75%_minimum_identity)_(DeSantis_et_al.,_2006a)_against_the_GreenGenes_database_(DeSantis_et_al.,_2006b)._Hypervariable_regions_were_screened_out_of_the_alignment_using_PH_Lane_mask_(http://greengenes.lbl.gov/)._Differences_in_the_community_composition_for_each_pair_of_samples_were_determined_from_the_phylogenetic_tree_using_the_weighted_and_unweighted_UniFrac_algorithms_(Lozupone_&_Knight,_2005;_Lozupone_et_al.,_2006)._UniFrac_is_a_tree-based_metric_that_measures_the_distance_between_two_communities_as_the_fraction_of_branch_length_in_a_phylogenetic_tree_that_is_unique_to_one_of_the_communities_(as_opposed_to_being_shared_by_both)._This_method_of_community_comparison_accounts_for_the_relative_similarities_and_differences_among_phylotypes_(or_higher_
taxa)_rather_than_treating_all_taxa_at_a_given_level_of_divergence_as_equal_(Lozupone_&_Knight,_2008)._Although_UniFrac_depends_on_a_phylogenetic_tree,_it_is_relatively_robust_to_differences_in_the_tree_reconstruction_method_or_to_the_approximation_of_using_phylotypes_to_represent_groups_of_very_similar_sequences_(Hamady_et_al.,_2009).', 'CA', 'FLX', '0', 'CCME', '8/14/08', 'FFCKVMW', '1, swab', 'CCME', 'pyrosequencing', 'CCME', '16S rRNA', 'V2', '36', 'years', '0', 'M2Midl217', 'False', 'UBERON:skin', 'UBERON:sebum', 'UBERON:skin', '7/15/08', 'GAZ:United States of America', '0', 'True', '1624', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'ENVO:human-associated habitat', 'True', 'M2', '9606', '40.0083', '-105.2705', 'False', 'fierer_forensic_keyboard', 'completed', 'finger_tip', 'male', '539655', 'Forensic_identification_using_skin_bacterial_communities', 'finger_tip']];\n",
" var axesNames = [0, 1, 2, 3, 4];\n",
"\n",
" var dm, ec;\n",
"\n",
" function init() {\n",
" // Initialize the DecompositionModel\n",
" dm = new DecompositionModel(name, ids, coords, pct_var,\n",
" md_headers, metadata, axesNames);\n",
" // Initialize the EmperorController\n",
" ec = new EmperorController(dm, 'emperor-notebook-0x47105cf9');\n",
" }\n",
"\n",
" function animate() {\n",
" requestAnimationFrame(animate);\n",
" ec.render();\n",
" }\n",
" $(window).resize(function() {\n",
" ec.resize(div.innerWidth(), div.innerHeight());\n",
" });\n",
"\n",
" $(function(){\n",
" init();\n",
" animate();\n",
"\n",
" });\n",
"\n",
"}); // END REQUIRE.JS block\n",
"</script>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<emperor.core.Emperor at 0x1140a9e48>"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.1"
},
"widgets": {
"state": {},
"version": "1.1.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment