iracooke · May 7, 2021 07:32
diff --git a/README.md b/README.md
diff --git a/genind.Rmd b/genind.Rmd
 ---
 title: "DAPC Analysis"
 output: html_notebook
 ---


 ```{r}
 library(adegenet)

 # pegas 0.9 on CRAN has a bug.  You need to install the github version to make
 # the loci2genind function work.  Only install when pegas is missing or still
 # at the affected release, so re-running the notebook does not rebuild the
 # package every time.
 # NOTE(review): assumes releases newer than 0.9 carry the fix -- confirm.
 if (!requireNamespace("pegas", quietly = TRUE) ||
     utils::packageVersion("pegas") <= "0.9") {
   devtools::install_github("emmanuelparadis/pegas", subdir = "pegas")
 }
 library(pegas)

 # Use the read.vcf function from pegas.  By default read.vcf() only reads
 # loci 1 to 10000, so count the loci first and request all of them;
 # otherwise larger VCF files are silently truncated.
 vcf_file <- "my_filtered_best.vcf"
 n_loci <- nrow(VCFloci(vcf_file))
 vcf <- read.vcf(vcf_file, to = n_loci)

 # Convert to genind object
 genind <- loci2genind(vcf)

 # If you get errors about NA values you may need to use the tab function to
 # fill them with appropriate values
 grp <- find.clusters(genind, max.n.clust = 40)
 ```

 Continue from here to do a full DAPC analysis.  Good tutorials are available on the [adegenet tutorials wiki](https://github.com/thibautjombart/adegenet/wiki/Tutorials).
diff --git a/pick_best.awk b/pick_best.awk
 # Keep one record per marker from a VCF stream: the record with the largest
 # number of called (non-missing) genotypes.
 #
 # Input : VCF text.  The marker identifier is read from column 3, FORMAT from
 #         column 9 and the sample genotypes from column 10 onwards.  Records
 #         of the same marker must be adjacent.
 # Output: every header line (lines starting with "#") unchanged, followed by
 #         the best record of each marker.  Ties go to the record seen first.
 # Exit  : status 1, with a message on stderr, when a record has no GT key in
 #         its FORMAT column.
 BEGIN {
 	best_ns=-1            # -1: no record accepted yet for the current marker
 	best_ns_record=""
 	previous_marker=""
 	seen_record=0         # becomes 1 once the first data record is read
 	failed=0              # becomes 1 when we abort, so END prints nothing
 }

 /^#/ {
 	print $0
 }

 # Only lines that *start* with "#" are headers; a "#" elsewhere in a record
 # must not cause the record to be skipped.
 !/^#/ {
 	# Appending "" forces a string comparison of marker ids, so ids such as
 	# "1" and "01" are never treated as the same number.
 	this_marker = $3 ""

 	# Check to see if we have transitioned to a new marker
 	# if we have then print the best record from the previous marker
 	# and reset best_ns so the first record of the new marker is always
 	# accepted, even when it has no called genotypes at all.
 	if ( seen_record && this_marker != previous_marker ){
 		print best_ns_record
 		best_ns = -1
 		best_ns_record = ""
 	}
 	seen_record = 1
 	previous_marker = this_marker

 	# Locate the GT key in the FORMAT column.  The loop bound is inclusive so
 	# that the last key (or a FORMAT consisting only of "GT") is examined.
 	gt_index=-1
 	ln = split($9,format_data,":")
 	for(i=1;i<=ln;i++){
 		if(format_data[i] == "GT"){
 			gt_index=i
 			break
 		}
 	}
 	if( gt_index == -1){
 		print "Error: unable to find GT index on line",NR > "/dev/stderr"
 		failed = 1
 		exit 1
 	}

 	# Count the samples with a called genotype.  A genotype is missing when
 	# every allele is ".", i.e. "./.", ".|." or a haploid ".".
 	# (The INFO column's NS= value is deliberately not used; genotypes are
 	# counted directly so records without an NS field still work.)
 	this_ns = 0
 	for(i=10;i<=NF;i++){
 		split($i,site_data,":")
 		if( site_data[gt_index] !~ "^[.]([/|][.])*$" ){
 			this_ns += 1
 		}
 	}

 	# Then check to see if this is better than any other records for this marker
 	# and update best_ns accordingly
 	if ( this_ns > best_ns ){
 		best_ns_record=$0
 		best_ns=this_ns
 	}
 }

 END {
 	# "exit" in the main rule still runs END; keep the failure status and do
 	# not emit a record after an error.
 	if ( failed ){
 		exit 1
 	}
 	# Flush the last marker, but print nothing when there were no records.
 	if ( seen_record ){
 		print best_ns_record
 	}
 }
	---
	title: "DAPC Analysis"
	output: html_notebook
	---


	```{r}
	library(adegenet)

	# pegas 0.9 on CRAN has a bug. You need to install the github version to make
	# the loci2genind function work. Only install when pegas is missing or still
	# at the affected release, so re-running the notebook does not rebuild the
	# package every time.
	# NOTE(review): assumes releases newer than 0.9 carry the fix -- confirm.
	if (!requireNamespace("pegas", quietly = TRUE) ||
	    utils::packageVersion("pegas") <= "0.9") {
	  devtools::install_github("emmanuelparadis/pegas", subdir = "pegas")
	}
	library(pegas)

	# Use the read.vcf function from pegas. By default read.vcf() only reads
	# loci 1 to 10000, so count the loci first and request all of them;
	# otherwise larger VCF files are silently truncated.
	vcf_file <- "my_filtered_best.vcf"
	n_loci <- nrow(VCFloci(vcf_file))
	vcf <- read.vcf(vcf_file, to = n_loci)

	# Convert to genind object
	genind <- loci2genind(vcf)

	# If you get errors about NA values you may need to use the tab function to
	# fill them with appropriate values
	grp <- find.clusters(genind, max.n.clust = 40)
	```

	Continue from here to do a full DAPC analysis. Good tutorials are available on the [adegenet tutorials wiki](https://github.com/thibautjombart/adegenet/wiki/Tutorials).
	# Keep one record per marker from a VCF stream: the record with the largest
	# number of called (non-missing) genotypes.
	#
	# Input : VCF text. The marker identifier is read from column 3, FORMAT from
	#         column 9 and the sample genotypes from column 10 onwards. Records
	#         of the same marker must be adjacent.
	# Output: every header line (lines starting with "#") unchanged, followed by
	#         the best record of each marker. Ties go to the record seen first.
	# Exit  : status 1, with a message on stderr, when a record has no GT key in
	#         its FORMAT column.
	BEGIN {
		best_ns=-1            # -1: no record accepted yet for the current marker
		best_ns_record=""
		previous_marker=""
		seen_record=0         # becomes 1 once the first data record is read
		failed=0              # becomes 1 when we abort, so END prints nothing
	}

	/^#/ {
		print $0
	}

	# Only lines that *start* with "#" are headers; a "#" elsewhere in a record
	# must not cause the record to be skipped.
	!/^#/ {
		# Appending "" forces a string comparison of marker ids, so ids such as
		# "1" and "01" are never treated as the same number.
		this_marker = $3 ""

		# Check to see if we have transitioned to a new marker
		# if we have then print the best record from the previous marker
		# and reset best_ns so the first record of the new marker is always
		# accepted, even when it has no called genotypes at all.
		if ( seen_record && this_marker != previous_marker ){
			print best_ns_record
			best_ns = -1
			best_ns_record = ""
		}
		seen_record = 1
		previous_marker = this_marker

		# Locate the GT key in the FORMAT column. The loop bound is inclusive so
		# that the last key (or a FORMAT consisting only of "GT") is examined.
		gt_index=-1
		ln = split($9,format_data,":")
		for(i=1;i<=ln;i++){
			if(format_data[i] == "GT"){
				gt_index=i
				break
			}
		}
		if( gt_index == -1){
			print "Error: unable to find GT index on line",NR > "/dev/stderr"
			failed = 1
			exit 1
		}

		# Count the samples with a called genotype. A genotype is missing when
		# every allele is ".", i.e. "./.", ".|." or a haploid ".".
		# (The INFO column's NS= value is deliberately not used; genotypes are
		# counted directly so records without an NS field still work.)
		this_ns = 0
		for(i=10;i<=NF;i++){
			split($i,site_data,":")
			if( site_data[gt_index] !~ "^[.]([/|][.])*$" ){
				this_ns += 1
			}
		}

		# Then check to see if this is better than any other records for this marker
		# and update best_ns accordingly
		if ( this_ns > best_ns ){
			best_ns_record=$0
			best_ns=this_ns
		}
	}

	END {
		# "exit" in the main rule still runs END; keep the failure status and do
		# not emit a record after an error.
		if ( failed ){
			exit 1
		}
		# Flush the last marker, but print nothing when there were no records.
		if ( seen_record ){
			print best_ns_record
		}
	}