# Converts 7-field records into a block of "name = value;" assignments.
#
# Requires GNU awk: asort() is a gawk extension.
#
# Input: only records with exactly 7 fields are processed; every other
# record is silently skipped. Field 1 is not used. Judging by the array
# names, the remaining fields are: $2 individual id, $3 father id,
# $4 mother id, $5 sex, $6 and $7 the two alleles of the genotype.
# NOTE(review): the input looks like a pedigree file and the output like a
# MiniZinc-style data file -- confirm against the consumer.
#
# Output, in this order:
#   num_individuals = <n>;
#   num_alleles = <a>;
#   father= [...];  mother= [...];  sex= [...];
#   genotyping1= [...];  genotyping2= [...];
# Each array has one integer per individual, in order of first appearance.
# Individuals are renumbered 1..n; the id "0" always maps to 0. Alleles are
# renumbered by their rank in sorted order, with "0" expected to rank first
# and therefore to keep code 0.

# Returns the dense index of individual `id`, assigning the next free index
# the first time an id is seen.
function individual_index(id) {
    if (!(id in indiv)) {
        indiv[id] = ++n
    }
    return indiv[id]
}

# Records allele `code` as seen (counting it once) and returns the stored
# value, which is later used as the lookup key into recoding[].
function note_allele(code) {
    if (!(code in allele)) {
        a++
        allele[code] = code
    }
    return allele[code]
}

# Prints "label= [v1,v2,...,vcount];" with every value formatted by %d, so
# unset or non-numeric entries come out as 0. With count == 0 this prints an
# empty list rather than a one-element list, keeping the array length equal
# to the reported number of individuals.
function print_int_array(label, values, count,    k) {
    printf("%s= [", label)
    for (k = 1; k <= count; k++) {
        printf("%s%d", (k > 1 ? "," : ""), values[k])
    }
    print "];"
}

BEGIN {
    n = 0            # number of distinct individuals seen so far
    a = 0            # number of distinct non-"0" alleles seen so far
    indiv[0] = 0     # id "0" is pre-registered so it never gets an index
    allele[0] = 0    # allele "0" is pre-registered and not counted in a
}

NF == 7 {
    # Register in the order child, father, mother: this order determines
    # the numbering and therefore the output.
    child = individual_index($2)
    dad   = individual_index($3)
    mom   = individual_index($4)

    father[child] = dad
    mother[child] = mom
    sex[child]    = $5

    genotyping1[child] = note_allele($6)
    genotyping2[child] = note_allele($7)
}

END {
    # asort() replaces allele[] with its values sorted and re-indexed from 1.
    # It holds a+1 entries: the a real alleles plus the pre-registered 0.
    asort(allele)
    for (j = 1; j <= a + 1; j++) {
        recoding[allele[j]] = j - 1
    }

    print "num_individuals = " n ";"
    print "num_alleles = " a ";"

    # Translate stored allele values to their rank. Individuals that only
    # ever appeared as a parent have no genotype and end up printed as 0.
    for (i = 1; i <= n; i++) {
        code1[i] = recoding[genotyping1[i]]
        code2[i] = recoding[genotyping2[i]]
    }

    print_int_array("father", father, n)
    print_int_array("mother", mother, n)
    print_int_array("sex", sex, n)
    print_int_array("genotyping1", code1, n)
    print_int_array("genotyping2", code2, n)
}