# Convert a FASTA-like alignment, grouped into populations by "Pop" lines,
# into an Arlequin project file.
#
# Usage: perl <this_script> <input_file>
#
# Input lines are classified as follows:
#   * a line starting with "Pop" (any case) opens a new population; the last
#     run of word / hyphen / dot characters on that line is its label;
#   * a line starting with ">" followed by a word character names a new
#     individual (the text after ">" is the individual's name);
#   * any other line containing a word character is sequence text for the
#     most recent individual; multi-line sequences are concatenated.
# Individuals that appear before the first "Pop" line are counted in the
# first population.
#
# Output: "<stem>.arp" plus a short summary on STDOUT, where <stem> is the
# leading run of word / hyphen characters of the input file name (or the
# whole file name when it does not start with such a character).
#
# The script dies (non-zero exit status, message on STDERR) when the input
# cannot be read, contains no population line, has sequence text without a
# header, has a header without sequence text, or has sequences of differing
# lengths. No output file is created in any of those cases.
use strict;
use warnings;

my $file = $ARGV[0];
die "Usage: $0 <input_file>\n" unless defined $file && length $file;

open(my $in, '<', $file) or die("error:$!");

# Output stem. The capture is taken via list assignment so that a failed
# match yields undef (handled below) instead of a stale $1.
my ($file_name) = $file =~ /^([\w\-]+)/;
$file_name = $file unless defined $file_name;

# ---------------------------------------------------------------------------
# Parse the input.
# ---------------------------------------------------------------------------
my (@name, @data, @pop_name, @indiv_num);
my $samples    = 0;    # total number of individuals
my $population = 0;    # total number of populations
my $ind        = 0;    # individuals seen in the population being read
my $check      = 0;    # true after a ">" header until its first sequence line

while (my $line = <$in>) {
    # Strip the line ending, including the CR of CRLF files, so that it does
    # not leak into names and sequences.
    $line =~ s/[\r\n]+\z//;

    if ($line =~ /^Pop/i) {
        $population++;

        # Fall back to the whole line when no label can be extracted; such a
        # line still contains "Pop", so the writer below substitutes the
        # name of the population's first individual.
        my ($label) = $line =~ /([\w\-.]+)\s*$/;
        push @pop_name, defined $label ? $label : $line;

        # Close the previous population's head count (nothing to close
        # before the first population).
        unless ($population == 1) {
            push @indiv_num, $ind;
            $ind = 0;
        }
    }
    elsif ($line =~ /^>\w/) {
        # A new header while the previous one is still waiting for sequence
        # text would shift every later name against its sequence.
        die "error: no sequence found for sample '$name[-1]' "
          . "(next header at line $. of $file)\n"
          if $check;

        $line =~ s/>//;
        push @name, $line;
        $samples++;
        $ind++;
        $check = 1;
    }
    elsif ($line =~ /\w/) {
        if ($check) {
            push @data, $line;       # first line of this individual's sequence
        }
        elsif (@data) {
            $data[-1] .= $line;      # continuation line
        }
        else {
            die "error: sequence text at line $. of $file "
              . "appears before any \">\" header\n";
        }
        $check = 0;
    }
}
close($in);

die "error: no sequence found for sample '$name[-1]' in $file\n" if $check;
die "error: no line starting with \"Pop\" found in $file\n" unless $population;

push @indiv_num, $ind;    # head count of the last population

# ---------------------------------------------------------------------------
# All sequences must have the same length.
# ---------------------------------------------------------------------------
if (@data) {
    my $expected_length = length $data[0];
    for my $sequence (@data) {
        next if length($sequence) == $expected_length;
        die "Error!! Sequence length differ in some samples\n";
    }
}

# ---------------------------------------------------------------------------
# Create the Arlequin project (.arp) file.
# ---------------------------------------------------------------------------
open(my $out, '>', "${file_name}.arp") or die("error:$!");

print {$out} <<"EOF";
[Profile]
Title="DNA sequence data"
NbSamples=$population
GenotypicData=0
DataType=DNA
LocusSeparator=NONE
MissingData='?'
[Data]
[[Samples]]
EOF

my $total_indiv = 0;    # index of the current population's first individual
for my $n (0 .. $population - 1) {
    my $size  = $indiv_num[$n];
    my $label = $pop_name[$n];

    # A label that still contains "Pop" is treated as generic, and the name
    # of the population's first individual is used instead. An empty
    # population keeps its label because it has no individual to borrow from.
    if ($label =~ /Pop/i && $size > 0) {
        $label = $name[$total_indiv];
    }

    print {$out} "SampleName=\"$label\"\n";
    print {$out} "SampleSize=$size\n";
    print {$out} "SampleData= {\n";
    for my $i ($total_indiv .. $total_indiv + $size - 1) {
        # Columns: individual name, count (always 1), sequence.
        print {$out} "$name[$i]\t1\t$data[$i]\n";
    }
    print {$out} "}\n";

    $total_indiv += $size;
}

# Buffered write errors only surface at close, so check it.
close($out) or die("error:$!");

print <<"EOF";
The total number of populations = $population
The total number of individuals = $samples
${file_name}.arp was created
EOF