#!/usr/bin/perl
use strict;
use warnings;

# Reads a simple NEXUS file and reports per-character and whole-matrix
# summary statistics.
#
# Usage:  script.pl <nexus-file>
#
# STDERR receives a verbose per-character trace followed by the totals.
# STDOUT receives one tab-separated summary line:
#   name, NTAX, NCHAR, VAR, parsimony-informative, DELTAmin, c/t, CIC
# where "name" is the file name with the first ".NX" removed.

# Parse the MATRIX section of the NEXUS file at $path.
#
# Returns a hash reference mapping taxon name => concatenated sequence.
# Dies if the file cannot be opened.
#
# Line handling (order matters and is kept from the original script):
#   1. while inside the matrix, any line containing ';' ends the matrix
#      (and that line itself is not parsed as data);
#   2. while inside the matrix, the first whitespace-separated field is
#      the taxon name and the second is appended to that taxon's sequence;
#      names that are exactly "[" or "]" are skipped;
#   3. any line matching /MATRIX/i (re)starts the matrix on the NEXT line.
#
# NOTE(review): only the second field of a data line is used, so a
# sequence written as several space-separated chunks on one line loses
# everything after the first chunk -- confirm inputs never do that.
sub read_nexus_matrix {
    my ($path) = @_;

    open(my $in, '<', $path) or die "cannot open '$path': $!\n";

    my %matrix;
    my $in_matrix = 0;
    while (my $line = <$in>) {
        $in_matrix = 0 if $in_matrix && $line =~ m/;/;

        if ($in_matrix) {
            # split ' ' skips leading whitespace, like the bare split
            # on $_ that the original used.
            my ($name, $seq) = split ' ', $line;
            if (defined $name && $name ne "[" && $name ne "]") {
                # A name with no sequence still registers the taxon.
                $matrix{$name} .= defined $seq ? $seq : '';
            }
        }

        if ($line =~ m/MATRIX/i) {
            $in_matrix = 1;
            print STDERR "in matrix\n";
        }
    }
    close $in;

    return \%matrix;
}

# Tally the symbols of one character (alignment column).
#
# $column is a string holding one symbol per taxon.
# Returns a hash reference with keys:
#   nonmissing => number of symbols that are real states
#   gap        => number of '-' symbols
#   missing    => number of '?', 'n' or 'N' symbols
#   states     => { state symbol => occurrence count }
#
# The original script stored the '-' count in the variable named
# "missing" and the '?'/n/N count in the one named "gap"; the names
# here match what is actually counted.
sub count_states {
    my ($column) = @_;

    my %tally = (nonmissing => 0, gap => 0, missing => 0, states => {});
    for my $symbol (split //, $column) {
        if ($symbol eq "-") {
            $tally{gap}++;
        }
        elsif ($symbol eq "?" || $symbol eq "n" || $symbol eq "N") {
            $tally{missing}++;
        }
        else {
            $tally{states}{$symbol}++;
            $tally{nonmissing}++;
        }
    }
    return \%tally;
}

# Compute the per-character CIC contribution.
#
# $states     hash reference: state symbol => occurrence count
# $nonmissing number of taxa with a real state at this character
#
# Returns 0 unless $nonmissing > 3. Otherwise sums, for
# j = 3 .. ($nonmissing - number_of_states + 1),
#   log2(2j - 3) * (1 - b_j)
# where b_j is the number of states occurring at least j times.
sub character_cic {
    my ($states, $nonmissing) = @_;

    return 0 unless $nonmissing > 3;

    my $nstates = scalar keys %{$states};
    my $upper   = $nonmissing - $nstates + 2;

    my $cic = 0;
    # '<' rather than '!=': when every taxon has a distinct state the
    # bound is 2, below the start value 3, and '!=' would never stop.
    for (my $j = 3; $j < $upper; $j++) {
        my $bj = grep { $_ >= $j } values %{$states};
        my $firstbit = log((2 * $j) - 3) / log(2);
        $cic += $firstbit * (1 - $bj);
    }
    return $cic;
}

# Entry point: analyse the NEXUS file named by the first argument.
# Dies with a usage message when no file is given and with a clear
# message when the matrix is empty (the ratios below would otherwise
# divide by zero).
sub main {
    my ($filename) = @_;
    die "usage: $0 <nexus-file>\n" unless defined $filename;

    my $matrix = read_nexus_matrix($filename);
    my @taxa   = keys %{$matrix};
    my $ntax   = scalar @taxa;
    die "no taxa read from a MATRIX section in '$filename'\n" if $ntax == 0;

    # The character count is taken from whichever taxon comes first,
    # as in the original; flag any taxon that disagrees.
    my $totalchars = length $matrix->{ $taxa[0] };
    for my $taxon (@taxa) {
        my $len = length $matrix->{$taxon};
        if ($len != $totalchars) {
            print STDERR "WARNING: taxon $taxon has $len characters, "
                       . "expected $totalchars\n";
        }
    }

    # Transpose: one string per character, one symbol per taxon.
    my @columns = ('') x $totalchars;
    for my $i (0 .. $totalchars - 1) {
        print STDERR "GETTING CHAR ".$i."\n";
        for my $taxon (@taxa) {
            my $seq = $matrix->{$taxon};
            # Short sequences simply contribute nothing to this column.
            $columns[$i] .= substr($seq, $i, 1) if $i < length $seq;
        }
    }

    my $deltamin             = 0;
    my $variable             = 0;
    my $varincludinggap      = 0;
    my $parsimonyinformative = 0;
    my $totalCIC             = 0;

    for my $index (0 .. $#columns) {
        my $tally   = count_states($columns[$index]);
        my $states  = $tally->{states};
        my $nstates = scalar keys %{$states};

        print STDERR "------------\n";
        print STDERR "CHAR=".($index + 1)."\n";
        print STDERR $tally->{nonmissing}."\t".$tally->{missing}."\t".$tally->{gap}."\n";

        # Number of states seen in more than one taxon.
        my $repeated = 0;
        for my $state (sort keys %{$states}) {
            print STDERR $state."\t".$states->{$state}."\n";
            $repeated++ if $states->{$state} > 1;
        }

        my $cic = character_cic($states, $tally->{nonmissing});

        $parsimonyinformative++ if $repeated > 1;
        $variable++             if $nstates > 1;
        $varincludinggap++      if $nstates > 1 || $tally->{gap} > 0;

        # "Sc-1": repeated states minus one, never below zero.
        my $sc_minus_1 = $repeated > 0 ? $repeated - 1 : 0;

        print STDERR "Sc-1 = ".$sc_minus_1."\n";
        print STDERR "CICchar = ".$cic."\n";

        $totalCIC += $cic;
        $deltamin += $sc_minus_1;
    }

    my $chars_per_taxon = $totalchars / $ntax;

    print STDERR "NCHAR=".$totalchars."\n";
    print STDERR "NTAX=".$ntax."\n";
    print STDERR "c/t=".$chars_per_taxon."\n";
    print STDERR "DELTAmin=".$deltamin."\n";
    print STDERR "VAR=".$variable."\n";
    print STDERR "VAR INCLUDING GAP=".$varincludinggap."\n";
    print STDERR "PARSIMONG INFORMATIVE=".$parsimonyinformative."\n";
    print STDERR "CIC=".$totalCIC."\n";

    (my $matrixname = $filename) =~ s/\.NX//;
    print join("\t",
        $matrixname, $ntax, $totalchars, $variable,
        $parsimonyinformative, $deltamin, $chars_per_taxon, $totalCIC,
    )."\n";

    return;
}

# Run only when executed as a script, so the subs above can be loaded
# (e.g. by a test) without triggering the analysis.
main(@ARGV) unless caller;

1;