perl - Finding longest match between 2 files from pattern -
i having trouble implementing 2 files within program. trying to access contents of file $q
, $s
.
print "input k value \n"; $k = <>; chomp $k; print "input t\n"; $t = <>; chomp $t; %qkmer = (); $i = 1; $query=' '; while ($line=<in>) { chomp($line); if ($line=~ m/^>/ ) { next; } $query=$query.$line; $line=~ s/(^|\n)[\n\s]*/$1/g; while (length($line) >= $k) { $line =~ m/(.{$k})/; if (! defined $qkmer{$1}) {#every key not deined first match $qkmer{$1} = $i; } $i++; $line = substr($line, 1, length($line) -1); } } open(mydata, '<', "data.txt"); while ($line=<mydata>) { \ chomp($line); %skmer = (); # initializes hash called skmer. $j = 1; if ($line=~ m/^>/ ) { #if line starts > next; #start on next line #separated characters } $line=~ s/^\s+|\s+$//g ; #remove spaces file while (length($line) >= $k) { $line =~ m/(.{$k})/;#match k characters , k characters in dna $skmer{$1} = $j; #set key position $j , increase each new key $j++; $line = substr($line, 1, length($line) -1); #this removes first character in current string } ###(56)###for($skmerkey(keys %skmer)){ $i=$skmer{$skmerkey}; if(defined $qkmer($skmerkey)){ $j=$qkmer($skmerkey); } $s1=$line; $s2=$query; @arrays1= split(//, $s1); @array2= split(//, $s2); $l=0; while($arrays1[$i-$l] eq $arrays2[$j-$l]){ $l++; } $start=$i-$l; $m=0; while ($arrays1[$i+$k+$m] eq $arrays2[$j+$k+$m]) { $m++; } $length=$l+$k+$m; $match= substr($s1, $start, $length); if($length>$t){ $longest=length($match); print "longest: $match of length $longest \n"; } } }###(83)###
the input files contain strings of letters. example:
file 1:
ahhtsagnchjgstffhjyfcsghnvzfhg
file2:
ggujvfbgfgkjfcijjjffcvvafcsghnvzfhgvugxckugcbhfcgh ghnvzfhgvugxckhhfgjgcfujvftjbvdtkhvddgjcdgjxdjkfrh ajdbvciyqdanvkjghnvzfhgvugxc
from match of word of length$k
in file 1 in file 2, check match in file 2 left , right of word further matches. final output longest match between file 1 , file 2 based on $k
. ge
with code, syntax error , not suer why because looks correct me:
syntax error @ testk.pl line 56, near "$skmerkey(" syntax error @ testk.pl line 83, near "}"
thank you.
use strict; # <--- allways use use warnings; # <--- , use data::dumper; $k=3; open(my $in, '<', "file2"); # use $in instead of depricated in $line=0; # line number %kmer; # hash of arrays of $k-letter "words" line/position @q; # rows of q-file while(<$in>) { chomp; next if /^>/; s/^\s+|\s+$//g; next if !$_; $pos=0; push @q, $_; # store source row for(/(?=(.{$k}))/g) { # capture $k letters. floating window step 1 symbol push @{$kmer{$_}}, [$line,$pos]; # store row number , position of "word" $pos++; } $line++; } open($in, '<', "file1"); $line=0; while(<$in>) { # read s-file chomp; next if /^>/; s/^\s+|\s+$//g; next if !$_; $pos=0; $len=length($_); # length of row of s-file $s=$_; # current row of s-file @ignore=(); # array store information match tails for(/(?=(.{$k}))/g) { next if ! $kmer{$_}; # "word" not found try next for(@{$kmer{$_}}) { # $kmer{word} contains array of lines/positions in q my($qline, $qpos)=@{$_}; # print "test $qline:$qpos "; if( grep {$_->[0]==$qline && $_->[1]==$qpos } @ignore ) { # line/position tested , included in found matching # print "ignore match tail $qline:$qpos\n"; next; } $j=$k; # $k letters same, test after point $qlen=length($q[$qline]); $j++ while( $pos+$j<$len && $qpos+$j<$qlen && substr($s,$pos+$j,1) eq substr($q[$qline],$qpos+$j,1) ); print "match found: s-file line $line pos $pos, q-file line $qline pos $qpos: ", substr($s,$pos,$j),"\n"; push @ignore, [$qline, $qpos, $j]; # store positions , length of match } } continue { # continue block works on loops, include after "next" $pos++; @ignore=grep { # recalculate/filter position , length of match tails ++$_->[1]; # increment position (--$_->[2]) # decrement length >= $k # , filter out lengths < $k } @ignore; # print dumper(\@ignore); } $line++; }
Comments
Post a Comment