#!/usr/bin/perl -w ############################################################## #written by Belinda Giardine 2006 ############################################################## use strict; my $seqFile = shift @ARGV; my $inName = shift @ARGV; if (!$seqFile or !$inName) { die "USAGE: sequenceCheck file.fa 'hgvsName'\n"; } my @mut; my $fetched = ''; if ($inName =~ /\[(.*;.*)\]/) { #multiple mutations my $mut = $1; @mut = split(/;/, $mut) }elsif ($inName =~ /\[.*?\]\+\[.*\]/) { #NEED to split these }elsif ($inName =~ /\[(.*\(\+\).*)\]/) { my $mut = $1; @mut = split(/\(\+\)/, $mut); }elsif ($inName =~ /((c|g)\.)\[\.\.(.*)\.\.\]/) { #abreviated name $mut[0] = "$1$3"; }else { $mut[0] = $inName; } foreach my $name (@mut) { if ($name =~ /c\.\d+[+-]\d+/) { print "ERROR $name: can't lookup introns\n"; }elsif ($name =~ /c\.\*\d+/) { print "ERROR $name: can't lookup past the stop codon\n"; }elsif ($name =~ /p\.\w+\d+\w+/) { print "ERROR $name: can't lookup proteins\n"; }elsif ($name =~ /(g|c)\.(\d+_?\d*)(del|dup)([ACTG]+)$/) { my $pos = $2; my $pos2; my $type = $3; my $nt = $4; if ($pos =~ /_(\d+)/) { $pos2 = $1; $pos =~ s/_.*//; }else { $pos2 = $pos; } if ($type eq 'dup' && $pos != $pos2) { #can't check same as insertion print "ERROR $name: can't do duplications with ranges\n"; next; } my $dna = fetchDna($pos, $pos2); if ($dna eq '') { print "ERROR Failed to fetch dna 'dna $pos,$pos $seqFile' for $name.\n"; print "GOT $fetched\n"; exit; } if ($dna ne $nt) { my $rev = reverse (split(/ */, $nt)); if ($dna ne $rev) { print "$name: Sequence doesn't match refseq=$dna name=$nt\n"; }else { print "$name: Sequence matched\n"; } }else { print "$name: Sequence matched\n"; } }elsif ($name =~ /(g|c)\.(\d+)([ACTG])>[ACTG]/) { my $pos = $2; my $nt = $3; my $dna = fetchDna($pos, $pos); if (!$dna or $dna eq '') { print "ERROR Failed to fetch dna 'dna $pos,$pos $seqFile' for $name.\n"; print "GOT $fetched\n"; exit; } if ($dna && $dna ne $nt) { print "$name: Sequence doesn't match refseq=$dna name=$nt\n"; }elsif (!$dna) { print "$name: ERROR couldn't fetch dna\n"; print "GOT $fetched\n"; }else { print "$name: Sequence matched\n"; } }else { print "ERROR can't do $name\n"; } } exit 0; sub fetchDna { my $pos = shift @_; my $pos2 = shift @_; my $fh; my $dna = ''; my $good; open ($fh, "getSubSeq $pos,$pos2 $seqFile 2>&1 |") or die "Couldn't run getSubSeq, $!\n"; while (<$fh>) { chomp; if (/^\>getSubSeq/) { $good = 1; next; } #must not have errors if show command $dna .= $_; $fetched = $_; } close $fh or return undef; if (!$good or $dna eq '') { undef $dna; } if ($dna) { $dna = uc($dna); } return $dna; } ####End