Can anybody help me out with a simple Perl program?

Last year this course was discounted because nobody passed! This year, though, half the class had some simple C++ on their curriculum last year and kind of have a grasp of it. The lecturer is throwing this code at us without really telling us what it means, and on top of that he mumbles and we can't understand his accent. It's worth 2 credits. I am completely lost. There is one book in the library on this (Beginning Perl for Bioinformatics), but between the 30 of us AND the other classes trying to get it, it's impossible to get our hands on. Can anybody help me out?

the assignments are here :

http://bioinf.gen.tcd.ie/GE3027/class4/assignments...

1 Answer

Relevance
  • 1 decade ago
    Best Answer

    #!/usr/bin/perl

    use LWP::UserAgent;

    use Data::Dumper;

    use strict;

    # Shared user agent used by get() for every download. The agent string is
    # reproduced verbatim, including its trailing space.
    our $ua = LWP::UserAgent->new( agent => "NuBrowser/10.5 " );

    # Codon table used by protein(): upper-case DNA triplet => one-letter
    # amino-acid code, with '*' for the three stop codons. Rows are grouped by
    # the first two bases; the third base runs T, C, A, G across each row.
    our %genetic_code = (
        TTT => 'F', TTC => 'F', TTA => 'L', TTG => 'L',    # Phe, Phe, Leu, Leu
        TCT => 'S', TCC => 'S', TCA => 'S', TCG => 'S',    # Serine
        TAT => 'Y', TAC => 'Y', TAA => '*', TAG => '*',    # Tyr, Tyr, Stop, Stop
        TGT => 'C', TGC => 'C', TGA => '*', TGG => 'W',    # Cys, Cys, Stop, Tryptophan

        CTT => 'L', CTC => 'L', CTA => 'L', CTG => 'L',    # Leucine
        CCT => 'P', CCC => 'P', CCA => 'P', CCG => 'P',    # Proline
        CAT => 'H', CAC => 'H', CAA => 'Q', CAG => 'Q',    # His, His, Gln, Gln
        CGT => 'R', CGC => 'R', CGA => 'R', CGG => 'R',    # Arginine

        ATT => 'I', ATC => 'I', ATA => 'I', ATG => 'M',    # Ile, Ile, Ile, Methionine
        ACT => 'T', ACC => 'T', ACA => 'T', ACG => 'T',    # Threonine
        AAT => 'N', AAC => 'N', AAA => 'K', AAG => 'K',    # Asn, Asn, Lys, Lys
        AGT => 'S', AGC => 'S', AGA => 'R', AGG => 'R',    # Ser, Ser, Arg, Arg

        GTT => 'V', GTC => 'V', GTA => 'V', GTG => 'V',    # Valine
        GCT => 'A', GCC => 'A', GCA => 'A', GCG => 'A',    # Alanine
        GAT => 'D', GAC => 'D', GAA => 'E', GAG => 'E',    # Asp, Asp, Glu, Glu
        GGT => 'G', GGC => 'G', GGA => 'G', GGG => 'G',    # Glycine
    );

    # Main script, first part.
    #   1. Download the assignment page with get(), split it on "<BR>" and take
    #      the second piece ($bits[1]) as the DNA sequence; print it followed by
    #      its translation from protein().
    #   2. Load the two FASTA files with getFasta() into %fasta1 / %fasta2
    #      (sequence ID => translated protein) and dump both with Data::Dumper.
    # NOTE(review): in this copy the first scalar's name is masked as "$***" and
    # three statements end in "..." (the URL and/or the closing ");" were cut
    # off), so the code below does not compile as shown - restore the original
    # text before running.
    my $*** = get("http://bioinf.gen.tcd.ie/ge3027/class4/assignments...

    my @bits = split("<BR>",$***);

    my $dna = $bits[1];

    print "dna\n",$dna;

    print "\nprotein\n", protein($dna),"\n";

    my %fasta1 = getFasta("http://bioinf.gen.tcd.ie/ge3027/class4/fasta1.txt"...

    print "fasta 1 \n", Dumper(\%fasta1);

    my %fasta2 = getFasta("http://bioinf.gen.tcd.ie/ge3027/class4/fasta2.txt"...

    print "fasta 2 \n", Dumper(\%fasta2);

    # Task 4: list every sequence ID that occurs in the first FASTA file but
    # not in the second, in sorted order, separated by spaces.
    print "\n4. report the sequence IDs that are found only in the first and not in the second file\n";

    # Test for the key with exists, not for a true value: an ID whose entry in
    # the second file is an empty string (or "0") is still present there and
    # must not be reported as missing.
    foreach my $id (sort keys %fasta1) {
        print "$id " unless exists $fasta2{$id};
    }

    # Task 5: list every sequence ID that occurs in both files but with a
    # different stored sequence, one "ID seq2 != seq1" line per ID.
    # NOTE(review): %fasta1 / %fasta2 hold whatever getFasta() stored for each
    # ID; if that is the translated protein, DNA changes that do not alter the
    # protein are not reported here - confirm that this is intended.
    print "\n5. report the sequence IDs for which the sequences differ between the two files\n";

    # Sequences are strings, so compare them with ne. The numeric != converts
    # both sides to numbers first; letter-only strings all become 0, so no
    # difference was ever reported.
    foreach my $id (sort keys %fasta1) {
        # IDs found in only one file are not "differing sequences".
        next unless exists $fasta2{$id};

        print "$id $fasta2{$id} != $fasta1{$id}\n"
            if $fasta2{$id} ne $fasta1{$id};
    }

    # Separator kept so the framing of the report is unchanged. The second pass
    # over %fasta2 that used to follow it is dropped: restricted to IDs present
    # in both files it would only repeat the lines printed above.
    print "\n-\n";

    # Fetch a URL with the shared $ua user agent and return the response body.
    #
    # Parameters: $url - absolute URL to download.
    # Returns:    the response content as a string, whatever the HTTP status.
    #
    # A failed request is reported on STDERR instead of being ignored, because
    # the callers would otherwise parse an error page as if it were sequence
    # data. The body is still returned so existing callers keep working.
    # The old ($) prototype is dropped: it was never applied to the calls made
    # before this definition and performed no argument validation.
    sub get {
        my ($url) = @_;

        # $ua->get builds and sends the GET request in one step, so this no
        # longer relies on HTTP::Request having been loaded as a side effect.
        my $res = $ua->get($url);

        warn "GET $url failed: ", $res->status_line, "\n"
            unless $res->is_success;

        return $res->content;
    }

    # Download a FASTA file and return its records as key/value pairs.
    #
    # Parameters: $url - location of the FASTA text, fetched with get().
    # Returns:    a flat list suitable for assignment to a hash, mapping each
    #             sequence ID (the header line minus its first character,
    #             normally '>') to protein() of that record's DNA.
    #
    # Records are expected to be separated by one or more blank lines.
    sub getFasta {
        my ($url) = @_;

        my $text = get($url);
        $text = '' unless defined $text;

        # Normalise CRLF / CR line endings so the blank-line split below also
        # works for files served with Windows or old Mac line ends.
        $text =~ s/\r\n?/\n/g;

        # Leading blank lines would otherwise yield a record with an empty ID.
        $text =~ s/\A\s+//;

        my %protein_of;
        for my $record (split /\n{2,}/, $text) {
            # Limit 2: everything after the header line is sequence data, so a
            # sequence wrapped over several lines is kept whole (protein()
            # removes the embedded line breaks). Without the limit only the
            # first sequence line was translated.
            my ($head, $dna) = split /\n/, $record, 2;

            next unless defined $head && length $head;

            # A header with no sequence still counts as a known ID and maps to
            # an empty protein.
            $protein_of{ substr($head, 1) } = protein(defined $dna ? $dna : '');
        }

        return %protein_of;
    }

    # Translate a DNA string into a protein string of one-letter codes.
    #
    # Parameters: $dna - nucleotide sequence; whitespace is ignored and letter
    #                    case does not matter. undef is treated as empty.
    # Returns:    one character per complete codon, looked up in %genetic_code
    #             ('*' for stop codons, 'X' for a triplet not in the table).
    #             A trailing fragment of one or two bases is ignored.
    sub protein {
        my ($dna) = @_;
        return '' unless defined $dna;

        $dna =~ s/\s//g;

        # The table is keyed by upper-case codons; without this, lower-case
        # input translated to an empty string.
        $dna = uc $dna;

        # Unknown triplets (for example ones containing N) become 'X' instead
        # of silently vanishing, which would shift every later residue.
        return join '',
            map { exists $genetic_code{$_} ? $genetic_code{$_} : 'X' }
            $dna =~ /(.{3})/g;
    }

Still have questions? Get your answers by asking now.