This is ridiculous. Who knows anything about Perl??

This is worth 2 credits, and we were handed these assignments without being taught, because our teacher is from the actual computer science faculty and we're just biologists! He barely speaks any English, too!

Write a program that translates a sequence from the start codon on the forward and the reverse strand.

As input use the following sequence:

AATAAATGCCACCGAAGTTCGACCCCAACGAGATCAAGGTCGTATACCTGAGGTGCACCAGAGGTGAAGTCGGTG CCACTTCTGCCCTGGCCCCCAAGATCGGCCCCCTGGGTCTGTCTCCAAAAAAGGTTGGTGATGACATTGC CAAGGCAACGGGTGACTGGAAGGGCCTGAGGATTACAGTGAAACTGACCATTGAGAACAGACAGGCCCAG ATTGAGGTGGTGCCTTCTGCCTCTGCCCCGATCATCAAAGCCCTCAAGAAACCACCAAGAGACAGAAAGA AACAGAAAAACATTAAACACAATGGGAATATCACTTTTGATGAGATCGTCAACATTGCTCGACAGATGCG GCACCGATCCTTAGCCAGAGAACTCTCTGGAACCATTAAAGAGATTCCGGGGACTGCCCAGTCTATGGGC TGTAATGTTGATGGCCACCACCCTCATGACATCATAGATGACATCAACAGTGGTGCTGTGGAATGCCCAG CTAGTTAA

(Store it in a file and read it from that.)

Find the hash with translations for codons here: codon table

Write a Perl script that does the following:

open two files, each containing sequences in FASTA format (Fasta file1, Fasta file2)

read in the identifiers and sequences from each file

report the IDs and lengths of the sequences contained in each file

report the sequence IDs that are found only in the first and not in the second file

report the sequence IDs for which the sequences differ between the two files

Update:

Let me stress again: we study BIOLOGY, and they threw us into this class and gave us these assignments when half of us struggle with Microsoft Word! Last year they had to discount this course because nobody passed. Unfortunately, they won't do that this year because a couple of people dabble in programming.

1 Answer

Relevance
  • 1 decade ago
    Best Answer

    #!/usr/bin/perl

    use LWP::UserAgent;

    use Data::Dumper;

    use strict;

    # Shared HTTP client used by get() for every download. The agent string,
    # including its trailing space, is passed through unchanged (LWP documents
    # that a trailing space makes it append its own library identifier).
    our $ua = LWP::UserAgent->new(agent => "NuBrowser/10.5 ");

    # Standard genetic code, keyed by upper-case DNA codon. Each value is the
    # one-letter amino-acid symbol, with '*' standing for a stop codon.
    # Rows are grouped by the first two bases of the codon.
    our %genetic_code = (
        AAA => 'K', AAC => 'N', AAG => 'K', AAT => 'N',   # Lysine / Asparagine
        ACA => 'T', ACC => 'T', ACG => 'T', ACT => 'T',   # Threonine
        AGA => 'R', AGC => 'S', AGG => 'R', AGT => 'S',   # Arginine / Serine
        ATA => 'I', ATC => 'I', ATG => 'M', ATT => 'I',   # Isoleucine; ATG is Methionine
        CAA => 'Q', CAC => 'H', CAG => 'Q', CAT => 'H',   # Glutamine / Histidine
        CCA => 'P', CCC => 'P', CCG => 'P', CCT => 'P',   # Proline
        CGA => 'R', CGC => 'R', CGG => 'R', CGT => 'R',   # Arginine
        CTA => 'L', CTC => 'L', CTG => 'L', CTT => 'L',   # Leucine
        GAA => 'E', GAC => 'D', GAG => 'E', GAT => 'D',   # Glutamic acid / Aspartic acid
        GCA => 'A', GCC => 'A', GCG => 'A', GCT => 'A',   # Alanine
        GGA => 'G', GGC => 'G', GGG => 'G', GGT => 'G',   # Glycine
        GTA => 'V', GTC => 'V', GTG => 'V', GTT => 'V',   # Valine
        TAA => '*', TAC => 'Y', TAG => '*', TAT => 'Y',   # Stop / Tyrosine
        TCA => 'S', TCC => 'S', TCG => 'S', TCT => 'S',   # Serine
        TGA => '*', TGC => 'C', TGG => 'W', TGT => 'C',   # Stop / Cysteine; TGG is Tryptophan
        TTA => 'L', TTC => 'F', TTG => 'L', TTT => 'F',   # Leucine / Phenylalanine
    );

    # Main script, part 1: download the assignment page with get(), split it on
    # the literal "<BR>" marker, and treat the second piece ($bits[1]) as the DNA
    # sequence; print that DNA followed by its translation from protein().
    #
    # Parts 2-3: load the two FASTA files with getFasta() and print the returned
    # hashes with Data::Dumper. getFasta() stores the protein() translation of
    # each record as the hash value, so the dumps show translated sequences
    # rather than the raw DNA.
    #
    # NOTE(review): this section does not compile as written. '$***' is not a
    # valid Perl identifier, and the get()/getFasta() statements end in "..."
    # instead of a closing quote/parenthesis/semicolon. Presumably the page this
    # script was copied from masked the variable name and truncated the long
    # lines; the original name and the full first URL are not recoverable from
    # here -- restore them before running.
    my $*** = get("http://bioinf.gen.tcd.ie/ge3027/class4/assignments...

    my @bits = split("<BR>",$***);

    my $dna = $bits[1];

    print "dna\n",$dna;

    print "\nprotein\n", protein($dna),"\n";

    my %fasta1 = getFasta("http://bioinf.gen.tcd.ie/ge3027/class4/fasta1.txt"...

    print "fasta 1 \n", Dumper(\%fasta1);

    my %fasta2 = getFasta("http://bioinf.gen.tcd.ie/ge3027/class4/fasta2.txt"...

    print "fasta 2 \n", Dumper(\%fasta2);

    # Report 4: IDs present in the first FASTA hash but absent from the second.
    # Membership is tested with exists() so that an ID whose stored value is
    # empty (or "0") in the second file is not misreported as missing.
    print "\n4. report the sequence IDs that are found only in the first and not in the second file\n";

    foreach my $id (sort keys %fasta1) {
        print "$id " unless exists $fasta2{$id};
    }

    # Report 5: IDs present in both hashes whose stored sequences differ.
    # The values are strings, so they must be compared with 'ne'. The numeric
    # '!=' used previously turns every non-numeric string into 0 and therefore
    # never reported a difference. The comparison is symmetric, so one pass over
    # the shared IDs is enough; a second pass over %fasta2 would only repeat the
    # same lines and compare against undef for IDs missing from the first file.
    # NOTE(review): the hash values are whatever getFasta() stored (protein
    # translations in this script), so DNA changes that do not alter the
    # translation are not reported here.
    print "\n5. report the sequence IDs for which the sequences differ between the two files\n";

    foreach my $id (sort keys %fasta1) {
        next unless exists $fasta2{$id};
        next if $fasta1{$id} eq $fasta2{$id};

        print "$id $fasta2{$id} != $fasta1{$id}\n";
    }

    # Fetch a URL with the shared $ua client and return the response body.
    #
    # Parameters: $url - the URL to request with HTTP GET.
    # Returns:    the response content as a string.
    # Dies:       with the URL and the HTTP status line when the request does not
    #             succeed. Previously the body of an error response (e.g. a 404
    #             page) was returned and then parsed as if it were sequence data.
    sub get($){
        my ($url) = @_;

        my $res = $ua->request(HTTP::Request->new(GET => $url));

        die "GET $url failed: " . $res->status_line . "\n"
            unless $res->is_success;

        return $res->content;
    }

    # Download a FASTA file and return its records as a hash.
    #
    # Parameters: $url - URL of the FASTA file, fetched with get().
    # Returns:    a hash (as a flat list) mapping each header line, without the
    #             leading '>', to the protein() translation of that record's
    #             sequence.
    #
    # Records are recognised by their '>' header line rather than by blank-line
    # separators. All sequence lines up to the next header are concatenated, so
    # sequences wrapped over several lines are no longer cut off after the first
    # line, and the number of blank lines between records does not matter.
    # Both LF and CRLF line endings are accepted.
    sub getFasta($){
        my ($url) = @_;

        my %hash;
        my ($head, $dna);

        foreach my $line (split /\r?\n/, get($url)) {
            if ($line =~ /^>(.*)/) {
                # A new header closes the record collected so far.
                $hash{$head} = protein($dna) if defined $head;

                ($head, $dna) = ($1, '');
                $head =~ s/\s+$//;
            }
            elsif (defined $head) {
                # Sequence (or blank) line of the current record; protein()
                # strips whitespace itself. Text before the first header is
                # ignored.
                $dna .= $line;
            }
        }

        # Store the final record, which has no following header to close it.
        $hash{$head} = protein($dna) if defined $head;

        return %hash;
    }

    # Translate a DNA string into a protein string using %genetic_code.
    #
    # Parameters: $dna - DNA sequence; whitespace is removed and letter case is
    #                    ignored.
    # Returns:    one character per complete codon, read from the first base:
    #             the one-letter amino-acid symbol, '*' for a stop codon, or 'X'
    #             for a codon that is not in %genetic_code (e.g. one containing
    #             N). One or two leftover bases at the end are ignored.
    #
    # Unknown codons used to be dropped silently, which shortened the protein and
    # shifted every later position; lower-case input translated to an empty
    # string because the table keys are upper-case.
    sub protein($){
        my ($dna) = @_;

        $dna = '' unless defined $dna;
        $dna =~ s/\s//g;
        $dna = uc $dna;

        # int() makes the whole-codon count explicit instead of relying on the
        # repetition operator truncating a fractional count.
        my @codons = unpack("a3" x int(length($dna) / 3), $dna);

        my @residues = map {
            my $aa = $genetic_code{$_};
            defined $aa ? $aa : 'X';
        } @codons;

        return join "", @residues;
    }

Still have questions? Get your answers by asking now.