#!/usr/local/bin/perl
#
# Generate random DNA sequences and print simple statistics about them:
#   * how many times each distinct sequence was seen,
#   * which distinct sequences contain a 3-base homopolymer run,
#   * how many times each base was seen (computed two different ways),
#   * the average count per distinct sequence,
#   * the distinct sequences ranked from most to least frequent.

use strict;
use warnings;

use Data::Dumper;

# This is the list from which we will draw the random base pair -
# add more if you like :)
my @BASES = qw(a t g c);

# Number of random sequences to generate, and the length of each one.
my $SEQUENCE_TOTAL  = 1000;
my $SEQUENCE_LENGTH = 4;

# Build one random sequence.
#   $length - number of bases in the sequence
# Returns a string of $length characters, each picked at random from @BASES.
sub random_sequence {
    my ($length) = @_;

    # rand(@BASES) evaluates the array in scalar context (its size); the
    # fractional result is truncated when it is used as an array index.
    return join '', map { $BASES[ rand(@BASES) ] } 1 .. $length;
}

# Count how many times each sequence occurs.
#   @sequences - list of sequence strings
# Returns a hash reference mapping each distinct sequence to its count.
sub count_sequences {
    my @sequences = @_;

    my %count_of;

    # ++ treats a missing entry as 0 without an "uninitialized" warning.
    $count_of{$_}++ for @sequences;

    return \%count_of;
}

# Tell whether a sequence contains the same base three times in a row.
#   $seq - sequence string
# Returns 1 if such a run exists, 0 otherwise.
sub has_homopolymer {
    my ($seq) = @_;

    # The backreference matches any character followed by itself twice, so
    # bases added to @BASES are covered without editing this pattern.
    return $seq =~ /(.)\1\1/ ? 1 : 0;
}

# Method 1 - iterate across all sequences and split each sequence into a
# list of characters.
#   @sequences - list of sequence strings
# Returns a hash reference mapping each base to the number of times it occurs.
sub count_bases_in_sequences {
    my @sequences = @_;

    my %count_of;
    for my $seq (@sequences) {
        $count_of{$_}++ for split //, $seq;
    }

    return \%count_of;
}

# Method 2 - iterate across the distinct sequences only and weight every
# base by the number of times its sequence was seen.
#   $sequence_count - hash reference as returned by count_sequences()
# Returns a hash reference mapping each base to the number of times it occurs.
sub count_bases_from_counts {
    my ($sequence_count) = @_;

    my %count_of;
    for my $seq ( keys %{$sequence_count} ) {
        $count_of{$_} += $sequence_count->{$seq} for split //, $seq;
    }

    return \%count_of;
}

# Average number of occurrences per distinct sequence.
#   $sequence_count - hash reference as returned by count_sequences()
# Returns the mean of the counts, or 0 when the table is empty (this avoids
# a division by zero).
sub average_count {
    my ($sequence_count) = @_;

    my $distinct = keys %{$sequence_count};
    return 0 if !$distinct;

    my $sum = 0;
    $sum += $_ for values %{$sequence_count};

    return $sum / $distinct;
}

# Rank the distinct sequences by how often they were seen.
#   $sequence_count - hash reference as returned by count_sequences()
# Returns the list of sequences, highest count first; sequences with equal
# counts are ordered alphabetically so the ranking is reproducible.
sub sequences_by_count {
    my ($sequence_count) = @_;

    return sort {
        $sequence_count->{$b} <=> $sequence_count->{$a}
            or $a cmp $b
    } keys %{$sequence_count};
}

# Generate the random sequences and print every report.
sub main {

    # Append a newline to every print, for the duration of this run only.
    local $\ = "\n";

    my @sequences
        = map { random_sequence($SEQUENCE_LENGTH) } 1 .. $SEQUENCE_TOTAL;

    # This hash stores the count of each sequence.
    my $sequence_count = count_sequences(@sequences);

    for my $seq ( keys %{$sequence_count} ) {
        print qq{sequence $seq seen $sequence_count->{$seq} times};
    }

    for my $seq ( grep { has_homopolymer($_) } keys %{$sequence_count} ) {
        print
            qq(3-homo polymer sequence $seq seen $sequence_count->{$seq} times);
    }

    # Both methods count the same bases, so the two dumps should agree.
    print Dumper( count_bases_in_sequences(@sequences) );
    print Dumper( count_bases_from_counts($sequence_count) );

    print "average sequence count is ", average_count($sequence_count);

    for my $seq ( sequences_by_count($sequence_count) ) {
        print qq{sequence $seq seen $sequence_count->{$seq} times};
    }

    return;
}

# Run the report only when executed as a script, not when loaded by
# another file (caller is false at the top level of the main program).
main() unless caller;

1;