<%doc>
Atom feed generator for use in wordle.net.

Query parameters (all values are compared case-insensitively):

  debate   one of mccain-obama-01, mccain-obama-02, palin-biden
           (defaults to mccain-obama-01 when missing or unknown)

  speaker  one of the speakers in the chosen debate, e.g. mccain or obama
           for the presidential debates, palin or biden for palin-biden
           (defaults to the first speaker listed for the debate)

  list     one of
             POS                 where POS = adjective adverb noun verb verball all
             unique.POS
             both.POS            (does not require speaker)
             all.POS             (does not require speaker)
             pair.POS1.POS2      (verball is not accepted in pairs)
             nphrase.all
             nphrase.noparent
             nphrase.nochild
           (defaults to noun when missing or unknown)

  scale    optional non-zero number n; each word frequency f is replaced by
           f**(1/n) and then divided by the smallest scaled frequency.
           Ignored when it is not a plain decimal number.

Each word (or phrase, joined with "~") is written to the feed once per unit
of its (optionally scaled) frequency.

NOTE(review): only the text content and substitutions of the feed template
are visible in this copy of the component; confirm the enclosing Atom element
markup against the deployed version before relying on the template line below.
</%doc>
<% $debate_key{$debate} %> 2008-10-09 Martin Krzywinski http://mkweb.bcgsc.ca martin.krzywinski@gmail.com <% $title %> debate:speaker:list::<%$debate%>:<%$speaker%>:<%$list%> <% $time %> <% join("\n",@words) %>
<%init>
use Data::Dumper;
use POSIX qw(strftime);
use Math::VecStat qw(min max);

# Display names and per-debate lookup tables.
my %speaker_key = (
    obama  => "Barack Obama",
    mccain => "John McCain",
    biden  => "Joe Biden",
    palin  => "Sarah Palin",
);
my %debate_key = (
    "mccain-obama-01" => "Barack Obama vs John McCain (1st debate), 2008 Presidential Debate Series",
    "mccain-obama-02" => "Barack Obama vs John McCain (2nd debate), 2008 Presidential Debate Series",
    "palin-biden"     => "Joe Biden vs Sarah Palin, 2008 Vice-Presidential Debate",
);
my %debate_key_brief = (
    "mccain-obama-01" => "Obama vs McCain (1st)",
    "mccain-obama-02" => "Obama vs McCain (2nd)",
    "palin-biden"     => "Biden vs Palin",
);
my %speakers = (
    "mccain-obama-01" => [qw(obama mccain)],
    "mccain-obama-02" => [qw(obama mccain)],
    "palin-biden"     => [qw(biden palin)],
);
my @pos = (qw(noun verb verball adjective adverb all));
my %is_pos      = map { $_ => 1 } @pos;
my %is_pair_pos = map { $_ => 1 } grep { $_ ne "verball" } @pos;

# Normalise only the parameters this component reads. Iterating over the
# flattened %ARGS (keys AND values) would create bogus keys, and non-scalar or
# missing values are treated as "not supplied".
my %arg;
for my $name (qw(debate speaker list scale)) {
    my $value = $ARGS{$name};
    $arg{$name} = (defined $value && !ref $value) ? lc $value : "";
}

my $dir = "/home/martink/work/circos/projects/debates";

# Every path component below comes from a whitelist, never straight from the
# request, so the request cannot steer the file name outside $dir.
my $debate = exists $debate_key{ $arg{debate} } ? $arg{debate} : "mccain-obama-01";
my $speaker = grep({ $_ eq $arg{speaker} } @{ $speakers{$debate} })
    ? $arg{speaker}
    : $speakers{$debate}[0];

# Resolve the requested list into a data file (relative to the debate
# directory) and a human-readable feed title.
my ($list, $file, $title);
if ($is_pos{ $arg{list} }) {
    $list  = $arg{list};
    $file  = "parsed/$speaker.pos.$list.txt";
    $title = sprintf("%s usage by %s", ucfirst $list, $speaker_key{$speaker});
}
elsif ($arg{list} =~ /\A(unique|both|all)\.(.+)\z/ && $is_pos{$2}) {
    my ($scope, $pos) = ($1, $2);
    $list = $arg{list};
    if ($scope eq "both") {
        $file  = "parsed/lists/words.$scope.$pos.txt";
        $title = sprintf("%s usage by both candidates", ucfirst $pos);
    }
    elsif ($scope eq "all") {
        $file  = "parsed/lists/words.$scope.$pos.txt";
        $title = sprintf("%s usage by either candidate", ucfirst $pos);
    }
    else {
        # "unique" lists are stored per speaker.
        $file  = "parsed/lists/words.$speaker.$pos.txt";
        $title = sprintf("%s usage by %s", ucfirst $pos, $speaker_key{$speaker});
    }
}
elsif ($arg{list} =~ /\Anphrase\.(all|noparent|nochild)\z/) {
    my $scope = $1;
    my %phrase_kind = (all => "all", nochild => "simple", noparent => "complex");
    $list  = $arg{list};
    $file  = "parsed/$speaker.nphrase.$scope.txt";
    $title = sprintf("%s noun phrases by %s", $phrase_kind{$scope}, $speaker_key{$speaker});
}
elsif ($arg{list} =~ /\Apair\.([^.]+)\.([^.]+)\z/ && $is_pair_pos{$1} && $is_pair_pos{$2}) {
    my ($first_pos, $second_pos) = ($1, $2);
    $list  = $arg{list};
    $file  = "parsed/pairs/$speaker.$first_pos.$second_pos.txt";
    $title = sprintf("%s/%s pairs by %s",
                     ucfirst $first_pos, ucfirst $second_pos, $speaker_key{$speaker});
}

# Unknown or missing list: fall back to the speaker's nouns.
if (!$list) {
    $list  = "noun";
    $file  = "parsed/$speaker.pos.$list.txt";
    $title = sprintf("%s usage by %s", ucfirst $list, $speaker_key{$speaker});
}
$file = "$dir/$debate/$file";

# Count how often each (punctuation-stripped, whitespace-normalised) entry
# occurs. A missing data file is logged and yields an empty word list.
my %freq;
my $mtime;
if (open(my $fh, "<", $file)) {
    $mtime = (stat($fh))[9];
    while (my $line = <$fh>) {
        chomp $line;
        $line =~ s/[^\w\s]//g;
        my $entry = join(" ", split(" ", $line));
        next if $entry eq "";    # blank line: nothing to count
        $freq{$entry}++;
    }
    close($fh);
}
else {
    warn "cannot open word list $file: $!";
}

# Feed timestamp: modification time of the data file, or now if unavailable.
$mtime = time unless defined $mtime;
my $time = strftime "%Y-%m-%dT%H:%M:%SZ", gmtime($mtime);

# Optional compression of the frequency range: f -> f**(1/scale), then
# normalise so the least frequent entry has weight 1.
# NOTE(review): scale comes from the request and a small |scale| inflates the
# counts enormously (f**(1/scale)); consider bounding it.
my $scale = $arg{scale};
if ($scale =~ /\A[+-]?(?:\d+\.?\d*|\.\d+)(?:e[+-]?\d+)?\z/ && $scale != 0 && %freq) {
    for my $entry (keys %freq) {
        $freq{$entry} **= 1 / $scale;
    }
    my $minf = min(values %freq);
    # Skip normalisation if the minimum underflowed to zero.
    if (defined $minf && $minf > 0) {
        for my $entry (keys %freq) {
            $freq{$entry} /= $minf;
        }
    }
}

# Emit each entry once per unit of frequency (fractional parts are dropped);
# multi-word entries are joined with "~" so they stay together.
my @words;
for my $entry (sort keys %freq) {
    my $count = $freq{$entry};
    my $token = join("~", split(" ", $entry));
    push @words, ($token) x $count;
}
</%init>
<%flags>
inherit=>undef
</%flags>