<%doc>
Atom feed generator for use with wordle.net. Parameters are

  debate
  speaker
  list

where

  debate  = one of mccain-obama-01, mccain-obama-02, palin-biden
  speaker = one of the speakers in the debate (e.g. mccain or obama ... or ... palin or biden)
  list    = one of
    POS, where POS =
      adjective
      adverb
      noun
      verb
      verball
      all
    unique.POS
    both.POS (does not require speaker)
    all.POS (does not require speaker)
    pair.POS1.POS2
    nphrase.all
    nphrase.noparent
    nphrase.nochild
</%doc>
<% $debate_key{$debate} %>
2008-10-09
Martin Krzywinski
http://mkweb.bcgsc.ca
martin.krzywinski@gmail.com
<% $title %>
debate:speaker:list::<%$debate%>:<%$speaker%>:<%$list%>
<% $time %>
<% join("\n",@words) %>
<%init>
use Data::Dumper;
use POSIX qw(strftime);
use Math::VecStat qw(min max);
# Display name of each speaker, keyed by the lowercase id used in request
# parameters and in data file names.
my %speaker_key = (
  obama  => "Barack Obama",
  mccain => "John McCain",
  biden  => "Joe Biden",
  palin  => "Sarah Palin",
);
# Full title of each debate (emitted by the template); the keys are also
# the set of valid values for the "debate" parameter.
my %debate_key = (
  "mccain-obama-01" => "Barack Obama vs John McCain (1st debate), 2008 Presidential Debate Series",
  "mccain-obama-02" => "Barack Obama vs John McCain (2nd debate), 2008 Presidential Debate Series",
  "palin-biden"     => "Joe Biden vs Sarah Palin, 2008 Vice-Presidential Debate",
);
# Short title of each debate.
my %debate_key_brief = (
  "mccain-obama-01" => "Obama vs McCain (1st)",
  "mccain-obama-02" => "Obama vs McCain (2nd)",
  "palin-biden"     => "Biden vs Palin",
);
# Valid speakers for each debate; the first one is the default.
my %speakers = (
  "mccain-obama-01" => [qw(obama mccain)],
  "mccain-obama-02" => [qw(obama mccain)],
  "palin-biden"     => [qw(biden palin)],
);
# Parts of speech accepted in the "list" parameter.
my @pos = qw(noun verb verball adjective adverb all);
# Request parameters are compared case-insensitively. Iterate over the keys
# only: looping over the flattened hash would also treat every value as a
# key and insert spurious empty entries into %ARGS.
for my $key (keys %ARGS) {
  $ARGS{$key} = lc $ARGS{$key};
}
# Root directory of the debate data; each debate has its own subdirectory.
my $dir = "/home/martink/work/circos/projects/debates";
# Fall back to the first debate when the parameter is missing or unknown.
my $debate = (defined $ARGS{debate} && exists $debate_key{$ARGS{debate}})
  ? $ARGS{debate}
  : "mccain-obama-01";
# Fall back to the debate's first speaker when the parameter is missing or
# names someone who did not take part in the selected debate.
my $speaker = (defined $ARGS{speaker} && grep { $_ eq $ARGS{speaker} } @{$speakers{$debate}})
  ? $ARGS{speaker}
  : $speakers{$debate}[0];
# Filled in below from the "list" parameter: the validated list name, the
# data file path relative to the debate directory, and the entry title.
my ($list,$file,$title);
# Resolve the "list" parameter into $list (validated name), $file (data file
# path relative to the debate directory) and $title (entry title). Anything
# unrecognised falls through to the default at the end.
my $requested = defined $ARGS{list} ? $ARGS{list} : "";
my %is_pos = map { $_ => 1 } @pos;
if ($is_pos{$requested}) {
  # Bare part of speech, e.g. "noun": word list of the selected speaker.
  $list = $requested;
  $file = "parsed/$speaker.pos.$list.txt";
  $title = sprintf("%s usage by %s", ucfirst $list, $speaker_key{$speaker});
} elsif ($requested =~ /^(unique|both|all)\.(.+)$/ && $is_pos{$2}) {
  # Scoped part of speech, e.g. "unique.noun" or "both.verb".
  my ($scope,$pos) = ($1,$2);
  $list = $requested;
  if ($scope eq "both") {
    $file = "parsed/lists/words.$scope.$pos.txt";
    $title = sprintf("%s usage by both candidates", ucfirst $pos);
  } elsif ($scope eq "all") {
    $file = "parsed/lists/words.$scope.$pos.txt";
    $title = sprintf("%s usage by either candidate", ucfirst $pos);
  } else {
    # "unique" lists are stored per speaker.
    $file = "parsed/lists/words.$speaker.$pos.txt";
    $title = sprintf("%s usage by %s", ucfirst $pos, $speaker_key{$speaker});
  }
} elsif ($requested =~ /^nphrase\.(all|noparent|nochild)$/) {
  # Noun phrase lists.
  my $scope = $1;
  my %phrase_kind = (all => "all", nochild => "simple", noparent => "complex");
  $list = $requested;
  $file = "parsed/$speaker.nphrase.$scope.txt";
  $title = sprintf("%s noun phrases by %s", $phrase_kind{$scope}, $speaker_key{$speaker});
} else {
  # Word pairs, e.g. "pair.adjective.noun"; "verball" is not available here.
  my @pair_pos = grep { $_ ne "verball" } @pos;
  PAIR:
  for my $pi (@pair_pos) {
    for my $pj (@pair_pos) {
      next unless $requested eq "pair.$pi.$pj";
      $list = $requested;
      $file = "parsed/pairs/$speaker.$pi.$pj.txt";
      $title = sprintf("%s/%s pairs by %s", ucfirst $pi, ucfirst $pj, $speaker_key{$speaker});
      last PAIR;
    }
  }
}
# Default for a missing or unrecognised list: the selected speaker's nouns.
if (! $list) {
  $list = "noun";
  $file = "parsed/$speaker.pos.$list.txt";
  $title = sprintf("%s usage by %s", ucfirst $list, $speaker_key{$speaker});
}
# Absolute path of the selected data file.
$file = "$dir/$debate/$file";
# Timestamp emitted by the template: the data file's modification time in
# UTC. A file that cannot be stat'ed yields the epoch.
my $mtime = (stat($file))[9];
my $time = strftime "%Y-%m-%dT%H:%M:%SZ", gmtime(defined $mtime ? $mtime : 0);
# Count how often each entry occurs. Punctuation is removed and runs of
# whitespace are collapsed so that variants of an entry are counted together.
my %freq;
if (open(my $fh, "<", $file)) {
  while (my $line = <$fh>) {
    chomp $line;
    $line =~ s/[^\w\s]//g;
    my $str = join(" ", split(" ", $line));
    $freq{$str}++;
  }
  close($fh);
} else {
  # Best effort: an unreadable file produces an empty word list.
  warn "cannot open word list $file: $!";
}
# Optional compression of the frequency range: take the scale-th root of
# every count, then normalise so that the smallest value becomes 1. The
# parameter comes from the request, so it is used only when it is a
# non-zero number (anything else would divide by zero).
my $scale = $ARGS{scale};
if (defined $scale
    && $scale =~ /^[-+]?(?:\d+\.?\d*|\.\d+)(?:e[-+]?\d+)?$/i
    && $scale != 0) {
  $_ **= 1/$scale for values %freq;
  my $minf = min(values %freq);
  if ($minf) {
    $_ /= $minf for values %freq;
  }
}
# Expand the counts into the output list: each entry is repeated once per
# (integer part of its) frequency, with its words joined by "~".
my @words;
for my $entry (sort keys %freq) {
  my $token = join("~", split(" ", $entry));
  push @words, ($token) x $freq{$entry};
  #push @words, sprintf("%s:%d",$token,$freq{$entry});
}
</%init>
<%flags>
inherit=>undef
</%flags>