#!/usr/bin/perl use strict; use warnings; # szabgab is playing police on CPAN # Fetch the recently uploaded modules and # run CPANTS_lin on then # offer to send an e-mail with some notes in case some of the errors # save the name of the module authors who have already received an e-mail # save the date when it was run last time (or just run it on files uploade in the last X hours or days # maybe later send e-mail use YAML qw(LoadFile DumpFile); use File::HomeDir; use File::Spec::Functions qw(catfile); use DateTime; use DateTime::Format::Strptime; use Data::Dumper qw(Dumper); use Mail::Sendmail qw(sendmail); use autodie; # need 1.994 die "Check the time TODO in the code!" if 1900 + (localtime)[5] > 2008; my $dir = catfile( File::HomeDir->my_home, '.cpanpolice' ); my $ymlfile = catfile ( $dir, 'police.yml'); my $mca = "/home/gabor/work/Module-CPANTS-Analyse"; my $lint = "perl -I$mca/lib $mca/bin/cpants_lint.pl"; my $cpan = "/home/gabor/download/cpan"; mkdir $dir if not -d $dir; my $data; if (-e $ymlfile) { $data = LoadFile($ymlfile); } my $message = ''; my $time = localtime; my @recent = get_recent(); my $n; #print "$_->{date} $_->{file}\n" for @recent; foreach my $e (@recent) { my $path = catfile($cpan, $e->{file}); if (not -e $path) { #print "$e->{date} $e->{file}\n"; next; # TODO later deal with missing files issue # in case of two uploads are ok as we are looking at a minicpan and we will only have the most recent # it might be ok with development versions such as _01 as those are not mirrored by CPAN::Mini I think # During development I saw total 355 and existing 235 files (the recent file has 7 days of data) } $n++; my ($author, $file) = (split m{/}, $e->{file})[-2, -1]; next if $data->{authors}{$author}; # already sent once #print "$path\n"; #print "$lint $path\n"; my @out = qx{$lint $path}; my @errors = grep {$_ !~ m{^(Checked dist|Kwalitee rating|Ignoring metrics)} } @out; #print @errors; if (grep /metayml_has_license/, @errors) { $data->{authors}{$author} = { time => $time, file => $e->{file}, }; $file =~ s/\.(tar\.gz|tgz)$//; $message .= "http://search.cpan.org/~$author/$file/\n"; $message .= "http://search.cpan.org/~$author\n\n"; } #last if $n > 100; } if ($message) { print "sending message\n$message"; sendmail( To => 'szabgab@gmail.com', Subject => "CPAN Police report at $time", From => 'gabor@pti.co.il', Message => $message, ) or print $Mail::Sendmail::error; } print "Number of files found: $n\n"; DumpFile($ymlfile, $data); print "done\n"; sub get_recent { use LWP::Simple; my $recent = catfile ($dir, 'RECENT-ls'); mirror 'http://www.cpan.org/indices/RECENT-ls', $recent; my $dt = DateTime::Format::Strptime->new ( pattern => '%Y %b %d %H:%M', locale => 'en_USA', on_error => 'croak', ); # my $year = 1900 + (localtime)[5]; my @entries; open my $fh, '<', $recent; my $c; while (my $line = <$fh>) { next if $line !~ m{\d \./authors/id}; next if $line =~ /(readme|meta|CHECKSUMS)$/; chomp $line; $line =~ s/^\s+//; #print $line; my ($month, $day, $hm, $file) = (split /\s+/, $line)[7..10]; $file = substr $file, 2; my $date = $dt->parse_datetime("2008 $month $day $hm"); # TODO see what happens at the end of the year with the RECENT-ls file ? $c++; #print "$month $day $hour $min $file\n"; push @entries, { file => $file, date => $date }; die "$line\n$month '$day' $hm\n" if not defined $date; } print "Number of entries found in RECENT-ls: $c\n"; return sort { DateTime->compare($b->{date}, $a->{date}) } @entries; }