2020-07-03 11:45:52 -04:00
|
|
|
#!/usr/bin/perl
|
2020-06-15 19:31:19 -04:00
|
|
|
# Print the URI of the most recent RFI Français facile episode.
|
|
|
|
# Feed this to a downloader. (This script doesn't itself download.)
|
|
|
|
|
|
|
|
use File::Temp;
|
|
|
|
use HTML::LinkExtractor;
|
|
|
|
use List::Util 'uniq';
|
|
|
|
use Readonly;
|
|
|
|
use URI;
|
|
|
|
use URI::Fetch;
|
|
|
|
use URI::Find;
|
|
|
|
use strict;
|
|
|
|
use warnings;
|
|
|
|
use feature 'say';
|
|
|
|
|
|
|
|
# Index page that lists the available episodes. Declared Readonly because
# it is a fixed constant that nothing below is meant to reassign.
Readonly my $BASE_URI =>
    'http://www.rfi.fr/fr/podcasts/journal-fran%C3%A7ais-facile/';

# find the list of available episodes

# A false return from fetch() is reported with URI::Fetch's own error text;
# no second check of $res is needed after this line.
my $res = URI::Fetch->fetch($BASE_URI) or die URI::Fetch->errstr;

# Guard against a response object that carries no content (presumably
# possible for some non-OK statuses -- confirm against URI::Fetch), which
# would otherwise only fail obscurely further down.
my $epis_list_html = $res->content;
die "URI::Fetch reports failure, stopping" unless defined $epis_list_html;
|
|
|
|
|
|
|
|
# Extract the links from the episode index page; $BASE_URI is handed to the
# extractor as its second constructor argument.
my $LX = HTML::LinkExtractor->new(undef, $BASE_URI);
$LX->parse(\$epis_list_html);

# Keep only links with a true href whose end matches the episode pattern
# ("facile" ... "<digits>h<digits>-gmt").
my @episode_links =
    grep { $_ && /facile.*\d+h\d+-gmt\s*$/ }
    map  { $_->{href} } @{ $LX->links };

@episode_links
    or die "Français facile's episode list html has changed, stopping";

# The latest episode is taken to be the link that sorts last in plain
# string order.
@episode_links = sort @episode_links;
#say for @episode_links;
my $latest_episode = pop @episode_links;
#say "Latest: ", $latest_episode;
|
|
|
|
#say "Latest: ", $latest_episode;
|
|
|
|
|
|
|
|
# find the mp3 URI on the most recent episode's page
|
|
|
|
|
|
|
|
# URI strings of candidate show mp3s, filled in by collect_mp3s().
my @mp3s;

# Callback for URI::Find: remember every URI whose text looks like a show mp3.
#
#   $uri      - first callback argument (not used here)
#   $uri_text - the URI as a string; appended to @mp3s when it matches the
#               "facile_<digits>...<digits>.mp3" pattern
#
# Returns $uri_text unchanged. URI::Find replaces each URI in the searched
# text with the callback's return value, so returning the original text keeps
# the scanned document intact instead of overwriting URIs with the result of
# the push statement.
sub collect_mp3s {
    my ($uri, $uri_text) = @_;

    push @mp3s, $uri_text if $uri_text =~ /facile_\d+.*\d+\.mp3$/;

    return $uri_text;
}
|
|
|
|
|
|
|
|
# Fetch the page of the most recent episode. A false return from fetch()
# is reported with URI::Fetch's own error text.
$res = URI::Fetch->fetch($latest_episode) or die URI::Fetch->errstr;

# Stop if the response carries no content (presumably possible for some
# non-OK statuses -- confirm against URI::Fetch), so the text processing
# below never runs on an undefined value.
my $episode_html = $res->content;
die "URI::Fetch reports failure, stopping" unless defined $episode_html;
|
|
|
|
|
|
|
|
# The page carries an inline JSON script in which forward slashes are
# backslash-escaped; drop the backslashes before scanning for URIs.
$episode_html =~ s{\\/}{/}g;

# Hand every URI found in the page to collect_mp3s(), which fills @mp3s.
URI::Find->new(\&collect_mp3s)->find(\$episode_html);

# Collapse duplicates, then insist on exactly one candidate.
@mp3s = uniq @mp3s;
if (@mp3s != 1) {
    die sprintf("Found %d show mp3s instead of one, stopping", scalar(@mp3s));
}

# Exactly one element remains at this point; print it.
say $mp3s[0];
|
|
|
|
|