#!/usr/bin/perl

# Print the URI of the most recent rfi francais facile episode.
# Feed this to a downloader. (This script doesn't itself download.)

use strict;
use warnings;
use feature 'say';

use File::Temp;
use HTML::LinkExtractor;
use List::Util 'uniq';
use Readonly;
use URI;
use URI::Fetch;
use URI::Find;

# Index page that lists the available episodes.
Readonly my $BASE_URI =>
    'http://www.rfi.fr/fr/podcasts/journal-fran%C3%A7ais-facile/';

# fetch_html($uri)
#
# Fetch $uri with URI::Fetch and return the response body.
# Dies with URI::Fetch's own error string when the fetch fails, and with a
# generic message when a response comes back without any content.
sub fetch_html {
    my ($uri) = @_;

    my $res = URI::Fetch->fetch($uri)
        or die URI::Fetch->errstr;

    # NOTE(review): a response object may apparently be returned without a
    # body (URI::Fetch documents special handling for "gone" pages) --
    # treat that as a failure instead of parsing undef later on.
    my $html = $res->content;
    die "URI::Fetch reports failure, stopping" unless defined $html;

    return $html;
}

# episode_links($html, $base_uri)
#
# Return every link href found in $html that looks like an episode page,
# i.e. whose href ends in "facile-<digits>-<digits>h<digits>-gmt".
# $base_uri is handed to HTML::LinkExtractor as the base for the links.
sub episode_links {
    my ($html, $base_uri) = @_;

    my $extractor = HTML::LinkExtractor->new(undef, $base_uri);
    $extractor->parse(\$html);

    return grep { $_ && $_ =~ /facile-\d+-\d+h\d+-gmt\s*$/ }
           map  { $_->{href} }
           @{ $extractor->links };
}

# show_mp3s($html)
#
# Return the distinct URIs in $html that end in "facile_<digits>.mp3".
sub show_mp3s {
    my ($html) = @_;

    # The page has an inline json script with forward slash escaped.
    $html =~ s{\\/}{/}g;

    my @found;
    my $finder = URI::Find->new(
        sub {
            my (undef, $uri_text) = @_;
            push @found, $uri_text if $uri_text =~ /facile_\d+\.mp3$/;

            # URI::Find substitutes the callback's return value for the URI
            # in the scanned text, so hand the original text back unchanged.
            return $uri_text;
        }
    );
    $finder->find(\$html);

    return uniq @found;
}

# find the list of available episodes
my @episode_links = episode_links(fetch_html($BASE_URI), $BASE_URI);
die "Français facile's episode list html has changed, stopping"
    unless @episode_links > 0;

# The latest episode is taken to be the link that sorts last as a string.
# NOTE(review): this presumably relies on the date/time in the link being
# fixed-width and most-significant-first -- confirm against the live page.
my $latest_episode = (sort @episode_links)[-1];

# fetch the mp3 from the most recent episode page
my @mp3s = show_mp3s(fetch_html($latest_episode));
die sprintf("Found %d show mp3s instead of one, stopping", scalar(@mp3s))
    unless @mp3s == 1;

say $mp3s[0];