Initial commit. Francais facile script.

This commit is contained in:
Mike Small 2020-06-15 19:31:19 -04:00
commit 424801df90
3 changed files with 75 additions and 0 deletions

7
Makefile Normal file
View File

@ -0,0 +1,7 @@
# Developer conveniences for the frfac script.
# Neither target produces a file of its own name, so both are declared phony;
# otherwise a stray file called "check" or "install" in this directory would
# make the target look up to date and its recipe would be skipped.
.PHONY: check install

# Syntax-check the script with warnings enabled, then lint it with perlcritic.
check:
	perl -wc frfac
	perlcritic frfac

# Ensure /usr/local/bin exists (mode 0755), then install frfac into it with
# mode 0555, both owned by root:wheel.
install:
	sudo install -o root -g wheel -m 0755 -d /usr/local/bin
	sudo install -o root -g wheel -m 0555 frfac /usr/local/bin

5
README Normal file
View File

@ -0,0 +1,5 @@
Scripts to extract media URLs from websites
1. frfac - prints the URL of the most recent Francais Facile episode from RFI.
Usage: ftp $(frfac)
(or substitute another downloader for ftp.)

63
frfac Executable file
View File

@ -0,0 +1,63 @@
#!/usr/pkg/bin/perl
# Print the URI of the most recent RFI Francais Facile episode.
# Feed this to a downloader. (This script doesn't itself download.)
use File::Temp;
use HTML::LinkExtractor;
use List::Util 'uniq';
use Readonly;
use URI;
use URI::Fetch;
use URI::Find;
use strict;
use warnings;
use feature 'say';
# Index page that lists the available episodes.
my $BASE_URI =
    'http://www.rfi.fr/fr/podcasts/journal-fran%C3%A7ais-facile/';

# find the list of available episodes
# The "or die" already covers a false return from fetch(), so no second
# check of $res is needed.
my $res = URI::Fetch->fetch($BASE_URI) or die URI::Fetch->errstr;
my $epis_list_html = $res->content;
# NOTE(review): URI::Fetch appears to hand back a response object without a
# body for some statuses (e.g. 410 Gone) -- confirm against its docs. Without
# this check an empty body would be misreported below as a layout change.
die "No content received for $BASE_URI, stopping"
    unless defined $epis_list_html && length $epis_list_html;

# Keep every link whose target ends in "facile-<digits>-<digits>h<digits>-gmt"
# (optionally followed by whitespace). Hrefs are resolved against $BASE_URI
# by the extractor.
my @episode_links;
my $LX = HTML::LinkExtractor->new(undef, $BASE_URI);
$LX->parse(\$epis_list_html);
for my $link (@{ $LX->links }) {
    next unless $link->{href};
    push @episode_links, $link->{href}
        if $link->{href} =~ /facile-\d+-\d+h\d+-gmt\s*$/;
}
die "Français facile's episode list html has changed, stopping"
    unless @episode_links > 0;

# Plain string sort; the last element is taken as the newest episode.
# Presumably the digits in the link are a most-significant-first date and
# time, which is what makes string order match age -- verify against the site.
@episode_links = sort @episode_links;
#say for @episode_links;
my $latest_episode = pop @episode_links;
#say "Latest: ", $latest_episode;
# fetch the mp3 from the most recent episode page
# Episode mp3 URIs recognised by collect_mp3s accumulate here.
my @mp3s;

# URI::Find callback: remember every URI that looks like an episode mp3.
#   $uri      - URI object for the match (not used here)
#   $uri_text - the URI as it appeared in the searched text
# Returns $uri_text unchanged. URI::Find substitutes the callback's return
# value for the URI inside the text it is scanning, so the sub must hand the
# original text back; falling off the end would instead return the result of
# the push (an element count) or a false match result and rewrite the text.
sub collect_mp3s {
    my ($uri, $uri_text) = @_;
    push @mp3s, $uri_text if $uri_text =~ /facile_\d+\.mp3$/;
    return $uri_text;
}
# Fetch the page of the most recent episode. The "or die" already covers a
# false return from fetch(), so no second check of $res is needed.
$res = URI::Fetch->fetch($latest_episode) or die URI::Fetch->errstr;
my $episode_html = $res->content;
# NOTE(review): URI::Fetch appears to hand back a response object without a
# body for some statuses (e.g. 410 Gone) -- confirm against its docs. Without
# this check an empty body would be misreported below as "Found 0 show mp3s".
die "No content received for $latest_episode, stopping"
    unless defined $episode_html && length $episode_html;

# The page has an inline json script with forward slash escaped.
$episode_html =~ s{\\/}{/}g;

# collect_mp3s is invoked for each URI found in the page and appends the
# episode mp3 candidates to @mp3s; the same URI can appear more than once,
# hence the uniq.
my $uri_finder = URI::Find->new(\&collect_mp3s);
$uri_finder->find(\$episode_html);
@mp3s = uniq @mp3s;
die sprintf("Found %d show mp3s instead of one, stopping", scalar(@mp3s))
    unless @mp3s == 1;

# Exactly one URI remains at this point; print it for the downloader.
say $mp3s[0];