From 424801df90aa7141ea05b069cada386987e14e73 Mon Sep 17 00:00:00 2001
From: Mike Small
Date: Mon, 15 Jun 2020 19:31:19 -0400
Subject: [PATCH] Initial commit. Francais facile script.

---
 Makefile |  7 +++++++
 README   |  5 +++++
 frfac    | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 75 insertions(+)
 create mode 100644 Makefile
 create mode 100644 README
 create mode 100755 frfac

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..e141e82
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,7 @@
+check:
+	perl -wc frfac
+	perlcritic frfac
+
+install:
+	sudo install -o root -g wheel -m 0755 -d /usr/local/bin
+	sudo install -o root -g wheel -m 0555 frfac /usr/local/bin
diff --git a/README b/README
new file mode 100644
index 0000000..b8d8d71
--- /dev/null
+++ b/README
@@ -0,0 +1,5 @@
+Scripts to extract media URLs from websites
+
+1. frfac - url of most recent Francais Facile episode from RFI.
+Usage: ftp $(frfac)
+(or substitute another downloader for ftp.)
diff --git a/frfac b/frfac
new file mode 100755
index 0000000..f070826
--- /dev/null
+++ b/frfac
@@ -0,0 +1,63 @@
+#!/usr/pkg/bin/perl
+# Print the URI of the most recent rfi francais facile episode.
+# Feed this to a downloader. (This script doesn't itself download.)
+
+use strict;
+use warnings;
+use feature 'say';
+use File::Temp;
+use HTML::LinkExtractor;
+use List::Util 'uniq';
+use Readonly;
+use URI;
+use URI::Fetch;
+use URI::Find;
+
+Readonly my $BASE_URI =>
+    'http://www.rfi.fr/fr/podcasts/journal-fran%C3%A7ais-facile/';
+
+# Fetch $uri and return its body; dies with URI::Fetch's error on failure.
+sub fetch_html {
+    my ($uri) = @_;
+    my $res = URI::Fetch->fetch($uri) or die URI::Fetch->errstr;
+    return $res->content;
+}
+
+# Find the list of available episodes: keep every href on the index page
+# whose tail looks like "facile-<digits>-<digits>h<digits>-gmt".
+my $epis_list_html = fetch_html($BASE_URI);
+my $LX = HTML::LinkExtractor->new(undef, $BASE_URI);
+$LX->parse(\$epis_list_html);
+my @episode_links =
+    grep { $_ && /facile-\d+-\d+h\d+-gmt\s*$/ }
+    map  { $_->{href} } @{ $LX->links };
+die "Français facile's episode list html has changed, stopping"
+    unless @episode_links;
+
+# NOTE(review): "latest" is the greatest href in plain string order; this
+# presumably works because the hrefs embed a sortable date -- confirm.
+my $latest_episode = (sort @episode_links)[-1];
+
+# Fetch the most recent episode page and pull the show's mp3 URI out of it.
+my @mp3s;
+
+# URI::Find callback: remember show mp3s.  Whatever it returns replaces the
+# URI in the scanned text, so hand back the original text to leave it intact.
+sub collect_mp3s {
+    my ($uri, $uri_text) = @_;
+    push @mp3s, $uri_text if $uri_text =~ /facile_\d+\.mp3$/;
+    return $uri_text;
+}
+
+my $episode_html = fetch_html($latest_episode);
+
+# The page has an inline json script with forward slash escaped.
+$episode_html =~ s{\\/}{/}g;
+
+URI::Find->new(\&collect_mp3s)->find(\$episode_html);
+
+@mp3s = uniq @mp3s;
+die sprintf("Found %d show mp3s instead of one, stopping", scalar(@mp3s))
+    unless @mp3s == 1;
+say $mp3s[0];
+