yt-dlp/test/test_download.py

#!/usr/bin/env python

import errno
import hashlib
import io
import os
import json
import unittest
import sys
import socket
import binascii

# Allow direct execution
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import youtube_dl.FileDownloader
import youtube_dl.InfoExtractors
from youtube_dl.utils import *

# Paths of the JSON files read at import time; both are resolved relative to
# the directory containing this file.
DEF_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tests.json')
PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")

# Number of download attempts made per test before the error is re-raised.
RETRIES = 3

# General configuration (from __init__, not very elegant...)
# Build a URL opener from a proxy handler, a cookie processor backed by `jar`
# and a YoutubeDLHandler, then install it.
# NOTE(review): presumably this makes it the process-wide default opener, as
# with the standard library's install_opener -- confirm against youtube_dl.utils.
jar = compat_cookiejar.CookieJar()
cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
proxy_handler = compat_urllib_request.ProxyHandler()
opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
compat_urllib_request.install_opener(opener)
# Default timeout, in seconds, applied to sockets created from now on.
socket.setdefaulttimeout(10)

def _try_rm(filename):
    """Delete filename; a file that is already absent is not an error.

    Any other OSError raised by os.remove is propagated to the caller.
    """
    try:
        os.remove(filename)
    except OSError as err:
        # Only "no such file or directory" is tolerated
        if err.errno == errno.ENOENT:
            return
        raise

def md5(s):
    """Return the hexadecimal MD5 digest of the text s, encoded as UTF-8."""
    encoded = s.encode('utf-8')
    return hashlib.md5(encoded).hexdigest()

class FileDownloader(youtube_dl.FileDownloader):
    """Downloader variant used by the tests.

    It aliases to_stderr to to_screen, turns every reported warning into an
    ExtractorError and records each info dict passed to process_info.
    """

    def __init__(self, *args, **kwargs):
        # Both attributes are set before the base initializer runs
        self.to_stderr = self.to_screen
        self.processed_info_dicts = []
        youtube_dl.FileDownloader.__init__(self, *args, **kwargs)

    def report_warning(self, message):
        """Raise ExtractorError(message): warnings are not accepted during tests."""
        raise ExtractorError(message)

    def process_info(self, info_dict):
        """Record info_dict, then hand it to the base implementation."""
        self.processed_info_dicts.append(info_dict)
        result = youtube_dl.FileDownloader.process_info(self, info_dict)
        return result

def _file_md5(fn):
    """Return the hexadecimal MD5 digest of the contents of the file fn.

    The file is opened in binary mode and hashed chunk by chunk, so a large
    downloaded file is never held in memory in one piece.  Errors from
    open() or read() propagate to the caller.
    """
    digest = hashlib.md5()
    with open(fn, 'rb') as f:
        # iter(callable, sentinel) stops when read() returns b'' at end of file
        for chunk in iter(lambda: f.read(65536), b''):
            digest.update(chunk)
    return digest.hexdigest()

def _load_json(path):
    """Parse the UTF-8 encoded JSON file at path and return the result."""
    with io.open(path, encoding='utf-8') as json_file:
        return json.load(json_file)


# Test definitions first, then the shared downloader parameters
defs = _load_json(DEF_FILE)
parameters = _load_json(PARAMETERS_FILE)


class TestDownload(unittest.TestCase):
    """Holder for the download tests.

    The individual test methods are not written out here; they are attached
    to this class further down in this module.
    """

    # None lifts unittest's length limit on diffs shown for failed assertions
    maxDiff = None

    def setUp(self):
        """Expose the module-level definitions and parameters on the instance."""
        self.defs = defs
        self.parameters = parameters

### Dynamically generate tests
def generator(test_case):
    """Create the test method for a single entry of the test definitions.

    test_case is one dict from the definitions list.  This function reads
    its 'name', 'url' and 'file' entries and the optional 'playlist',
    'skip', 'params', 'md5' and 'info_dict' entries.  When 'playlist' is
    present, 'file', 'md5' and 'info_dict' are read from each playlist item
    instead.  The returned function takes the TestCase instance as its only
    argument.
    """

    def _remove_output_files(test_cases):
        # Delete each output file together with its '.part' and '.info.json'
        # companions; files that do not exist are ignored by _try_rm.
        for tc in test_cases:
            for suffix in ('', '.part', '.info.json'):
                _try_rm(tc['file'] + suffix)

    def test_template(self):
        ie = youtube_dl.InfoExtractors.get_info_extractor(test_case['name'])
        if not ie._WORKING:
            print('Skipping: IE marked as not _WORKING')
            return
        # .get() so that a definition without a 'file' key is skipped exactly
        # like one with an empty value, instead of failing with KeyError.
        if 'playlist' not in test_case and not test_case.get('file'):
            print('Skipping: No output file specified')
            return
        if 'skip' in test_case:
            print('Skipping: {0}'.format(test_case['skip']))
            return

        # Per-test parameters override the shared ones; copy so that the
        # shared dict is never modified.
        params = self.parameters.copy()
        params.update(test_case.get('params', {}))
        # Read from the merged parameters, i.e. the ones the downloader is
        # actually configured with.
        skip_download = params.get('skip_download', False)

        fd = FileDownloader(params)
        for extractor in youtube_dl.InfoExtractors.gen_extractors():
            fd.add_info_extractor(extractor)
        finished_hook_called = set()
        def _hook(status):
            if status['status'] == 'finished':
                finished_hook_called.add(status['filename'])
        fd.add_progress_hook(_hook)

        test_cases = test_case.get('playlist', [test_case])
        # Start from a clean slate so leftovers of earlier runs cannot make
        # the existence checks below pass.
        _remove_output_files(test_cases)
        try:
            network_errors = (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError)
            for retry in range(1, RETRIES + 1):
                try:
                    fd.download([test_case['url']])
                except (DownloadError, ExtractorError) as err:
                    if retry == RETRIES:
                        raise

                    # Only retry when the wrapped exception is a network
                    # related one.  exc_info may be missing or None; in that
                    # case re-raise the original error rather than failing
                    # on the lookup itself.
                    exc_info = getattr(err, 'exc_info', None)
                    cause = exc_info[0] if exc_info else None
                    if cause not in network_errors:
                        raise

                    print('Retrying: {0} failed tries\n\n##########\n\n'.format(retry))
                else:
                    break

            for tc in test_cases:
                info_json_fn = tc['file'] + '.info.json'
                if not skip_download:
                    self.assertTrue(os.path.exists(tc['file']), msg='Missing file ' + tc['file'])
                    self.assertTrue(
                        tc['file'] in finished_hook_called,
                        msg='No "finished" progress hook call for ' + tc['file'])
                self.assertTrue(os.path.exists(info_json_fn), msg='Missing file ' + info_json_fn)
                if 'md5' in tc:
                    md5_for_file = _file_md5(tc['file'])
                    self.assertEqual(md5_for_file, tc['md5'])
                with io.open(info_json_fn, encoding='utf-8') as infof:
                    info_dict = json.load(infof)

                expected_info = tc.get('info_dict', {})
                for (info_field, expected) in expected_info.items():
                    got = info_dict.get(info_field)
                    if isinstance(expected, compat_str) and expected.startswith('md5:'):
                        # The expectation is a digest of the real value; fail
                        # with a readable message when there is nothing to hash.
                        self.assertTrue(
                            isinstance(got, compat_str),
                            u'field %s must be a string to be compared by md5, got %r' % (info_field, got))
                        got = 'md5:' + md5(got)
                    self.assertEqual(
                        expected, got,
                        u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))

                # If checkable fields are missing from the test case, print the info_dict
                test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
                    for key, value in info_dict.items()
                    if value and key in ('title', 'description', 'uploader', 'upload_date', 'uploader_id', 'location'))
                if not all(key in expected_info for key in test_info_dict):
                    sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=2) + u'\n')

                # Check for the presence of mandatory fields
                for key in ('id', 'url', 'title', 'ext'):
                    self.assertTrue(info_dict.get(key), u'Missing or empty mandatory field %s' % key)
        finally:
            # Always clean up, whether the test passed, failed or errored
            _remove_output_files(test_cases)

    return test_template

### And add them to TestDownload
for test_case in defs:
    test_method = generator(test_case)
    base_name = "test_{0}".format(test_case["name"])
    # Several definitions may share one name.  Give later ones a numeric
    # suffix so they do not replace the method registered earlier.
    method_name = base_name
    duplicate_index = 1
    while hasattr(TestDownload, method_name):
        method_name = "{0}_{1}".format(base_name, duplicate_index)
        duplicate_index += 1
    test_method.__name__ = method_name
    setattr(TestDownload, method_name, test_method)
    # Do not leave a module-level callable whose name starts with "test_"
    del test_method


# Run the tests through unittest when this file is executed directly.
if __name__ == '__main__':
    unittest.main()
streamlined and simplified dynamic tests generation; readded a couple of test features 2012-12-12 08:15:21 -05:00			`#!/usr/bin/env python`

Adapt test_download to support playlists, and remove race conditions 2013-01-01 13:30:29 -05:00			`import errno`
adding download test with md5 check 2012-09-28 09:34:56 -04:00			`import hashlib`
streamlined and simplified dynamic tests generation; readded a couple of test features 2012-12-12 08:15:21 -05:00			`import io`
adding download test with md5 check 2012-09-28 09:34:56 -04:00			`import os`
correction on the test 2012-10-15 07:01:36 -04:00			`import json`
Update download tests 2012-11-28 09:09:56 -05:00			`import unittest`
			`import sys`
better Vimeo tests; fixed a couple of VimeoIE fields 2012-12-20 10:30:55 -05:00			`import socket`
test: extend the reach of info_dict checking * print the info_dict in a format suitable to easy adding to tests.json during tests if un-tested fields are detected * make it possible to put the crc32 in tests.json if the field is too long * complete the "info_dict" fields in existing tests * fixed the bugs catched doing this 2013-06-09 08:21:42 -04:00			`import binascii`
streamlined and simplified dynamic tests generation; readded a couple of test features 2012-12-12 08:15:21 -05:00
			`# Allow direct execution`
			`sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))`
Update download tests 2012-11-28 09:09:56 -05:00
streamlined and simplified dynamic tests generation; readded a couple of test features 2012-12-12 08:15:21 -05:00			`import youtube_dl.FileDownloader`
			`import youtube_dl.InfoExtractors`
			`from youtube_dl.utils import *`
test automation 2012-12-11 21:55:06 -05:00
			`DEF_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tests.json')`
streamlined and simplified dynamic tests generation; readded a couple of test features 2012-12-12 08:15:21 -05:00			`PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")`

Bubble up all the stack of exceptions and retry download tests on timeout errors 2013-03-09 04:05:43 -05:00			`RETRIES = 3`

streamlined and simplified dynamic tests generation; readded a couple of test features 2012-12-12 08:15:21 -05:00			`# General configuration (from __init__, not very elegant...)`
			`jar = compat_cookiejar.CookieJar()`
			`cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)`
			`proxy_handler = compat_urllib_request.ProxyHandler()`
			`opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())`
			`compat_urllib_request.install_opener(opener)`
Aggressive test timeout to catch hanging servers 2013-01-12 14:33:03 -05:00			`socket.setdefaulttimeout(10)`
streamlined and simplified dynamic tests generation; readded a couple of test features 2012-12-12 08:15:21 -05:00
Adapt test_download to support playlists, and remove race conditions 2013-01-01 13:30:29 -05:00			`def _try_rm(filename):`
			`""" Remove a file if it exists """`
			`try:`
			`os.remove(filename)`
			`except OSError as ose:`
			`if ose.errno != errno.ENOENT:`
			`raise`

switch long info_dict fields checking to md5 2013-06-09 09:03:54 -04:00			`md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()`
test: extend the reach of info_dict checking * print the info_dict in a format suitable to easy adding to tests.json during tests if un-tested fields are detected * make it possible to put the crc32 in tests.json if the field is too long * complete the "info_dict" fields in existing tests * fixed the bugs catched doing this 2013-06-09 08:21:42 -04:00
streamlined and simplified dynamic tests generation; readded a couple of test features 2012-12-12 08:15:21 -05:00			`class FileDownloader(youtube_dl.FileDownloader):`
			`def __init__(self, *args, **kwargs):`
			`self.to_stderr = self.to_screen`
add info_dict testing to test_download 2012-12-20 08:14:43 -05:00			`self.processed_info_dicts = []`
			`return youtube_dl.FileDownloader.__init__(self, *args, **kwargs)`
print WARNINGs during test + minor fix to NBAIE 2013-06-06 09:07:05 -04:00			`def report_warning(self, message):`
raise exceptions on warnings during tests - and solve a couple of them 2013-06-07 05:19:27 -04:00			`# Don't accept warnings during tests`
			`raise ExtractorError(message)`
add info_dict testing to test_download 2012-12-20 08:14:43 -05:00			`def process_info(self, info_dict):`
			`self.processed_info_dicts.append(info_dict)`
			`return youtube_dl.FileDownloader.process_info(self, info_dict)`
test automation 2012-12-11 21:55:06 -05:00
streamlined and simplified dynamic tests generation; readded a couple of test features 2012-12-12 08:15:21 -05:00			`def _file_md5(fn):`
			`with open(fn, 'rb') as f:`
			`return hashlib.md5(f.read()).hexdigest()`

			`with io.open(DEF_FILE, encoding='utf-8') as deff:`
			`defs = json.load(deff)`
			`with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:`
			`parameters = json.load(pf)`
test automation 2012-12-11 21:55:06 -05:00
add info_dict testing to test_download 2012-12-20 08:14:43 -05:00
test automation 2012-12-11 21:55:06 -05:00			`class TestDownload(unittest.TestCase):`
Show whole diff in error cases 2013-04-11 12:38:43 -04:00			`maxDiff = None`
streamlined and simplified dynamic tests generation; readded a couple of test features 2012-12-12 08:15:21 -05:00			`def setUp(self):`
			`self.parameters = parameters`
			`self.defs = defs`

typo 2013-01-01 13:07:01 -05:00			`### Dynamically generate tests`
Revert "Don't be too clever" This reverts commit a276e060806c6cabc76b9df964db67939b643e43. 2012-12-12 09:14:58 -05:00			`def generator(test_case):`

test automation 2012-12-11 21:55:06 -05:00			`def test_template(self):`
Remove a commented line I forgot. [ci skip] 2013-04-30 08:21:46 -04:00			`ie = youtube_dl.InfoExtractors.get_info_extractor(test_case['name'])`
streamlined and simplified dynamic tests generation; readded a couple of test features 2012-12-12 08:15:21 -05:00			`if not ie._WORKING:`
			`print('Skipping: IE marked as not _WORKING')`
			`return`
Adapt test_download to support playlists, and remove race conditions 2013-01-01 13:30:29 -05:00			`if 'playlist' not in test_case and not test_case['file']:`
Revert "In tests.json file and md5 join in a 'files' list to handle multiple-file IEs" This made the JSON structure really unreadable and was a quick fix. This reverts commit 6535e9511fc18eee2fc640c77fd42a4a39791915. 2013-01-01 13:07:06 -05:00			`print('Skipping: No output file specified')`
			`return`
streamlined and simplified dynamic tests generation; readded a couple of test features 2012-12-12 08:15:21 -05:00			`if 'skip' in test_case:`
			`print('Skipping: {0}'.format(test_case['skip']))`
			`return`
add info_dict testing to test_download 2012-12-20 08:14:43 -05:00
Simplify test parameter initialization 2013-01-01 13:34:54 -05:00			`params = self.parameters.copy()`
			`params.update(test_case.get('params', {}))`
add info_dict testing to test_download 2012-12-20 08:14:43 -05:00
streamlined and simplified dynamic tests generation; readded a couple of test features 2012-12-12 08:15:21 -05:00			`fd = FileDownloader(params)`
import all IEs when testing to resemble more closely the real env 2013-03-30 21:12:28 -04:00			`for ie in youtube_dl.InfoExtractors.gen_extractors():`
			`fd.add_info_extractor(ie)`
Download progress hooks 2013-01-12 14:34:50 -05:00			`finished_hook_called = set()`
			`def _hook(status):`
			`if status['status'] == 'finished':`
			`finished_hook_called.add(status['filename'])`
			`fd.add_progress_hook(_hook)`
Adapt test_download to support playlists, and remove race conditions 2013-01-01 13:30:29 -05:00
			`test_cases = test_case.get('playlist', [test_case])`
			`for tc in test_cases:`
			`_try_rm(tc['file'])`
Remove .part files before and after tests 2013-01-01 15:15:52 -05:00			`_try_rm(tc['file'] + '.part')`
Adapt test_download to support playlists, and remove race conditions 2013-01-01 13:30:29 -05:00			`_try_rm(tc['file'] + '.info.json')`
			`try:`
Bubble up all the stack of exceptions and retry download tests on timeout errors 2013-03-09 04:05:43 -05:00			`for retry in range(1, RETRIES + 1):`
			`try:`
			`fd.download([test_case['url']])`
			`except (DownloadError, ExtractorError) as err:`
			`if retry == RETRIES: raise`

			`# Check if the exception is not a network related one`
retry on UnavailableVideoError 2013-03-30 21:29:34 -04:00			`if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):`
Bubble up all the stack of exceptions and retry download tests on timeout errors 2013-03-09 04:05:43 -05:00			`raise`

			`print('Retrying: {0} failed tries\n\n##########\n\n'.format(retry))`
			`else:`
			`break`
Adapt test_download to support playlists, and remove race conditions 2013-01-01 13:30:29 -05:00
			`for tc in test_cases:`
add test for infoq 2013-01-01 15:01:49 -05:00			`if not test_case.get('params', {}).get('skip_download', False):`
Switch ComedyCentral test to a permanent URL (They delete full episodes older than a month) 2013-02-01 11:46:03 -05:00			`self.assertTrue(os.path.exists(tc['file']), msg='Missing file ' + tc['file'])`
Download progress hooks 2013-01-12 14:34:50 -05:00			`self.assertTrue(tc['file'] in finished_hook_called)`
Adapt test_download to support playlists, and remove race conditions 2013-01-01 13:30:29 -05:00			`self.assertTrue(os.path.exists(tc['file'] + '.info.json'))`
			`if 'md5' in tc:`
			`md5_for_file = _file_md5(tc['file'])`
			`self.assertEqual(md5_for_file, tc['md5'])`
			`with io.open(tc['file'] + '.info.json', encoding='utf-8') as infof:`
			`info_dict = json.load(infof)`
Improve error reporting for downloads 2013-06-23 15:33:11 -04:00			`for (info_field, expected) in tc.get('info_dict', {}).items():`
			`if isinstance(expected, compat_str) and expected.startswith('md5:'):`
			`self.assertEqual(expected, 'md5:' + md5(info_dict.get(info_field)))`
test: extend the reach of info_dict checking * print the info_dict in a format suitable to easy adding to tests.json during tests if un-tested fields are detected * make it possible to put the crc32 in tests.json if the field is too long * complete the "info_dict" fields in existing tests * fixed the bugs catched doing this 2013-06-09 08:21:42 -04:00			`else:`
Improve error reporting for downloads 2013-06-23 15:33:11 -04:00			`got = info_dict.get(info_field)`
			`self.assertEqual(`
			`expected, got,`
			`u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))`
test: extend the reach of info_dict checking * print the info_dict in a format suitable to easy adding to tests.json during tests if un-tested fields are detected * make it possible to put the crc32 in tests.json if the field is too long * complete the "info_dict" fields in existing tests * fixed the bugs catched doing this 2013-06-09 08:21:42 -04:00
			`# If checkable fields are missing from the test case, print the info_dict`
switch long info_dict fields checking to md5 2013-06-09 09:03:54 -04:00			`test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))`
test: extend the reach of info_dict checking * print the info_dict in a format suitable to easy adding to tests.json during tests if un-tested fields are detected * make it possible to put the crc32 in tests.json if the field is too long * complete the "info_dict" fields in existing tests * fixed the bugs catched doing this 2013-06-09 08:21:42 -04:00			`for key, value in info_dict.items()`
			`if value and key in ('title', 'description', 'uploader', 'upload_date', 'uploader_id', 'location'))`
			`if not all(key in tc.get('info_dict', {}).keys() for key in test_info_dict.keys()):`
			`sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=2) + u'\n')`

			`# Check for the presence of mandatory fields`
			`for key in ('id', 'url', 'title', 'ext'):`
			`self.assertTrue(key in info_dict.keys() and info_dict[key])`
Adapt test_download to support playlists, and remove race conditions 2013-01-01 13:30:29 -05:00			`finally:`
			`for tc in test_cases:`
			`_try_rm(tc['file'])`
Remove .part files before and after tests 2013-01-01 15:15:52 -05:00			`_try_rm(tc['file'] + '.part')`
Adapt test_download to support playlists, and remove race conditions 2013-01-01 13:30:29 -05:00			`_try_rm(tc['file'] + '.info.json')`
streamlined and simplified dynamic tests generation; readded a couple of test features 2012-12-12 08:15:21 -05:00
test automation 2012-12-11 21:55:06 -05:00			`return test_template`
streamlined and simplified dynamic tests generation; readded a couple of test features 2012-12-12 08:15:21 -05:00
Revert "Don't be too clever" This reverts commit a276e060806c6cabc76b9df964db67939b643e43. 2012-12-12 09:14:58 -05:00			`### And add them to TestDownload`
streamlined and simplified dynamic tests generation; readded a couple of test features 2012-12-12 08:15:21 -05:00			`for test_case in defs:`
Revert "Don't be too clever" This reverts commit a276e060806c6cabc76b9df964db67939b643e43. 2012-12-12 09:14:58 -05:00			`test_method = generator(test_case)`
streamlined and simplified dynamic tests generation; readded a couple of test features 2012-12-12 08:15:21 -05:00			`test_method.__name__ = "test_{0}".format(test_case["name"])`
			`setattr(TestDownload, test_method.__name__, test_method)`
Revert "Don't be too clever" This reverts commit a276e060806c6cabc76b9df964db67939b643e43. 2012-12-12 09:14:58 -05:00			`del test_method`
Update download tests 2012-11-28 09:09:56 -05:00

			`if __name__ == '__main__':`
			`unittest.main()`