'use strict';

const puppeteer = require('puppeteer');
const readline = require('readline');
const fs = require('fs');
const needle = require('needle');
const { execSync } = require('child_process');

const LIBRARY_URL = 'https://bibliotheque.insa-lyon.fr/parcours-recherche/panorama_des_ressources';
const PROXY_LOGIN_URL = 'https://docelec.insa-lyon.fr/login?url=http://nouveau.europresse.com/access/ip/default.aspx?un=INSAT_3';
const SEARCH_FORM_URL = 'https://nouveau-europresse-com.docelec.insa-lyon.fr/webpages/Pdf/SearchForm.aspx';

// NOTE(review): the original merge command could not be recovered from the
// source (the text between the page-file loop and the command callback was
// lost). `pdfunite` (poppler-utils) and this output name are assumptions —
// confirm against the tool the script was meant to call.
const MERGE_TOOL = 'pdfunite';
const MERGED_FILENAME = 'output.pdf';

/**
 * Resolves after the given number of milliseconds.
 * Used instead of `waitForTimeout`, which recent Puppeteer releases removed.
 *
 * @param {number} ms - Time to wait, in milliseconds.
 * @returns {Promise<void>}
 */
function delay(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}

/**
 * Prompts on stdout and reads one line from stdin.
 *
 * @param {string} label - Prompt text; ': ' is appended to it.
 * @param {boolean} [muted=false] - When true, typed input is echoed as '*'.
 * @returns {Promise<string>} The line entered by the user.
 */
function readLine(label, muted = false) {
  const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
  rl.stdoutMuted = false;
  // Overrides readline's internal output hook so echoed input can be masked.
  rl._writeToOutput = function _writeToOutput(str) {
    // Line terminators pass through so the next output starts on a new line.
    if (rl.stdoutMuted && !/^[\r\n]+$/.test(str)) rl.output.write('*');
    else rl.output.write(str);
  };
  return new Promise((resolve) => {
    rl.question(`${label}: `, (answer) => {
      rl.close();
      resolve(answer);
    });
    // Mute only after the prompt has been written, so the prompt stays readable.
    rl.stdoutMuted = muted;
  });
}

/**
 * Clicks an element and waits for the navigation that the click triggers.
 *
 * @param {object} page - Puppeteer page.
 * @param {string} selector - CSS selector of the element to click.
 * @returns {Promise<void>}
 */
async function clickAndWaitForNavigation(page, selector) {
  // The wait is registered together with the click so the navigation is not missed.
  await Promise.all([page.waitForNavigation(), page.click(selector)]);
}

/**
 * Downloads the PDF targeted by an intercepted request into `filename`,
 * reusing the request headers and the popup's cookies.
 *
 * @param {object} popup - Puppeteer page that issued the request.
 * @param {object} request - Intercepted Puppeteer request.
 * @param {string} filename - Path the PDF is written to.
 * @returns {Promise<void>} Resolves once needle reports the download finished.
 * @throws {Error} When the request fails or the server does not answer 200.
 */
async function downloadPdf(popup, request, filename) {
  const url = request.url();
  const cookies = await popup.cookies();
  const headers = {
    ...request.headers(),
    Cookie: cookies.map((ck) => `${ck.name}=${ck.value}`).join(';'),
  };

  // The browser copy of the request is not needed; release it instead of
  // leaving it stalled by the interception.
  request.abort().catch((err) => {
    console.error(`could not abort intercepted request: ${err.message}`);
  });

  await new Promise((resolve, reject) => {
    needle.get(url, { headers, output: filename }, (err, resp) => {
      if (err) {
        reject(err);
      } else if (resp.statusCode !== 200) {
        reject(new Error(`unexpected HTTP status ${resp.statusCode} for ${filename}`));
      } else {
        resolve();
      }
    });
  });
}

/**
 * Logs in through the library portal, lets the user pick a newspaper and
 * downloads every page of its PDF edition as `0.pdf`, `1.pdf`, ...
 *
 * @returns {Promise<string[]>} Names of the downloaded page files, in request order.
 * @throws {Error} When the document list frame is missing or a download fails.
 */
async function fetchNewspaperPages() {
  const browser = await puppeteer.launch({ headless: true });
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1366, height: 768 });
    await page.goto(LIBRARY_URL);

    // Open "my account".
    await clickAndWaitForNavigation(page, '#boite_13 > a');
    // Go to the sign-in form.
    await clickAndWaitForNavigation(page, '#boite_23 > div.widget-body.card-body > div > button');

    const username = await readLine('login');
    await page.type('#username', username);
    const password = await readLine('password', true);
    await page.type('#password', password);
    // Submit the CAS login form.
    await clickAndWaitForNavigation(page, '#fm1 > div.row.btn-row > input.btn-submit');

    await page.goto(PROXY_LOGIN_URL);
    await page.goto(SEARCH_FORM_URL);

    const journals = await page.evaluate(() => Array.from(
      document.querySelectorAll('#lbSources option'),
      (option) => `${option.value} : ${option.innerText}`,
    ));
    for (const journal of journals) console.log(journal);

    const journalId = await readLine('newspaper id'); // e.g. "EC_P", "LF_P"
    await page.select('select#lbSources', journalId);

    // Open the PDF popup.
    await Promise.all([page.waitForNavigation(), page.click('#btnSearch')]);
    await delay(2000);
    const openPages = await browser.pages();
    const popup = openPages[openPages.length - 1];

    // Each entry settles to { filename, error } and never rejects, so a failed
    // download cannot surface as an unhandled rejection before it is awaited.
    const downloads = [];
    await popup.setRequestInterception(true);
    popup.on('request', (request) => {
      if (!request.url().includes('DocName=pdf')) {
        request.continue().catch((err) => {
          console.error(`could not continue request: ${err.message}`);
        });
        return;
      }
      // The file index is taken synchronously so concurrent requests never share one.
      const filename = `${downloads.length}.pdf`;
      downloads.push(downloadPdf(popup, request, filename).then(
        () => ({ filename, error: null }),
        (error) => ({ filename, error }),
      ));
    });

    const listFrame = popup.frames().find((frame) => frame.name() === 'ListDoc');
    if (!listFrame) {
      throw new Error('frame "ListDoc" not found in the PDF popup');
    }
    await listFrame.waitForSelector('#listdoc');
    const linkSelectors = await listFrame.evaluate(() => Array.from(
      document.querySelectorAll('#listdoc a'),
      (link) => `#${link.id}`,
    ));

    for (const selector of linkSelectors) {
      console.log(`page: ${selector}`);
      // Leave the viewer a second to issue the PDF request for this page.
      await Promise.all([listFrame.click(selector), delay(1000)]);
    }

    // Wait for every download before the browser is closed and the files are used.
    const results = await Promise.all(downloads);
    const failures = results.filter((result) => result.error);
    if (failures.length > 0) {
      const details = failures
        .map((failure) => `${failure.filename}: ${failure.error.message}`)
        .join(', ');
      throw new Error(`failed to download ${failures.length} page(s): ${details}`);
    }
    return results.map((result) => result.filename);
  } finally {
    await browser.close();
  }
}

/**
 * Merges the page files into MERGED_FILENAME with the external merge tool.
 *
 * @param {string[]} filenames - Page files, in reading order.
 * @returns {boolean} True when the merge command exited successfully.
 */
function mergePdfs(filenames) {
  try {
    // Every file name is generated by this script, so the command line is safe to build.
    const stdout = execSync(`${MERGE_TOOL} ${filenames.join(' ')} ${MERGED_FILENAME}`, {
      encoding: 'utf8',
      stdio: 'pipe',
    });
    console.log(`stdout: ${stdout}`);
    return true;
  } catch (error) {
    console.log(`error: ${error.message}`);
    if (error.stderr) {
      console.log(`stderr: ${error.stderr}`);
    }
    return false;
  }
}

/**
 * Script entry point: downloads the pages, merges them and removes the
 * per-page files once the merged document exists.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const pdfFilenames = await fetchNewspaperPages();
  if (pdfFilenames.length === 0) {
    console.log('no page downloaded');
    return;
  }

  // The merge runs synchronously, so the page files are only removed after it ended.
  if (!mergePdfs(pdfFilenames)) {
    // Keep the page files so the merge can be retried by hand.
    process.exitCode = 1;
    return;
  }

  for (const filename of pdfFilenames) {
    try {
      fs.unlinkSync(filename);
    } catch (err) {
      console.error(err);
    }
  }
}

main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});