I've found a way to do this using casperjs (it should work with phantomjs alone if you implement the download function using XMLHttpRequest, but i've not tried).
I'll leave you the working example, that tries to download the mos recent PDF from this page. When you click the download link, some javascript code is triggered that generates some hidden input fields that are then POSTed.
What we do is replace the form's onsubmit function so that it cancels the submission, and get the form destination (action) and all its fields. We use this information later to do the actual download.
var casper=require('casper').create();
casper.start("https://sede.gobcan.es/tributos/jsf/publico/notificaciones/comparecencia/ultimosanuncios.jsp", function() {
var theFormRequest = this.page.evaluate(function() {
var request = {};
var formDom = document.forms["resultadoUltimasNotif"];
formDom.onsubmit = function() {
//iterate the form fields
var data = {};
for(var i = 0; i < formDom.elements.length; i++) {
data[formDom.elements[i].name] = formDom.elements[i].value;
}
request.action = formDom.action;
request.data = data;
return false; //Stop form submission
}
//Trigger the click on the link.
var link = $("table.listado tbody tr:first a");
link.click();
return request; //Return the form data to casper
});
//Start the download
casper.download(theFormRequest.action, "downloaded_file.pdf", "POST", theFormRequest.data);
});
casper.run();
Note: you have to run it with --ignore-ssl-errors, as the CA they use isn't in your browser default CA list.
casperjs --ignore-ssl-errors=true downloadscript.js
与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…