I did a lot of search... tried some methods.. There is webpage that html contents come after script execution.
I used phantomJS with different method.
1-) Checking with document.ready
var page = require('webpage').create();
console.log('The default user agent is ' + page.settings.userAgent);
page.settings.userAgent = 'SpecialAgent';
page.open('http://sosyal.hurriyet.com.tr/yazar/niobe_141/seni-unutmuyoruz-pasam_40011882', function(status) {
function checkReadyState() {
setTimeout(function () {
var readyState = page.evaluate(function () {
return document.readyState;
});
if ("complete" === readyState) {
onPageReady();
} else {
checkReadyState();
}
});
}
checkReadyState();
});
function onPageReady() {
var htmlContent = page.evaluate(function () {
return document.body.textContent;
});
console.log(htmlContent);
phantom.exit();
}
Result:Script not loaded so unloaded html returned..
2-)Setting timeout too long
page.open(address, function (status) {
if (status !== 'success') {
console.log('Unable to load the address!');
phantom.exit();
} else {
window.setTimeout(function () {
var htmlContent = page.evaluate(function () {
return document.getElementsByClassName('hsaalicc-text').textContent;
});
console.log(htmlContent);
}, 1000); // Change timeout as required to allow sufficient time
}
});
Result:Script not loaded so unloaded html returned..
So although I'm android developper and have not too much jquery knowlodge looked page code with chrome developper console... And I see all data that should be load is in script with window.articleDetailData
Moreover I found the function that load data content.
('#templateArticleDetail').tmpl(data).appendTo('#articleDetailContainer');
There is no time parameter,but in mobile device it takes time. But in code I understand when page loaded it should copy to #articleDetailContainer
So my question 1-) why document ready and high timeout not return loaded script page with phantomJS 2-) Is there a way to parse windows.data under script tag??
If I could not find any easy way,will use regex to parse script