-
-
Notifications
You must be signed in to change notification settings - Fork 184
Description
I am trying to use HtmlUnit to scrape the page at https://www.maersk.com/tracking/
I have already enabled JavaScript with webClient.getOptions().setJavaScriptEnabled(true); here is the full code:
// Fetch the Maersk tracking page with JavaScript enabled and print the resulting DOM as XML.
String url = "https://www.maersk.com/tracking/";
HtmlPage htmlPage;
// try-with-resources closes the WebClient when the block exits, so everything
// that touches the page (including asXml()) is done inside the block.
try (WebClient webClient = new WebClient(BrowserVersion.FIREFOX)) {
    webClient.getOptions().setUseInsecureSSL(true);
    webClient.getOptions().setCssEnabled(true);
    webClient.getOptions().setJavaScriptEnabled(true);
    // Script errors on the page are tolerated instead of aborting the load.
    webClient.getOptions().setThrowExceptionOnScriptError(false);
    webClient.setAjaxController(new NicelyResynchronizingAjaxController());
    webClient.getCookieManager().setCookiesEnabled(true);
    try {
        htmlPage = webClient.getPage(url);
    } catch (IOException e) {
        throw new RuntimeException("Failed to load " + url, e);
    }
    // getPage() returns once the initial document has been processed; scripts that
    // were scheduled asynchronously may still be pending. Wait (up to 10 s) for
    // them before reading the DOM.
    // NOTE(review): the page loads its application bundles via <script type="module">
    // with nomodule fallbacks; whether those execute depends on the HtmlUnit
    // version in use -- confirm.
    webClient.waitForBackgroundJavaScript(10_000);
    System.out.println("HTML Page: \n" + htmlPage.asXml());
}
However, the page I get back still contains the message "We're sorry but Track Shipment | Cargo & Container Tracking | Maersk doesn't work properly without JavaScript enabled. Please enable it to continue."
The HTML page I got is shown below:
<?xml version="1.0" encoding="UTF-8"?>
<html lang="en">
<head>
<meta charset="utf-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=edge"/>
<meta name="viewport" content="width=device-width,initial-scale=1"/>
<link rel="preconnect" href="https://assets.maerskline.com"/>
<title>
Track Shipment | Cargo &amp; Container Tracking | Maersk
</title>
<meta name="description" content="It's easy to track your shipment online with Maersk. Simply enter your bill of lading, shipment or container number and click Track."/>
<iframe src="javascript:void(0)" title="" style="width:0;height:0;border:0;display:none;">
</iframe>
<script async="" src="//www.googletagmanager.com/gtm.js?id=GTM-W6LN7D">
</script>
<script type="text/javascript" src="https://www.maersk.com/static/4cc4672db364488d1796395f48d13b19892400114f7597">
</script>
<script>
//<![CDATA[
var dataLayer = [];
(function (w, d, s, l, i) {
w[l] = w[l] || []; w[l].push({ 'gtm.start': new Date().getTime(), event: 'gtm.js' });
var f = d.getElementsByTagName(s)[0], j = d.createElement(s), dl = l != 'dataLayer' ? '&l=' + l : '';
j.async = true; j.src = '//www.googletagmanager.com/gtm.js?id=' + i + dl;
f.parentNode.insertBefore(j, f);
})(window, document, 'script', 'dataLayer', 'GTM-W6LN7D');
//]]>
</script>
<script src="https://www.datadoghq-browser-agent.com/datadog-rum-eu.js">
</script>
<script>
//<![CDATA[
if (window.location.hostname !== "localhost") {
var isProd = (["www", "beta", "mybeta"].indexOf(window.location.host.substr(0, window.location.host.indexOf('.'))) > -1);
window.DD_RUM && window.DD_RUM.init(
{
clientToken: isProd ? 'pub37f04b35d0d33532dc0c637c1350075d' : 'pubf43a1b6494c468ba2e182ddc2bb4838c',
applicationId: isProd ? '3349ab16-997d-4708-b10e-31ad5079dab8' : '9cddc80f-4698-4d53-ae1b-3c817fe3f033',
service: "ui-tracking"
}
);
}
//]]>
</script>
<script>
//<![CDATA[
!function(a){var e="https://s.go-mpulse.net/boomerang/",t="addEventListener";if("False"=="True")a.BOOMR_config=a.BOOMR_config||{},a.BOOMR_config.PageParams=a.BOOMR_config.PageParams||{},a.BOOMR_config.PageParams.pci=!0,e="https://s2.go-mpulse.net/boomerang/";if(window.BOOMR_API_key="T2WNA-KCBS3-CZU4N-2SR5A-FXN8A",function(){function n(e){a.BOOMR_onload=e&&e.timeStamp||(new Date).getTime()}if(!a.BOOMR||!a.BOOMR.version&&!a.BOOMR.snippetExecuted){a.BOOMR=a.BOOMR||{},a.BOOMR.snippetExecuted=!0;var i,_,o,r=document.createElement("iframe");if(a[t])a[t]("load",n,!1);else if(a.attachEvent)a.attachEvent("onload",n);r.src="javascript:void(0)",r.title="",r.role="presentation",(r.frameElement||r).style.cssText="width:0;height:0;border:0;display:none;",o=document.getElementsByTagName("script")[0],o.parentNode.insertBefore(r,o);try{_=r.contentWindow.document}catch(O){i=document.domain,r.src="javascript:var d=document.open();d.domain='"+i+"';void(0);",_=r.contentWindow.document}_.open()._l=function(){var a=this.createElement("script");if(i)this.domain=i;a.id="boomr-if-as",a.src=e+"T2WNA-KCBS3-CZU4N-2SR5A-FXN8A",BOOMR_lstart=(new Date).getTime(),this.body.appendChild(a)},_.write("<bo"+'dy onload="document._l();">'),_.close()}}(),"".length>0)if(a&&"performance"in a&&a.performance&&"function"==typeof a.performance.setResourceTimingBufferSize)a.performance.setResourceTimingBufferSize();!function(){if(BOOMR=a.BOOMR||{},BOOMR.plugins=BOOMR.plugins||{},!BOOMR.plugins.AK){var 
e=""=="true"?1:0,t="",n="qpsmkcixjo2kmy3jrxua-f-fbd5c6e16-clientnsv4-s.akamaihd.net",i="false"=="true"?2:1,_={"ak.v":"33","ak.cp":"937959","ak.ai":parseInt("476710",10),"ak.ol":"0","ak.cr":122,"ak.ipv":4,"ak.proto":"http/1.1","ak.rid":"ffc6d1c","ak.r":42309,"ak.a2":e,"ak.m":"","ak.n":"essl","ak.bpcip":"131.228.197.0","ak.cport":3782,"ak.gh":"23.1.32.219","ak.quicv":"","ak.tlsv":"tls1.3","ak.0rtt":"","ak.csrc":"-","ak.acc":"reno","ak.t":"1667861992","ak.ak":"hOBiQwZUYzCg5VSAfCLimQ==ZcX216iRQ7gdX2lqDNcgWKblqD6BfhDD6vSQYUicAsXIq1b/o+srNy+XUPKDRnnmxpaD9h3G6Hgmy5blXXzGfHaE8gmew08ydPFk7q24D2E8YWP7QaCbYrfTJggSr31yIZRscY4DxrkaRAjx/Z89AlyJJ96R0QjQ8AHxhCIU8tyxPKUUXSMhb0g8K1lOvc0CgbBQWQwJOtRr2Xf7oORoIe4SOdN2OuwMKmr3tPuV3vP613kUXFZMa4R3GISqsGF8Ho1XZsu8Z9uYq804/vpUZ2GKthjPL58eEjzQpQj1ypoLSOhqJ42b4AGpYvDBxDBGWHTV0AmAIW54JNCl/guTQAQ7FUXpy8UKDs4+QN34uHpzCdJCWqqgj5bfQnBDVg4Bnf/xMTqEaCo1Za8go4/WOh11E4gtLhWqfF2QTax4Z1U=","ak.pv":"420","ak.dpoabenc":"","ak.tf":i};if(""!==t)_["ak.ruds"]=t;var o={i:!1,av:function(e){var t="http.initiator";if(e&&(!e[t]||"spa_hard"===e[t]))_["ak.feo"]=void 0!==a.aFeoApplied?1:0,BOOMR.addVar(_)},rv:function(){var a=["ak.bpcip","ak.cport","ak.cr","ak.csrc","ak.gh","ak.ipv","ak.m","ak.n","ak.ol","ak.proto","ak.quicv","ak.tlsv","ak.0rtt","ak.r","ak.acc","ak.t","ak.tf"];BOOMR.removeVar(a)}};BOOMR.plugins.AK={akVars:_,akDNSPreFetchDomain:n,init:function(){if(!o.i){var a=BOOMR.subscribe;a("before_beacon",o.av,null,null),a("onbeacon",o.rv,null,null),o.i=!0}return this},is_complete:function(){return!0}}}}()}(window);
//]]>
</script>
<script>
//<![CDATA[
bazadebezolkohpepadr="1439975919"
//]]>
</script>
<script type="text/javascript" src="https://www.maersk.com/akam/13/55d44856" defer="">
</script>
</head>
<body>
<noscript>
<strong>We're sorry but Track Shipment | Cargo &amp; Container Tracking | Maersk doesn't work properly without JavaScript enabled. Please enable it to continue.</strong>
</noscript>
<main id="main">
<div id="maersk-app">
</div>
</main>
<noscript>
<iframe src="https://www.googletagmanager.com/ns.html?id=GTM-W6LN7D" height="0" width="0" style="display:none;visibility:hidden"></iframe>
</noscript>
<script type="module" src="/tracking/js/chunk-vendors.a0eda998.js">
</script>
<script type="module" src="/tracking/js/index.9fb0269e.js">
</script>
<script>
//<![CDATA[
!function(){var e=document,t=e.createElement("script");if(!("noModule"in t)&&"onbeforeload"in t){var n=!1;e.addEventListener("beforeload",function(e){if(e.target===t)n=!0;else if(!e.target.hasAttribute("nomodule")||!n)return;e.preventDefault()},!0),t.type="module",t.src=".",e.head.appendChild(t),t.remove()}}();
//]]>
</script>
<script src="/tracking/js/chunk-vendors-legacy.a0eda998.js" nomodule="">
</script>
<script src="/tracking/js/index-legacy.216f3445.js" nomodule="">
</script>
<noscript>
<img src="https://www.maersk.com/akam/13/pixel_55d44856?a=dD1mYTFjOTcyOTAyMzgyNzE5ODM1OTE2ODMzYTUzOWUzNWUyNzQyYzkxJmpzPW9mZg==" style="visibility: hidden; position: absolute; left: -999px; top: -999px;" />
</noscript>
<script type="text/javascript" src="/IyR_SZ/rh2Hr/ffW1-/2Q/bYfV8kbLiO/fgMKIwUl/ekUHI/U4mHDUB">
</script>
</body>
</html>
Does anyone know whether something is not configured correctly? Or does the Maersk website require some special setting to access it? Could someone try scraping https://www.maersk.com/tracking/ and let me know? Thanks.
(I also posted this question on Stack Overflow: https://stackoverflow.com/questions/74353983/htmlunit-webclient-enabled-javascript-but-still-got-javascript-disabled-message. Someone there suggested that I open a ticket here in the HtmlUnit GitHub repository.)