<?php
/**
 * Fetch a URL with cURL and return the body together with transfer metadata.
 *
 * Redirects are followed (at most 10), HTTP status codes >= 400 are reported
 * as failures (CURLOPT_FAILONERROR), and every content encoding supported by
 * the local libcurl build is requested and decoded automatically.
 *
 * @param string|null $url    Absolute URL to request; surrounding whitespace is ignored.
 * @param string      $cacert Path to a PEM CA bundle *file* used to verify HTTPS peers.
 *                            A copy can be downloaded from
 *                            https://curl.haxx.se/docs/caextract.html
 *                            When the file is missing or unreadable, libcurl's
 *                            default trust store is used instead.
 *
 * @return object Object with three properties:
 *                - response: the body as a string, or false when the transfer failed
 *                - info:     the curl_getinfo() array cast to an object (e.g. ->http_code)
 *                - errors:   the curl_error() message (empty string when there was none)
 */
function curl( $url=NULL, $cacert='c:/wwwroot/cacert.pem' ){
    # Normalise once so the scheme test and the request see the same value
    # (and so NULL is never handed to trim()/parse_url()).
    $url=trim( (string)$url );
    $curl=curl_init();

    # URL schemes are case-insensitive; parse_url() yields null/false when there is none.
    $scheme=strtolower( (string)parse_url( $url, PHP_URL_SCHEME ) );
    if( $scheme==='https' ){
        curl_setopt( $curl, CURLOPT_SSL_VERIFYPEER, true );
        curl_setopt( $curl, CURLOPT_SSL_VERIFYHOST, 2 );
        # CURLOPT_CAINFO takes a bundle file. CURLOPT_CAPATH is deliberately not
        # set: it expects a directory of hashed certificates, not a bundle file.
        if( is_file( $cacert ) && is_readable( $cacert ) ){
            curl_setopt( $curl, CURLOPT_CAINFO, $cacert );
        }
    }

    curl_setopt( $curl, CURLOPT_URL, $url );
    curl_setopt( $curl, CURLOPT_AUTOREFERER, true );
    curl_setopt( $curl, CURLOPT_FOLLOWLOCATION, true );
    curl_setopt( $curl, CURLOPT_MAXREDIRS, 10 );
    curl_setopt( $curl, CURLOPT_FAILONERROR, true );
    curl_setopt( $curl, CURLOPT_HEADER, false );
    curl_setopt( $curl, CURLINFO_HEADER_OUT, false );
    # Return the body from curl_exec() instead of echoing it.
    curl_setopt( $curl, CURLOPT_RETURNTRANSFER, true );
    curl_setopt( $curl, CURLOPT_CONNECTTIMEOUT, 20 );
    curl_setopt( $curl, CURLOPT_TIMEOUT, 60 );
    curl_setopt( $curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 10.0; Curly-Wurly Ding-Dong' );
    # Empty string = advertise all encodings libcurl supports and decode them.
    curl_setopt( $curl, CURLOPT_ENCODING, '' );

    # Array elements are evaluated in order, so the transfer runs before
    # the info and error string are read.
    $res=(object)array(
        'response' => curl_exec( $curl ),
        'info'     => (object)curl_getinfo( $curl ),
        'errors'   => curl_error( $curl )
    );
    curl_close( $curl );

    return $res;
}
# Example: list the variant swatches (title => ASIN) found on a product page.
$url='https://www.amazon.co.uk/dp/B08LZHMQXS?psc=1';
$res=curl( $url );

# Parse only a successful response with a non-empty body:
# DOMDocument::loadHTML() rejects an empty string.
if( (int)$res->info->http_code===200 && is_string( $res->response ) && $res->response!=='' ){
    # Collect libxml parse warnings internally instead of emitting them;
    # remember the previous setting so it can be restored afterwards.
    $previous=libxml_use_internal_errors( true );

    $dom=new DOMDocument;
    $dom->validateOnParse=false;
    $dom->recover=true;
    $dom->strictErrorChecking=false;
    $dom->loadHTML( $res->response );

    libxml_clear_errors();
    libxml_use_internal_errors( $previous );

    $xp=new DOMXPath( $dom );
    # Matches elements whose class attribute is exactly one of these two values.
    $expr='//*[@class="swatchAvailable" or @class="swatchSelect"]';
    $tmp=array();
    $col=$xp->query( $expr );

    # query() returns false for an invalid expression; an empty node list
    # simply produces no iterations.
    if( $col ){
        foreach( $col as $node ){
            $asin=$node->getAttribute('data-defaultasin');
            # Strip the "Click to select " prefix and any pipe characters from the title.
            $title=str_replace( array( 'Click to select ', '|' ), '', $node->getAttribute('title') );
            $tmp[$title]=$asin;
        }
    }

    # The titles and ASINs come from a remote page, so escape them before
    # writing them into HTML output.
    printf('<pre>%s</pre>',htmlspecialchars( print_r($tmp,true), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8' ));
}
?>
Running the script above outputs:
Array
(
[2031 Deep Blue] => B08LZH84TN
[2031 Khaki] => B08LZHMQXS
[2031 Light Grey] => B08LZFGGRL
[2031 Navy] => B08LZNGD5H
[2031 Deep Grey] => B08LZHZXDW
[2031 Wine Red] => B08LZHHGPD
[8636 All White] => B07PHQ69B7
[2031 All Black] => B08LZKXC3G
)
When tested with a different URL, it yielded:
Array
(
[Wine] => B07NYYZSWG
[Gold] => B07H4P7TZP
[Rose Gold] => B07H4ZMTML
[Silver Glitz] => B07P27Y9SQ
)
与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…