Welcome to OStack Knowledge Sharing Community for programmer and developer-Open, Learning and Share
Welcome To Ask or Share your Answers For Others

Categories

0 votes
202 views
in Technique[技术] by (71.8m points)

json - Create array from DOM node values in PHP

I'm trying to create an array of Amazon product variants using PHP's DOM extension. My desired array should look like this:


["Variant Name":"ASIN number"]

Here is my code:

// Download the product page; the body is returned as a string (false on failure).
$ch = curl_init();

curl_setopt($ch, CURLOPT_URL, 'https://www.amazon.co.uk/dp/B08LZHMQXS?psc=1');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
$html = curl_exec($ch);
curl_close($ch);

// Create the result holders up front: assigning a property on an undefined
// variable is a fatal Error on PHP 8 (and a warning on older versions).
$json1 = new stdClass();
$json1->SizeVariant3 = array();   // variant names, in document order
$json1->SizeVariant4 = array();   // matching ASINs, same order

$json = new stdClass();
$json->VariantB = array();        // variant name => ASIN

if (is_string($html) && $html !== '') {

    // Real-world markup is rarely valid; collect libxml's complaints
    // internally instead of emitting a PHP warning for each of them.
    $previous = libxml_use_internal_errors(true);
    $dom = new DomDocument();
    $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    $dom_xpath = new DOMXpath($dom);
    $variants = $dom_xpath->query('//*[@class="swatchAvailable" or @class="swatchSelect"]');

    foreach ($variants as $data) {

        $input = $data->getAttribute("data-defaultasin");
        $inputn = $data->getAttribute("title");

        // Swatches without an ASIN carry nothing useful.
        if (empty($input)) {
            continue;
        }

        // Only the first 10 characters of the attribute are used as the ASIN.
        $asin = substr($input, 0, 10);

        // The title reads "Click to select <name>"; drop the prefix to get the
        // name. Removing it (rather than splitting on it) also works when the
        // prefix is absent.
        $name = trim(str_replace("Click to select ", "", $inputn));

        $json1->SizeVariant3[] = $name;
        $json1->SizeVariant4[] = $asin;
    }

    // Build the name => ASIN map once, after the loop, instead of rebuilding
    // it on every iteration. Both lists always have the same length here.
    if (count($json1->SizeVariant3) > 0) {
        $json->VariantB = array_combine($json1->SizeVariant3, $json1->SizeVariant4);
    }
}



Although my code works, it contains some mistakes and might not work for all Amazon products, so I would appreciate suggestions and improvements. Also, my output is a JSON object:

{
        "2031 Deep Blue": "B08LZH84TN",
        "2031 Khaki": "B08LZHMQXS",
 }

while I want it to be an array as I mentioned above.

question from:https://stackoverflow.com/questions/65887903/create-array-from-dom-node-values-in-php

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
Welcome To Ask or Share your Answers For Others

1 Answer

0 votes
by (71.8m points)
<?php       
    /**
     * Perform a GET request with cURL and return the body plus transfer details.
     *
     * Redirects are followed (up to 10), any encoding the local cURL build
     * supports is accepted, and HTTP status codes >= 400 are treated as
     * failures (CURLOPT_FAILONERROR), in which case `response` is false.
     *
     * @param string|null $url  Address to fetch; surrounding whitespace is ignored.
     * @return object  stdClass with three members:
     *                 `response` - body as a string, or false on failure;
     *                 `info`     - the curl_getinfo() array cast to an object;
     *                 `errors`   - curl_error() text, empty when there was none.
     */
    function curl( $url=NULL ){
        $cacert='c:/wwwroot/cacert.pem';    # download a copy from internet - https://curl.haxx.se/docs/caextract.html

        # Normalise once; passing NULL to trim()/parse_url() is deprecated in PHP 8.1+
        $url=trim( (string)$url );

        $options=array(
            CURLOPT_URL             =>  $url,
            CURLOPT_AUTOREFERER     =>  true,
            CURLOPT_FOLLOWLOCATION  =>  true,
            CURLOPT_FAILONERROR     =>  true,
            CURLOPT_HEADER          =>  false,
            CURLINFO_HEADER_OUT     =>  false,
            CURLOPT_RETURNTRANSFER  =>  true,
            CURLOPT_CONNECTTIMEOUT  =>  20,
            CURLOPT_TIMEOUT         =>  60,
            CURLOPT_USERAGENT       =>  'Mozilla/5.0 (Windows NT 10.0; Curly-Wurly Ding-Dong',
            CURLOPT_MAXREDIRS       =>  10,
            CURLOPT_ENCODING        =>  ''
        );

        # URL schemes are case-insensitive, so compare in lower case
        if( strtolower( (string)parse_url( $url, PHP_URL_SCHEME ) )==='https' ){
            $options[ CURLOPT_SSL_VERIFYPEER ]=true;
            $options[ CURLOPT_SSL_VERIFYHOST ]=2;

            # CURLOPT_CAINFO takes a bundle FILE. ( CURLOPT_CAPATH expects a
            # directory, so the bundle must not be passed to it. ) When the
            # bundle is not present, fall back to cURL's default CA store
            # instead of failing every HTTPS request.
            if( is_readable( $cacert ) ){
                $options[ CURLOPT_CAINFO ]=$cacert;
            }
        }

        $curl=curl_init();
        curl_setopt_array( $curl, $options );

        $res=(object)array(
            'response'  =>  curl_exec( $curl ),
            'info'      =>  (object)curl_getinfo( $curl ),
            'errors'    =>  curl_error( $curl )
        );
        curl_close( $curl );
        return $res;
    }
    
    
    
    
    
    # Product page whose variant swatches are to be listed
    $url='https://www.amazon.co.uk/dp/B08LZHMQXS?psc=1';
    
    $res=curl( $url );
    if( $res->info->http_code==200 && is_string( $res->response ) ){
    
        # Real-world markup is rarely valid: keep libxml's complaints internal
        # while parsing, then put the previous error mode back.
        $previous=libxml_use_internal_errors( true );
        $dom=new DOMDocument;
        
        $dom->validateOnParse=false;
        $dom->recover=true;
        $dom->strictErrorChecking=false;
        $dom->loadHTML( $res->response );
        libxml_clear_errors();
        libxml_use_internal_errors( $previous );
        
        $xp=new DOMXPath( $dom );
        $expr='//*[@class="swatchAvailable" or @class="swatchSelect"]';
        
        # variant name => ASIN
        $tmp=array();
        
        $col=$xp->query( $expr );
        if( $col && $col->length > 0 ){
            foreach( $col as $node ){
                $asin=trim( $node->getAttribute('data-defaultasin') );

                # a swatch without an ASIN would only add an empty entry
                if( $asin==='' )continue;

                # the title reads "Click to select <name>" - keep just the name
                $title=trim( str_replace( array( 'Click to select ', '|' ), '', $node->getAttribute('title') ) );
                $tmp[$title]=$asin;
            }
        }

        # The names come from a remote page, so escape them before writing HTML
        printf('<pre>%s</pre>',htmlspecialchars( print_r($tmp,true), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8' ));
    }else{
        # Surface the failure rather than finishing silently
        printf(
            '<pre>Request failed (HTTP %d): %s</pre>',
            $res->info->http_code,
            htmlspecialchars( (string)$res->errors, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8' )
        );
    }
?>

Which outputs:

Array
(
    [2031 Deep Blue] => B08LZH84TN
    [2031 Khaki] => B08LZHMQXS
    [2031 Light Grey] => B08LZFGGRL
    [2031 Navy] => B08LZNGD5H
    [2031 Deep Grey] => B08LZHZXDW
    [2031 Wine Red] => B08LZHHGPD
    [8636 All White] => B07PHQ69B7
    [2031 All Black] => B08LZKXC3G
)

When tested with a new url it yielded:

Array
(
    [Wine] => B07NYYZSWG
    [Gold] => B07H4P7TZP
    [Rose Gold] => B07H4ZMTML
    [Silver Glitz] => B07P27Y9SQ
)

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
Welcome to OStack Knowledge Sharing Community for programmer and developer-Open, Learning and Share
Click Here to Ask a Question

...