<?php
namespace RSSImporter\Included\Importer\CrawlTargets;


use \RSSImporter\Included\Importer\Crawler;
use \RSSImporter\Included\Importer\CrawlTargetInterface;

// Isna.ir crawl worker
/**
 * Crawl target for isna.ir article pages.
 *
 * Downloads an article through Crawler::request() and extracts the title,
 * summary, body, lead image and category label using XPath queries.
 */
class Isna extends Crawler implements CrawlTargetInterface
{
    /**
     * Download an article page and extract its content.
     *
     * @param string $url Absolute article URL. The fourth "/"-separated piece
     *                    of the path (the slug) is percent-encoded before the
     *                    request; query string and fragment are not sent.
     *
     * @return array Empty array when the page has no title or no body.
     *               Otherwise an array with the keys 'main_image', 'title',
     *               'introtext' (string, or null when the node is missing),
     *               'body' and 'categories' (trimmed strings, '' when missing).
     *
     * @throws \InvalidArgumentException When $url has no scheme or host.
     * @throws \Exception                When the page could not be downloaded.
     */
    public function loadNews(string $url):array
    {
        $pageData = Crawler::request($this->buildArticleUrl($url));

        if(!$pageData)
        {
            throw new \Exception("Could not crawl data.", 1);
        }

        $finder = new \DOMXPath($this->loadArticleDom((string) $pageData));

        $title = $this->queryNodeText($finder, "//h1[@class='first-title']");
        $body  = trim((string) $this->queryNodeText($finder, "//div[@class='item-text']"));

        // An article without a title or a body is treated as "nothing found".
        if($title === null || $title === '' || $body === '')
        {
            return [];
        }

        return [
            'main_image' => $this->queryNodeText($finder, "//figure[@class='item-img img-md']/img/@src"),
            'title'      => $title,
            'introtext'  => $this->queryNodeText($finder, "//p[@class='summary']"),
            'body'       => $body,
            // The category label is read from the second "text-meta" span.
            'categories' => trim((string) $this->queryNodeText($finder, "//span[@class='text-meta']", 1)),
        ];
    }

    /**
     * Rebuild the article URL with a percent-encoded slug segment.
     *
     * Only the slug segment itself is rewritten, so identical text elsewhere
     * in the path is left untouched. The segment is decoded before encoding,
     * which keeps an already-encoded slug from being encoded twice.
     *
     * @param string $url Article URL as received by loadNews().
     *
     * @return string scheme://host[:port]/path with the slug encoded.
     *
     * @throws \InvalidArgumentException When $url has no scheme or host.
     */
    private function buildArticleUrl(string $url):string
    {
        $parts = parse_url($url);

        if($parts === false || empty($parts['scheme']) || empty($parts['host']))
        {
            throw new \InvalidArgumentException("Invalid article URL: " . $url);
        }

        // The path of an absolute URL starts with "/", so piece 0 is empty
        // and piece 3 is the third real path segment (the slug).
        $segments  = explode('/', $parts['path'] ?? '/');
        $slugIndex = 3;

        if(isset($segments[$slugIndex]))
        {
            $segments[$slugIndex] = rawurlencode(rawurldecode($segments[$slugIndex]));
        }

        $authority = $parts['host'];

        if(isset($parts['port']))
        {
            $authority .= ':' . $parts['port'];
        }

        // The path already carries its leading "/"; adding another one would
        // produce "host//path".
        return $parts['scheme'] . '://' . $authority . implode('/', $segments);
    }

    /**
     * Parse downloaded markup into a DOM document.
     *
     * Parser diagnostics are collected through libxml's internal error buffer
     * and discarded, instead of being silenced with the "@" operator.
     *
     * @param string $html Raw page markup.
     *
     * @return \DOMDocument The parsed document.
     */
    private function loadArticleDom(string $html):\DOMDocument
    {
        $dom = new \DOMDocument();
        $dom->validateOnParse = true; // has to be set before the markup is loaded

        $previous = libxml_use_internal_errors(true);

        // The XML declaration prefix makes the parser read the markup as UTF-8.
        $dom->loadHTML('<?xml encoding="utf-8" ?>' . $html);

        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        return $dom;
    }

    /**
     * Text content of the n-th node matched by an XPath expression.
     *
     * @param \DOMXPath $finder     XPath evaluator bound to the article document.
     * @param string    $expression XPath expression to evaluate.
     * @param int       $index      Zero-based position in the result list.
     *
     * @return string|null The node's text content, or null when the query
     *                     fails or has no node at that position.
     */
    private function queryNodeText(\DOMXPath $finder, string $expression, int $index = 0)
    {
        $nodes = $finder->query($expression);

        if($nodes === false)
        {
            return null;
        }

        $node = $nodes->item($index);

        return $node === null ? null : $node->textContent;
    }

    /**
     * Collect the trimmed text of every breadcrumb list item.
     *
     * Not called by loadNews() in this class.
     *
     * @param \DOMXPath $finder XPath evaluator bound to the article document.
     *
     * @return array List of breadcrumb labels in document order.
     */
    private function extractCategories(\DOMXPath $finder):array
    {
        $categories = [];
        $items = $finder->query('//ol[@class="breadcrumb vertical"]/li');

        if($items === false)
        {
            return $categories;
        }

        foreach ($items as $item)
        {
            $categories[] = trim($item->textContent);
        }

        return $categories;
    }
}
