<?php
namespace RSSImporter\Included\Importer\CrawlTargets;

use \RSSImporter\Included\Importer\Crawler;
use \RSSImporter\Included\Importer\CrawlTargetInterface;

/**
 * Crawl target that extracts a single article (image, title, intro text,
 * body and breadcrumb categories) from a fetched HTML page using XPath.
 */
class MashreghNews extends Crawler implements CrawlTargetInterface
{
    /**
     * Fetches the page at $url and extracts the article fields from it.
     *
     * @param string $url Address of the article page to crawl.
     *
     * @return array Empty array when the page has no title or no body.
     *               Otherwise an array with the keys:
     *               - 'main_image' (string|null) value of the first //figure/img/@src, null if absent
     *               - 'title'      (string)
     *               - 'introtext'  (string) empty string if absent
     *               - 'body'       (string)
     *               - 'categories' (string[])
     *
     * @throws \Exception When the request yields no (falsy) page data.
     */
    public function loadNews(string $url):array
    {
        $pageData = Crawler::request($url);

        if (!$pageData)
        {
            throw new \Exception("Could not crawl data.", 1);
        }

        $dom = new \DOMDocument();
        // Set before loading, because loading is what triggers parsing.
        $dom->validateOnParse = true;

        // Real-world HTML is rarely well-formed; collect libxml's complaints
        // internally instead of emitting PHP warnings, then restore the
        // caller's previous libxml error-handling mode.
        $previousErrorMode = libxml_use_internal_errors(true);
        // The XML declaration prefix tells libxml to treat the markup as UTF-8.
        $dom->loadHTML('<?xml encoding="utf-8" ?>'.$pageData);
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);

        $finder = new \DOMXPath($dom);

        $imageAttribute = $this->firstMatchingNode($finder, "//figure/img/@src");

        $postData = [];
        $postData['main_image'] = $imageAttribute !== null ? $imageAttribute->value : null;
        $postData['title']      = $this->firstMatchingText($finder, "//h1[@class='title']");
        $postData['introtext']  = $this->firstMatchingText($finder, "//p[@class='summary introtext']");
        $postData['body']       = $this->firstMatchingText($finder, "//div[@class='item-text']");
        $postData['categories'] = $this->extractCategories($finder);

        // A page without a title or a body is not a usable article.
        // Compared against '' explicitly so that a literal "0" is not
        // mistaken for a missing value (as empty() would do).
        if ($postData['title'] === '' || $postData['body'] === '')
        {
            return [];
        }

        return $postData;
    }

    /**
     * Collects the distinct breadcrumb labels of the page, in document order,
     * leaving out the home-page entry.
     *
     * @param \DOMXPath $finder XPath evaluator bound to the parsed page.
     *
     * @return string[] Trimmed, de-duplicated breadcrumb labels.
     */
    private function extractCategories(\DomXPath $finder):array
    {
        $categories = [];

        $entries = $finder->query('(//ol[@class="breadcrumb"])/li');
        if (!$entries)
        {
            return $categories;
        }

        foreach ($entries as $entry)
        {
            $item = trim($entry->textContent);

            // 'صفحه اصلی' is the "home page" breadcrumb; it is not a category.
            if ($item === 'صفحه اصلی')
            {
                continue;
            }

            // Strict comparison so numeric-looking labels (e.g. "1e3" and
            // "1000") are not treated as duplicates of each other.
            if (!in_array($item, $categories, true))
            {
                $categories[] = $item;
            }
        }

        return $categories;
    }

    /**
     * Returns the first node matched by an XPath expression.
     *
     * @param \DOMXPath $finder     XPath evaluator bound to the parsed page.
     * @param string    $expression XPath expression to evaluate.
     *
     * @return \DOMNode|null The first match, or null when nothing matched
     *                       or the expression could not be evaluated.
     */
    private function firstMatchingNode(\DOMXPath $finder, string $expression)
    {
        $nodes = $finder->query($expression);

        if (!$nodes)
        {
            return null;
        }

        return $nodes->item(0);
    }

    /**
     * Returns the trimmed text content of the first node matched by an
     * XPath expression.
     *
     * @param \DOMXPath $finder     XPath evaluator bound to the parsed page.
     * @param string    $expression XPath expression to evaluate.
     *
     * @return string Trimmed text, or an empty string when nothing matched.
     */
    private function firstMatchingText(\DOMXPath $finder, string $expression):string
    {
        $node = $this->firstMatchingNode($finder, $expression);

        if ($node === null)
        {
            return '';
        }

        return trim((string) $node->textContent);
    }
}
