<?php
namespace RSSImporter\Included\Importer\CrawlTargets;

use \RSSImporter\Included\Importer\Crawler;
use \RSSImporter\Included\Importer\CrawlTargetInterface;

/**
 * Crawl target that downloads a news page and extracts its main image, title,
 * intro text, body HTML, breadcrumb categories and tags via XPath.
 *
 * NOTE(review): the XPath expressions match exact `class` attribute values
 * (e.g. 'summary introtext'); presumably they mirror the MehrNews page markup —
 * confirm against the live site when the markup changes.
 */
class MehrNews extends Crawler implements CrawlTargetInterface
{
    /**
     * Download the page at $url and extract the article data from it.
     *
     * @param string $url Address handed to Crawler::request().
     *
     * @return array Empty array when no title or no body could be extracted.
     *               Otherwise an array with the keys 'main_image' (string|null),
     *               'title' (string), 'introtext' (string|null), 'body' (HTML string),
     *               'categories' (string[]) and 'tags' (string[]).
     *
     * @throws \Exception When Crawler::request() returns a falsy value.
     */
    public function loadNews(string $url):array
    {
        $pageData = Crawler::request($url);

        if (!$pageData) {
            throw new \Exception("Could not crawl data.", 1);
        }

        $dom = new \DOMDocument();
        $dom->validateOnParse = true; // has to be set before loading, because loading is parsing

        // Buffer libxml's complaints about real-world HTML instead of silencing the
        // whole call with "@"; the previous error mode is restored afterwards.
        $previousErrorMode = libxml_use_internal_errors(true);
        // The XML declaration prefix makes the parser treat the markup as UTF-8.
        $dom->loadHTML('<?xml encoding="utf-8" ?>' . $pageData);
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);

        $finder = new \DOMXPath($dom);

        $postData = [];

        $imageSrc = $this->firstNode($finder, "//figure[@class='item-img']/img/@src");
        $postData['main_image'] = $imageSrc instanceof \DOMAttr ? $imageSrc->value : null;
        $postData['title']      = $this->firstText($finder, "//h1[@class='title']");
        $postData['introtext']  = $this->firstText($finder, "//p[@class='summary introtext']");

        $body = $this->firstNode($finder, "//div[@class='item-body']");

        if ($body !== null) {
            // Strip the direct children of the body that must not end up in the stored HTML.
            foreach (['gallery hidden', 'item-code', 'item-author'] as $unwantedClass) {
                $this->removeNodes($finder, "//div[@class='item-body']/div[@class='{$unwantedClass}']");
            }

            $postData['body'] = trim((string) $dom->saveHTML($body));
        }

        // A page without a title or without a body is not treated as an article.
        if (empty($postData['title']) || empty($postData['body'])) {
            return [];
        }

        $postData['categories'] = $this->extractCategories($finder);
        $postData['tags']       = $this->extractTags($finder);

        return $postData;
    }

    /**
     * Collect the trimmed text of every <li> in the first breadcrumb list.
     *
     * @param \DomXPath $finder XPath evaluator bound to the parsed page.
     *
     * @return array List of strings, empty when nothing matches.
     */
    private function extractCategories(\DomXPath $finder):array
    {
        return $this->trimmedTexts($finder, '(//ol[@class="breadcrumb"])[1]/li');
    }

    /**
     * Collect the trimmed text of every <li> inside the tags box.
     *
     * @param \DomXPath $finder XPath evaluator bound to the parsed page.
     *
     * @return array List of strings, empty when nothing matches.
     */
    private function extractTags(\DomXPath $finder):array
    {
        return $this->trimmedTexts($finder, '(//section[@class="box tags"])/div/ul/li');
    }

    /**
     * Return the first node matched by an XPath expression.
     *
     * @param \DOMXPath $finder     XPath evaluator bound to the parsed page.
     * @param string    $expression XPath expression to evaluate.
     *
     * @return \DOMNode|null The first match, or null when the query fails or matches nothing.
     */
    private function firstNode(\DOMXPath $finder, string $expression)
    {
        $matches = $finder->query($expression);

        if ($matches === false || $matches->length === 0) {
            return null;
        }

        return $matches->item(0);
    }

    /**
     * Return the text content of the first node matched by an XPath expression.
     *
     * @param \DOMXPath $finder     XPath evaluator bound to the parsed page.
     * @param string    $expression XPath expression to evaluate.
     *
     * @return string|null The untrimmed text content, or null when nothing matches.
     */
    private function firstText(\DOMXPath $finder, string $expression)
    {
        $node = $this->firstNode($finder, $expression);

        return $node !== null ? $node->textContent : null;
    }

    /**
     * Detach every node matched by an XPath expression from its parent.
     *
     * @param \DOMXPath $finder     XPath evaluator bound to the parsed page.
     * @param string    $expression XPath expression selecting the nodes to remove.
     *
     * @return void
     */
    private function removeNodes(\DOMXPath $finder, string $expression)
    {
        $matches = $finder->query($expression);

        if ($matches === false) {
            return;
        }

        foreach ($matches as $node) {
            if ($node->parentNode !== null) {
                $node->parentNode->removeChild($node);
            }
        }
    }

    /**
     * Return the trimmed text content of every node matched by an XPath expression.
     *
     * @param \DOMXPath $finder     XPath evaluator bound to the parsed page.
     * @param string    $expression XPath expression to evaluate.
     *
     * @return array List of strings in document order, empty when the query fails or matches nothing.
     */
    private function trimmedTexts(\DOMXPath $finder, string $expression):array
    {
        $matches = $finder->query($expression);
        $texts = [];

        if ($matches === false) {
            return $texts;
        }

        foreach ($matches as $node) {
            $texts[] = trim($node->textContent);
        }

        return $texts;
    }
}
