From 66939341a357de3b8492b3a9a8ead4d1f797824c Mon Sep 17 00:00:00 2001
From: ROMANGOLIENKO
Date: Fri, 26 Jan 2024 14:45:33 +0300
Subject: [PATCH] newScrapper

---
 .gitignore                            |  1 +
 app/Services/PageScrapper.php         | 72 +++++++++++++++++++++++++++
 resources/views/menu/course.blade.php | 36 ++++++++------
 3 files changed, 94 insertions(+), 15 deletions(-)

diff --git a/.gitignore b/.gitignore
index b01c8c9..2749dec 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+/public/img/icons/play-button.png
 /.phpunit.cache
 /node_modules
 /public/build
diff --git a/app/Services/PageScrapper.php b/app/Services/PageScrapper.php
index 0edaa7c..e3ac583 100644
--- a/app/Services/PageScrapper.php
+++ b/app/Services/PageScrapper.php
@@ -2,6 +2,9 @@
 
 namespace App\Services;
 
+use DiDom\Document;
+
+/*
 class PageScrapper
 {
     private string $url;
@@ -60,3 +63,72 @@ class PageScrapper
         return $content;
     }
 }
+*/
+
+/**
+ * Loads an HTML page into a DiDom document and exposes the fragment matched by a CSS selector.
+ */
+class PageScrapper
+{
+    /** Origin prepended to root-relative link targets found in the loaded page. */
+    private const BASE_URL = 'https://mkgtu.ru';
+
+    private string $url;
+    private string $contentMarker;
+    private Document $document;
+
+    /**
+     * @param string $url           Location of the page to load.
+     * @param string $contentMarker CSS selector of the element that holds the content.
+     */
+    public function __construct(string $url, string $contentMarker)
+    {
+        $this->url = $url;
+        $this->contentMarker = $contentMarker;
+        // Second argument: treat $url as a location to load, not as raw markup.
+        $this->document = new Document($this->url, true);
+    }
+
+    /**
+     * Returns the parsed document for the whole page.
+     */
+    public function getFullHTML(): Document
+    {
+        return $this->document;
+    }
+
+    /**
+     * Returns the HTML of the first element matching the content marker.
+     *
+     * @throws \RuntimeException When no element matches the content marker.
+     */
+    public function printHTML(): string
+    {
+        $element = $this->document->first($this->contentMarker);
+        if ($element === null) {
+            throw new \RuntimeException(sprintf('No element matches "%s" on %s', $this->contentMarker, $this->url));
+        }
+
+        return $element->html();
+    }
+
+    /**
+     * Rewrites root-relative link targets in the document so they point at BASE_URL.
+     *
+     * Only href values starting with a single "/" are changed; absolute URLs,
+     * protocol-relative links, in-page anchors and other schemes are left untouched.
+     *
+     * @return string HTML of the whole document after the rewrite.
+     */
+    public function normalizePath(): string
+    {
+        foreach ($this->document->find('a[href]') as $link) {
+            $href = (string) $link->getAttribute('href');
+            if (str_starts_with($href, '/') && !str_starts_with($href, '//')) {
+                $link->setAttribute('href', self::BASE_URL . $href);
+            }
+        }
+
+        return $this->document->html();
+    }
+}
diff --git a/resources/views/menu/course.blade.php b/resources/views/menu/course.blade.php
index 8779bd1..4dc52b1 100644
--- a/resources/views/menu/course.blade.php
+++ b/resources/views/menu/course.blade.php
@@ -36,25 +36,31 @@
@php + use App\Services\PageScrapper; + $pageScrapper = new PageScrapper("https://mkgtu.ru/postuplenie/podat-dokumenty-onlayn/", '.content_info'); + $content = $pageScrapper->printHTML(); + $content = $pageScrapper->normalizePath(); + + echo $content; - use DiDom\Document; - - $document = new Document('https://mkgtu.ru/postuplenie/podat-dokumenty-onlayn/', true); - - $posts = $document->find('.content_info'); - - echo $document->first('.content_info')->html(); - /* - use App\Services\PageScrapper; - $pageScrapper = new PageScrapper("https://mkgtu.ru/postuplenie/podat-dokumenty-onlayn/", '
'); - $row = $pageScrapper->getHTML(); - $content = $pageScrapper->normalizeURLFile($row); - $content = $pageScrapper->cutHTML($content,'//isU'); - echo $content; - */ + // use DiDom\Document; + // + // $document = new Document('https://mkgtu.ru/postuplenie/podat-dokumenty-onlayn/', true); + // + // $posts = $document->find('.content_info'); + // + // echo $document->first('.content_info')->html(); + /* + use App\Services\PageScrapper; + $pageScrapper = new PageScrapper("https://mkgtu.ru/postuplenie/podat-dokumenty-onlayn/", '
'); + $row = $pageScrapper->getHTML(); + $content = $pageScrapper->normalizeURLFile($row); + $content = $pageScrapper->cutHTML($content,'//isU'); + echo $content; + */ @endphp