diff --git a/app/Services/PageScrapper.php b/app/Services/PageScrapper.php index 7a2b7aa..f52aabe 100644 --- a/app/Services/PageScrapper.php +++ b/app/Services/PageScrapper.php @@ -15,21 +15,42 @@ class PageScrapper public function getHTML() { + $page = file_get_contents($this->url); - $strForPregMatch = "/" . "{$this->contentMarker}" . "(.*)<\/div>/isU"; + $strForPregMatch = "/" . "{$this->contentMarker}" . "(.*)<\/div>/is"; + $arr = []; $rez = preg_match_all($strForPregMatch, $page, $arr); - return $content = $arr[1][0] . ''; + + return $content = $arr[1][0]; + + } public function normalizeURLFile($content) { - $rez = preg_match_all('/href="(.*)"/isU', $content, $arr); + + $rez = preg_match_all('//isU',$content,$arr); + $arr[1] = array_unique($arr[1]); foreach ($arr[1] as $el) { - if (strpos($el, 'http') !== 0) { - $content = str_replace($el, 'https://mkgtu.ru' . $el, $content); + if (!str_starts_with($el, 'https')){ + $content = str_replace($el,'https://mkgtu.ru' . $el,$content); } } + return $content; + + } + public function cutHTML($content,$strForScissors) + { + + + $arr = []; + $rez = preg_match_all($strForScissors, $content, $arr); + $content = str_replace($arr[1],'',$content); + + return $content; + + } } diff --git a/resources/views/home.blade.php b/resources/views/home.blade.php index 6547279..51c2bf5 100644 --- a/resources/views/home.blade.php +++ b/resources/views/home.blade.php @@ -26,7 +26,7 @@