From 18c4e21d0612d3ae6094c894680431dabdc61750 Mon Sep 17 00:00:00 2001 From: aslan Date: Wed, 31 Jan 2024 15:45:07 +0300 Subject: [PATCH] add merge from http://172.17.254.104/aslan/applicant-site/pulls/12 --- app/Services/PageScrapper.php | 71 ++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 35 deletions(-) diff --git a/app/Services/PageScrapper.php b/app/Services/PageScrapper.php index 46b4880..4c14777 100644 --- a/app/Services/PageScrapper.php +++ b/app/Services/PageScrapper.php @@ -93,41 +93,42 @@ class PageScrapper $links = $content->find('a[href]'); $srclinks = $content->find('img[src]'); - - $html0 = $content->html(); - foreach ($links as $k => $link) { - $href = $link->attr('href'); - - if (!str_contains($link->attr('href'), "https://")) { - - - $tmp = explode('/', rawurldecode($href)); - foreach ( $tmp as $k => $v) { - $tmp[$k] = rawurlencode($v); - } - $href = implode('/', $tmp); - - - $html0 = str_replace($href, 'https://mkgtu.ru' . $href, $html0); - } - } - foreach ($srclinks as $k => $srclink) { - $src = $srclink->attr('src'); - - if (!str_contains($srclink->attr('src'), "https://")) { - - - $tmp = explode('/', rawurldecode($src)); - foreach ( $tmp as $k => $v) { - $tmp[$k] = rawurlencode($v); - } - $src = implode('/', $tmp); - - - $html0 = str_replace($src, 'https://mkgtu.ru' . $src, $html0); - } - } - str_replace('st yle', 'style', $html0); +// $html0 = $this->document->format()->html(); +// dd($html0); +// $html0 = $content->find('a[href]')->set; +// foreach ($links as $k => $link) { +// $href = $link->attr('href'); +// +// if (!str_contains($link->attr('href'), "https://")) { +// +// +// $tmp = explode('/', rawurldecode($href)); +// foreach ( $tmp as $k => $v) { +// $tmp[$k] = rawurlencode($v); +// } +// $href = implode('/', $tmp); +// $newHref = 'https://mkgtu.ru' . $href; +// echo 'https://mkgtu.ru' . $href; +// echo '
'; +// echo '
'; +// $html0 = str_replace($href, $newHref, $html0); +// } +// } +// foreach ($srclinks as $k => $srclink) { +// $src = $srclink->attr('src'); +// +// if (!str_contains($srclink->attr('src'), "https://")) { +// $tmp = explode('/', rawurldecode($src)); +// foreach ($tmp as $k => $v) { +// $tmp[$k] = rawurlencode($v); +// } +// $src = implode('/', $tmp); +// +// +// $html0 = str_replace($src, 'https://mkgtu.ru' . $src, $html0); +// } +// } +// str_replace('st yle', 'style', $html0); return $html0; }