diff --git a/app/Services/PageScrapper.php b/app/Services/PageScrapper.php index 4c14777..bc479ad 100644 --- a/app/Services/PageScrapper.php +++ b/app/Services/PageScrapper.php @@ -93,33 +93,47 @@ class PageScrapper $links = $content->find('a[href]'); $srclinks = $content->find('img[src]'); -// $html0 = $this->document->format()->html(); -// dd($html0); -// $html0 = $content->find('a[href]')->set; -// foreach ($links as $k => $link) { -// $href = $link->attr('href'); -// -// if (!str_contains($link->attr('href'), "https://")) { -// -// -// $tmp = explode('/', rawurldecode($href)); -// foreach ( $tmp as $k => $v) { -// $tmp[$k] = rawurlencode($v); -// } -// $href = implode('/', $tmp); -// $newHref = 'https://mkgtu.ru' . $href; -// echo 'https://mkgtu.ru' . $href; -// echo '
'; -// echo '
'; -// $html0 = str_replace($href, $newHref, $html0); -// } -// } + $html0 = $content->html(); + + preg_match_all('//isU', $html0, $arr); + foreach ($arr[1] as $el) { + $html0 = str_replace($el, urldecode($el), $html0); + } + preg_match_all('//isU', $html0, $arr2); + foreach ($arr2[1] as $el) { + $html0 = str_replace($el, urldecode($el), $html0); + } + + foreach ($links as $k => $link) { + $href = $link->attr('href'); + if (!str_contains($link->attr('href'), "https://")) { + $unchanged = $link->attr('href'); + $changed = $link->href = 'https://mkgtu.ru' . $href; + $html0 = str_replace(urldecode($unchanged), $changed, $html0); + } + } + + + + + + + foreach ($srclinks as $k => $srclink) { + $src = $srclink->attr('src'); + if (!str_contains($srclink->attr('src'), "https://")) { + $unchanged = $srclink->attr('src'); + $changed = $srclink->src = 'https://mkgtu.ru' . $src; + $html0 = str_replace(urldecode($unchanged), $changed, $html0); + } + } // foreach ($srclinks as $k => $srclink) { // $src = $srclink->attr('src'); // // if (!str_contains($srclink->attr('src'), "https://")) { +// +// // $tmp = explode('/', rawurldecode($src)); -// foreach ($tmp as $k => $v) { +// foreach ( $tmp as $k => $v) { // $tmp[$k] = rawurlencode($v); // } // $src = implode('/', $tmp);