Merge remote-tracking branch 'origin/main'

# Conflicts:
#	app/Services/PageScrapper.php
This commit is contained in:
aslan 2024-01-31 15:45:38 +03:00
commit 06b662d39a
1 changed files with 36 additions and 22 deletions

View File

@ -93,33 +93,47 @@ class PageScrapper
$links = $content->find('a[href]'); $links = $content->find('a[href]');
$srclinks = $content->find('img[src]'); $srclinks = $content->find('img[src]');
// $html0 = $this->document->format()->html(); $html0 = $content->html();
// dd($html0);
// $html0 = $content->find('a[href]')->set; preg_match_all('/<a href="(.*)">/isU', $html0, $arr);
// foreach ($links as $k => $link) { foreach ($arr[1] as $el) {
// $href = $link->attr('href'); $html0 = str_replace($el, urldecode($el), $html0);
// }
// if (!str_contains($link->attr('href'), "https://")) { preg_match_all('/<img src="(.*)">/isU', $html0, $arr2);
// foreach ($arr2[1] as $el) {
// $html0 = str_replace($el, urldecode($el), $html0);
// $tmp = explode('/', rawurldecode($href)); }
// foreach ( $tmp as $k => $v) {
// $tmp[$k] = rawurlencode($v); foreach ($links as $k => $link) {
// } $href = $link->attr('href');
// $href = implode('/', $tmp); if (!str_contains($link->attr('href'), "https://")) {
// $newHref = 'https://mkgtu.ru' . $href; $unchanged = $link->attr('href');
// echo 'https://mkgtu.ru' . $href; $changed = $link->href = 'https://mkgtu.ru' . $href;
// echo '<br>'; $html0 = str_replace(urldecode($unchanged), $changed, $html0);
// echo '<br>'; }
// $html0 = str_replace($href, $newHref, $html0); }
// }
// }
foreach ($srclinks as $k => $srclink) {
$src = $srclink->attr('src');
if (!str_contains($srclink->attr('src'), "https://")) {
$unchanged = $srclink->attr('src');
$changed = $srclink->src = 'https://mkgtu.ru' . $src;
$html0 = str_replace(urldecode($unchanged), $changed, $html0);
}
}
// foreach ($srclinks as $k => $srclink) { // foreach ($srclinks as $k => $srclink) {
// $src = $srclink->attr('src'); // $src = $srclink->attr('src');
// //
// if (!str_contains($srclink->attr('src'), "https://")) { // if (!str_contains($srclink->attr('src'), "https://")) {
//
//
// $tmp = explode('/', rawurldecode($src)); // $tmp = explode('/', rawurldecode($src));
// foreach ($tmp as $k => $v) { // foreach ( $tmp as $k => $v) {
// $tmp[$k] = rawurlencode($v); // $tmp[$k] = rawurlencode($v);
// } // }
// $src = implode('/', $tmp); // $src = implode('/', $tmp);