main #10

Merged
aslan merged 4 commits from RomanGolienko/Roman_applicant-site:main into main 2024-01-29 17:34:01 +03:00
3 changed files with 61 additions and 15 deletions
Showing only changes of commit 66939341a3 - Show all commits

1
.gitignore vendored
View File

@ -1,3 +1,4 @@
/public/img/icons/play-button.png
/.phpunit.cache /.phpunit.cache
/node_modules /node_modules
/public/build /public/build

View File

@ -2,6 +2,9 @@
namespace App\Services; namespace App\Services;
use DiDom\Document;
/*
class PageScrapper class PageScrapper
{ {
private string $url; private string $url;
@ -60,3 +63,39 @@ class PageScrapper
return $content; return $content;
} }
} }
*/
/**
 * Loads a remote page into a DiDom document and exposes its markup.
 *
 * The page is fetched once, in the constructor; every other method works on
 * that already-loaded document.
 */
class PageScrapper
{
    /** Address of the page that was loaded. */
    private string $url;

    /** Selector of the element that holds the content of interest. */
    private string $contentMarker;

    /** Parsed page; declared explicitly so it is not a dynamic property. */
    private Document $document;

    /**
     * @param string $url           Address of the page to load.
     * @param string $contentMarker Selector passed to Document::first() to locate the content element.
     */
    public function __construct($url, $contentMarker)
    {
        $this->url = $url;
        $this->contentMarker = $contentMarker;
        // Second argument "true" tells DiDom to treat the first argument as a file/URL to load.
        $this->document = new Document($this->url, true);
    }

    /**
     * Returns the whole loaded document.
     *
     * @return Document
     */
    public function getFullHTML()
    {
        return $this->document;
    }

    /**
     * Returns the HTML of the first element matching the content marker.
     *
     * @return string The element's HTML, or an empty string when nothing matches.
     */
    public function printHTML()
    {
        $element = $this->document->first($this->contentMarker);

        // first() yields null when the selector matches nothing; avoid calling html() on null.
        if ($element === null) {
            return '';
        }

        return $element->html();
    }

    /**
     * Rewrites relative link targets in the loaded document to absolute URLs.
     *
     * Only the href attributes of anchors are changed, directly in the DOM, so
     * the document stays a Document and the other methods keep working afterwards.
     * Links that are already absolute, protocol-relative, fragment-only or use a
     * scheme (mailto:, tel:, javascript:, ...) are left untouched.
     *
     * @return Document The same document, with normalized links.
     */
    public function normalizePath()
    {
        foreach ($this->document->find('a[href]') as $link) {
            $absolute = $this->toAbsoluteUrl(trim((string) $link->getAttribute('href')));

            if ($absolute !== null) {
                $link->setAttribute('href', $absolute);
            }
        }

        return $this->document;
    }

    /**
     * Resolves a relative href against the loaded page's address.
     *
     * @return string|null The absolute URL, or null when the href needs no rewriting.
     */
    private function toAbsoluteUrl(string $href): ?string
    {
        $needsNoRewrite = $href === ''
            || str_starts_with($href, '#')
            || str_starts_with($href, '//')
            || preg_match('/^[a-z][a-z0-9+.\-]*:/i', $href) === 1;

        if ($needsNoRewrite) {
            return null;
        }

        $origin = $this->siteOrigin();

        // Root-relative path: attach it straight to the site origin.
        if (str_starts_with($href, '/')) {
            return $origin . $href;
        }

        // Document-relative path: resolve against the directory of the loaded page.
        $path = parse_url($this->url, PHP_URL_PATH) ?: '/';
        $directory = substr($path, 0, (int) strrpos($path, '/') + 1);

        return $origin . $directory . $href;
    }

    /**
     * Builds "scheme://host[:port]" from the loaded page's address.
     *
     * Falls back to the site that was previously hard-coded when the address
     * cannot be parsed.
     */
    private function siteOrigin(): string
    {
        $parts = parse_url($this->url);

        if (!is_array($parts) || !isset($parts['host'])) {
            return 'https://mkgtu.ru';
        }

        $origin = ($parts['scheme'] ?? 'https') . '://' . $parts['host'];

        return isset($parts['port']) ? $origin . ':' . $parts['port'] : $origin;
    }
}

View File

@ -36,25 +36,31 @@
<div class="col-10"> <div class="col-10">
@php @php
use App\Services\PageScrapper;
$pageScrapper = new PageScrapper("https://mkgtu.ru/postuplenie/podat-dokumenty-onlayn/", '.content_info');
// Normalize the links in the loaded document first, then extract the fragment:
// the fragment must be read last so that it, not the whole page, is what gets echoed.
$pageScrapper->normalizePath();
$content = $pageScrapper->printHTML();
echo $content;
use DiDom\Document; // use DiDom\Document;
//
$document = new Document('https://mkgtu.ru/postuplenie/podat-dokumenty-onlayn/', true); // $document = new Document('https://mkgtu.ru/postuplenie/podat-dokumenty-onlayn/', true);
//
$posts = $document->find('.content_info'); // $posts = $document->find('.content_info');
//
echo $document->first('.content_info')->html(); // echo $document->first('.content_info')->html();
/* /*
use App\Services\PageScrapper; use App\Services\PageScrapper;
$pageScrapper = new PageScrapper("https://mkgtu.ru/postuplenie/podat-dokumenty-onlayn/", '<div class=["\']content_info["\']>'); $pageScrapper = new PageScrapper("https://mkgtu.ru/postuplenie/podat-dokumenty-onlayn/", '<div class=["\']content_info["\']>');
$row = $pageScrapper->getHTML(); $row = $pageScrapper->getHTML();
$content = $pageScrapper->normalizeURLFile($row); $content = $pageScrapper->normalizeURLFile($row);
$content = $pageScrapper->cutHTML($content,'/<footer(.*)<\/footer>/isU'); $content = $pageScrapper->cutHTML($content,'/<footer(.*)<\/footer>/isU');
echo $content; echo $content;
*/ */
@endphp @endphp