forked from aslan/applicant-site
newScrapper
This commit is contained in:
parent
3d33a03f97
commit
66939341a3
|
@ -1,3 +1,4 @@
|
|||
/public/img/icons/play-button.png
|
||||
/.phpunit.cache
|
||||
/node_modules
|
||||
/public/build
|
||||
|
|
|
@ -2,6 +2,9 @@
|
|||
|
||||
namespace App\Services;
|
||||
|
||||
use DiDom\Document;
|
||||
|
||||
/*
|
||||
class PageScrapper
|
||||
{
|
||||
private string $url;
|
||||
|
@ -60,3 +63,39 @@ class PageScrapper
|
|||
return $content;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
/**
 * Loads a remote HTML page into a DiDom document and exposes either the whole
 * document or the fragment matched by a CSS selector.
 */
class PageScrapper
{
    /** Origin prepended to site-relative links by normalizePath(). */
    private const BASE_URL = 'https://mkgtu.ru';

    /** Address of the page that was loaded. */
    private string $url;

    /** CSS selector of the element whose HTML printHTML() returns. */
    private string $contentMarker;

    /** Parsed page; declared explicitly to avoid a dynamic property. */
    private Document $document;

    /**
     * @param string $url           Address of the page to load.
     * @param string $contentMarker CSS selector of the content element.
     */
    public function __construct(string $url, string $contentMarker)
    {
        $this->url = $url;
        $this->contentMarker = $contentMarker;
        // The second argument makes DiDom load from the given location
        // instead of treating the string itself as markup.
        $this->document = new Document($this->url, true);
    }

    /**
     * Returns the whole parsed document.
     */
    public function getFullHTML(): Document
    {
        return $this->document;
    }

    /**
     * Returns the HTML of the first element matching the content marker,
     * or an empty string when nothing matches.
     */
    public function printHTML(): string
    {
        // first() yields null when the selector matches nothing.
        return $this->document->first($this->contentMarker)?->html() ?? '';
    }

    /**
     * Rewrites every relative link (<a href>) in the document so that it
     * points at BASE_URL, and returns the document.
     *
     * The links are changed inside the document itself, so a later
     * printHTML() call reflects the rewritten addresses.
     */
    public function normalizePath(): Document
    {
        foreach ($this->document->find('a[href]') as $link) {
            $href = (string) $link->getAttribute('href');

            if ($this->isRelativePath($href)) {
                // Paths are treated as relative to the site root.
                $link->setAttribute('href', self::BASE_URL . '/' . ltrim($href, '/'));
            }
        }

        return $this->document;
    }

    /**
     * Tells whether an href is a plain path, as opposed to an absolute URL,
     * a protocol-relative URL, an in-page anchor or an empty value.
     */
    private function isRelativePath(string $href): bool
    {
        if ($href === '' || $href[0] === '#' || str_starts_with($href, '//')) {
            return false;
        }

        // Anything starting with "scheme:" (https:, http:, mailto:, tel:, ...) is already absolute.
        return preg_match('/^[a-z][a-z0-9+.\-]*:/i', $href) !== 1;
    }
}
|
||||
|
|
|
@ -36,25 +36,31 @@
|
|||
|
||||
<div class="col-10">
|
||||
@php
    use App\Services\PageScrapper;

    // Load the admissions page once through the scraper service and output
    // the result of normalizePath(). The earlier printHTML() call was removed
    // because its result was overwritten before being used. The second,
    // direct DiDom fetch of the same page (and its duplicate echo) and the
    // commented-out legacy variants were dropped as well.
    $pageScrapper = new PageScrapper("https://mkgtu.ru/postuplenie/podat-dokumenty-onlayn/", '.content_info');
    $content = $pageScrapper->normalizePath();

    echo $content;
@endphp
|
||||
|
||||
|
|
Loading…
Reference in New Issue