adding new service PageScrapper.php
This commit is contained in:
parent
574e300006
commit
10d18f6bf3
|
@ -0,0 +1,59 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
namespace App\Services;
|
||||||
|
|
||||||
|
/**
 * Downloads a page and extracts one HTML fragment from it, optionally
 * turning the fragment's site-relative links into absolute ones.
 */
class PageScrapper
{
    /** Site root prepended to relative links when the caller supplies none. */
    private const DEFAULT_BASE_URL = 'https://mkgtu.ru';

    /** Location handed to file_get_contents(). */
    private string $url;

    /**
     * Regular-expression fragment (not a literal string) matching the opening
     * tag of the wanted container. It is embedded between "/" delimiters, so a
     * literal "/" inside it must be written as "\/".
     */
    private string $contentMarker;

    /**
     * @param string $url           Page to download.
     * @param string $contentMarker Regex fragment matching the container's opening tag.
     */
    public function __construct(string $url, string $contentMarker = '<div class=["\']content_info["\']>')
    {
        $this->url = $url;
        $this->contentMarker = $contentMarker;
    }

    /**
     * Fetches the page and returns the markup that follows the first content
     * marker, up to the first closing </div>, with that </div> re-appended.
     *
     * The opening tag itself is not part of the result. Because the match is
     * lazy, a container holding nested <div> elements is cut at the first
     * inner </div>.
     *
     * @return string Extracted fragment ending in "</div>".
     *
     * @throws \RuntimeException When the page cannot be read, the marker is not
     *                           a valid pattern, or the marker is not found.
     */
    public function getHTML(): string
    {
        $page = file_get_contents($this->url);
        if ($page === false) {
            throw new \RuntimeException("Unable to read page: {$this->url}");
        }

        // Flags: i = case-insensitive, s = dot matches newlines, U = lazy quantifiers.
        $pattern = '/' . $this->contentMarker . '(.*)<\/div>/isU';

        // Only the first occurrence is ever used, so a single match is enough.
        $matches = [];
        $found = preg_match($pattern, $page, $matches);

        if ($found === false) {
            throw new \RuntimeException(
                "Content marker is not a valid pattern (PCRE error code " . preg_last_error() . ")"
            );
        }
        if ($found === 0) {
            throw new \RuntimeException("Content marker not found in page: {$this->url}");
        }

        return $matches[1] . '</div>';
    }

    /**
     * Rewrites site-relative href values in an HTML fragment to absolute URLs.
     *
     * Only the attribute values themselves are rewritten, each exactly once.
     * Links that already carry a scheme (http:, https:, mailto:, tel:, ...),
     * protocol-relative links (//host/...), in-page anchors (#...) and empty
     * values are left as they are. A relative path without a leading slash is
     * treated as relative to the site root.
     *
     * @param string $content HTML fragment to process.
     * @param string $baseUrl Scheme and host to prepend; a trailing slash is ignored.
     *
     * @return string The fragment with relative links made absolute.
     */
    public function normalizeURLFile(string $content, string $baseUrl = self::DEFAULT_BASE_URL): string
    {
        $base = rtrim($baseUrl, '/');

        $rewritten = preg_replace_callback(
            // 1: attribute name and "=", 2: quote character, 3: link value.
            '/(href\s*=\s*)(["\'])(.*?)\2/is',
            static function (array $m) use ($base): string {
                [$whole, $attribute, $quote, $link] = $m;

                if (!self::isSiteRelative($link)) {
                    return $whole;
                }

                $path = $link[0] === '/' ? $link : '/' . $link;

                return $attribute . $quote . $base . $path . $quote;
            },
            $content
        );

        // preg_replace_callback() yields null on a PCRE runtime error;
        // in that case hand back the input untouched rather than losing it.
        return $rewritten ?? $content;
    }

    /**
     * Tells whether a link value needs the site root prepended.
     */
    private static function isSiteRelative(string $link): bool
    {
        if ($link === '' || $link[0] === '#') {
            return false;
        }

        if (strncmp($link, '//', 2) === 0) {
            return false;
        }

        // A leading "scheme:" (RFC 3986 syntax) marks an absolute URI.
        return preg_match('/^[a-z][a-z0-9+.\-]*:/i', $link) !== 1;
    }
}
|
|
@ -3,34 +3,16 @@
|
||||||
<div class=" fw-bolder fs-1 text-center py-5"> Веб-консультации для абитуриентов и их родителей </div>
|
<div class=" fw-bolder fs-1 text-center py-5"> Веб-консультации для абитуриентов и их родителей </div>
|
||||||
<div class="container pt-4 d-flex justify-content-center">
|
<div class="container pt-4 d-flex justify-content-center">
|
||||||
|
|
||||||
<div class="col-10"><?php
|
<div class="col-10">
|
||||||
$page = file_get_contents("https://www.mkgtu.ru/postuplenie/web-consultations/");
|
@php
|
||||||
$rez = preg_match_all('/<div class="content_info">(.*)<\/div>/isU',$page,$arr);
|
use App\Services\PageScrapper;
|
||||||
//echo '<prE>'.print_r($arr[1]).'</pre>';
|
$pageScrapper = new PageScrapper("https://www.mkgtu.ru/postuplenie/web-consultations/", '<div class=["\']content_info["\']>');
|
||||||
|
$row = $pageScrapper->getHTML();
|
||||||
|
$content = $pageScrapper->normalizeURLFile($row);
|
||||||
//$tmp = explode("<br>" , $page);
|
|
||||||
//$content = $tmp[4].$tmp[5].$tmp[6];
|
|
||||||
|
|
||||||
$content = $arr[1][0].'</div>';
|
|
||||||
$rez = preg_match_all('/href="(.*)"/isU',$content,$arr);
|
|
||||||
//echo '<prE>'.print_r($arr,1).'</pre>';
|
|
||||||
foreach ($arr[1] as $el) {
|
|
||||||
//if (strpos($el, '/abitur') !== false) {
|
|
||||||
if (strpos($el, 'http') !== 0) {
|
|
||||||
//str_replace('/abitur', 'https://mkgtu.ru/abitur',$el);
|
|
||||||
$content = str_replace($el,'https://mkgtu.ru'.$el,$content);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
//$content = $arr[1][0].'</div>';
|
|
||||||
//$content = preg_replace('/href="(?!http|#)/is', 'https:\/\/mkgtu.ru', $content, -1, $count);
|
|
||||||
|
|
||||||
//$content = str_replace('href="','href="https://mkgtu.ru',$content);
|
|
||||||
//$pos = strripos($content,'/abitur');
|
|
||||||
//echo substr_replace($content, 'https://mkgtu.ru',$pos,0)
|
|
||||||
|
|
||||||
echo $content;
|
echo $content;
|
||||||
?>
|
|
||||||
|
@endphp
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@endsection
|
@endsection
|
||||||
|
|
Loading…
Reference in New Issue