Compare commits

...

5 Commits

Author SHA1 Message Date
aslan 36579ed25f Merge pull request 'main' (#10) from RomanGolienko/Roman_applicant-site:main into main
Tests & Lint & Deploy to Railway / build (20.x, 8.2) (push) Failing after 1m25s Details
Tests & Lint & Deploy to Railway / deploy (8.1) (push) Has been skipped Details
Reviewed-on: http://172.17.254.104/aslan/applicant-site/pulls/10
2024-01-29 17:34:00 +03:00
ROMANGOLIENKO b20dcc4ad8 new Scrapper embodying
Tests & Lint & Deploy to Railway / build (20.x, 8.2) (pull_request) Failing after 1m19s Details
Tests & Lint & Deploy to Railway / deploy (8.1) (pull_request) Has been skipped Details
2024-01-29 17:25:49 +03:00
ROMANGOLIENKO 4c5c0e6ddb actual version, device change 2024-01-29 10:57:37 +03:00
ROMANGOLIENKO 66939341a3 newScrapper 2024-01-29 10:57:37 +03:00
ROMANGOLIENKO 3d33a03f97 actual version, device change 2024-01-29 10:57:37 +03:00
20 changed files with 190 additions and 127 deletions

2
.gitignore vendored
View File

@ -1,3 +1,4 @@
/public/img/icons/play-button.png
/.phpunit.cache
/node_modules
/public/build
@ -21,3 +22,4 @@ yarn-error.log
/.phpstorm.meta.php
/_ide_helper.php
/_ide_helper_models.php
/public/img/icons/play-button.png

View File

@ -2,6 +2,9 @@
namespace App\Services;
use DiDom\Document;
/*
class PageScrapper
{
private string $url;
@ -22,7 +25,7 @@ class PageScrapper
$arr = [];
$rez = preg_match_all($strForPregMatch, $page, $arr);
return $content = $arr[1][0];
return $content = $arr[0][0];
}
public function normalizeURLFile($content)
@ -60,3 +63,72 @@ class PageScrapper
return $content;
}
}
*/
/**
 * Loads a remote page with DiDom and exposes one fragment of it (selected by
 * a CSS selector) as an HTML string, optionally rewriting root-relative
 * links and image sources so they point at the original site.
 */
class PageScrapper
{
    /** Origin prepended to root-relative link/image URLs found in the fragment. */
    private const BASE_URL = 'https://mkgtu.ru';

    private string $url;
    private string $contentMarker;

    /**
     * Parsed remote page. Declared explicitly: assigning an undeclared
     * property is deprecated as of PHP 8.2.
     */
    private Document $document;

    /**
     * @param string $url           Address of the page to load.
     * @param string $contentMarker Selector of the fragment to extract (callers pass CSS such as ".content_info").
     */
    public function __construct(string $url, string $contentMarker)
    {
        $this->url = $url;
        $this->contentMarker = $contentMarker;
        // The second argument asks DiDom to load $url as a file/URL instead of parsing it as markup.
        $this->document = new Document($this->url, true);
    }

    /**
     * Returns the whole parsed document.
     */
    public function getFullHTML(): Document
    {
        return $this->document;
    }

    /**
     * Returns the HTML of the first element matching the content marker,
     * or an empty string when nothing on the page matches.
     */
    public function printHTML(): string
    {
        return $this->document->first($this->contentMarker)?->html() ?? '';
    }

    /**
     * Returns the HTML of the content fragment with root-relative `href`
     * (on <a>) and `src` (on <img>) values prefixed with BASE_URL.
     *
     * Returns an empty string when the content marker matches nothing.
     */
    public function normalizePath(): string
    {
        $content = $this->document->first($this->contentMarker);
        if ($content === null) {
            return '';
        }

        // Keys carry the attribute name, so the two maps cannot collide.
        $replacements = $this->absoluteAttributePairs($content->find('a[href]'), 'href')
            + $this->absoluteAttributePairs($content->find('img[src]'), 'src');

        $html = $content->html();
        if ($replacements !== []) {
            // strtr() makes a single pass and never re-scans text it has
            // already replaced, so a URL used by several links (or one that
            // is a prefix of another) is prefixed exactly once.
            $html = strtr($html, $replacements);
        }

        // Repairs the broken "st yle" attribute name; the result is now
        // actually used instead of being thrown away.
        return str_replace('st yle', 'style', $html);
    }

    /**
     * Builds a `attr="/path"` => `attr="BASE_URL/path"` map for the given elements.
     *
     * Only root-relative values (leading "/", but not protocol-relative "//")
     * are included; absolute URLs, anchors, mailto: links and path-relative
     * values are left untouched because prefixing a host to them would
     * produce an invalid URL.
     *
     * NOTE(review): matching relies on the serialized fragment writing
     * attributes as name="value" with percent-encoded path segments, which is
     * what the per-segment re-encoding below mirrors — confirm against the
     * DiDom/libxml output for pages with unusual characters in URLs.
     *
     * @param iterable $elements  Elements that carry $attribute.
     * @param string   $attribute Attribute name to rewrite ("href" or "src").
     *
     * @return array<string, string>
     */
    private function absoluteAttributePairs(iterable $elements, string $attribute): array
    {
        $pairs = [];

        foreach ($elements as $element) {
            $value = (string) $element->attr($attribute);

            if (!str_starts_with($value, '/') || str_starts_with($value, '//')) {
                continue;
            }

            // Decode first so already-encoded segments are not double-encoded,
            // then encode every path segment separately to keep the slashes.
            $encoded = implode('/', array_map(rawurlencode(...), explode('/', rawurldecode($value))));

            // Anchoring on the attribute (name + quotes) keeps the rewrite
            // from touching text content or unrelated markup such as "</div>".
            $pairs[$attribute . '="' . $encoded . '"'] = $attribute . '="' . self::BASE_URL . $encoded . '"';
        }

        return $pairs;
    }
}

View File

@ -8,8 +8,9 @@
],
"license": "MIT",
"require": {
"php": "^8.2",
"php": "^8.1|8.2",
"guzzlehttp/guzzle": "^7.8.1",
"imangazaliev/didom": "^2.0",
"laravel/framework": "^10.41.0",
"laravel/sanctum": "^3.3.3",
"laravel/tinker": "^2.9.0",
@ -64,6 +65,7 @@
}
},
"config": {
"platform-check": false,
"optimize-autoloader": true,
"preferred-install": "dist",
"sort-packages": true,

View File

@ -319,9 +319,27 @@
</div>
</div>
</div>
</div>
<!-- brand section end -->
</div>
<!-- brand section end -->
<style>
body {
font-weight: 300;
font-size: 16px;
line-height: 140%;
color: #222222;
margin: 12px 0;
text-align: justify;
}
a {
color: #005951;
}
</style>
@yield('content')
<!-- footer area start -->

View File

@ -1,15 +1,15 @@
@extends('layouts.applicant-layout')
@section('content')
<div class=" fw-bolder fs-1 text-center py-5 lh-lg"> Для поступающих на обучение по программам аспирантуры </div>
<div class="container pt-4 d-flex justify-content-center">
<div class=" fw-bolder fs-1 text-center py-5 lh-lg"> Онлайн-консультации для поступающих и родителей </div>
<div class="container fs-5 pt-4 d-flex justify-content-center">
<div class="col-10">
@php
use App\Services\PageScrapper;
$pageScrapper = new PageScrapper("https://mkgtu.ru/postuplenie/priemnaya-kampaniya/aspirantura/", '<div class=["\']content_info["\']>');
$row = $pageScrapper->getHTML();
$content = $pageScrapper->normalizeURLFile($row);
$content = $pageScrapper->cutHTML($content,'/<footer(.*)<\/footer>/isU');
$PageScrapper = new PageScrapper("https://www.mkgtu.ru/postuplenie/web-consultations/",".content_info");
$content = $PageScrapper->normalizePath();
echo $content;
@endphp

View File

@ -6,10 +6,9 @@
<div class="col-10">
@php
use App\Services\PageScrapper;
$pageScrapper = new PageScrapper("https://mkgtu.ru/postuplenie/priemnaya-kampaniya/kolledzh/", '<div class=["\']content_info["\']>');
$row = $pageScrapper->getHTML();
$content = $pageScrapper->normalizeURLFile($row);
$content = $pageScrapper->cutHTML($content,'/<footer(.*)<\/footer>/isU');
$PageScrapper = new PageScrapper("https://mkgtu.ru/postuplenie/priemnaya-kampaniya/kolledzh/",".content_info");
$content = $PageScrapper->normalizePath();
echo $content;
@endphp

View File

@ -6,12 +6,9 @@
<div class="col-10">
@php
use App\Services\PageScrapper;
$pageScrapper = new PageScrapper("https://mkgtu.ru/postuplenie/priemnaya-kampaniya/ordinatura/", '<div class=["\']content_info["\']>');
$row = $pageScrapper->getHTML();
$content = $pageScrapper->normalizeURLFile($row);
$content = $pageScrapper->cutHTML($content,'/<footer(.*)<\/footer>/isU');
$PageScrapper = new PageScrapper("https://mkgtu.ru/postuplenie/priemnaya-kampaniya/ordinatura/",".content_info");
$content = $PageScrapper->normalizePath();
echo $content;
@endphp
</div>

View File

@ -6,10 +6,8 @@
<div class="col-10">
@php
use App\Services\PageScrapper;
$pageScrapper = new PageScrapper("https://mkgtu.ru/postuplenie/priemnaya-kampaniya/platnye-obrazovatelnye-uslugi/", '<div class=["\']content_info["\']>');
$row = $pageScrapper->getHTML();
$content = $pageScrapper->normalizeURLFile($row);
$content = $pageScrapper->cutHTML($content,'/<footer(.*)<\/footer>/isU');
$PageScrapper = new PageScrapper("https://mkgtu.ru/postuplenie/priemnaya-kampaniya/platnye-obrazovatelnye-uslugi/",".content_info");
$content = $PageScrapper->normalizePath();
echo $content;
@endphp

View File

@ -6,10 +6,8 @@
<div class="col-10">
@php
use App\Services\PageScrapper;
$pageScrapper = new PageScrapper("https://mkgtu.ru/postuplenie/priemnaya-kampaniya/spetsialitet-magistratura-2022/", '<div class=["\']content_info["\']>');
$row = $pageScrapper->getHTML();
$content = $pageScrapper->normalizeURLFile($row);
$content = $pageScrapper->cutHTML($content,'/<footer(.*)<\/footer>/isU');
$PageScrapper = new PageScrapper("https://mkgtu.ru/postuplenie/priemnaya-kampaniya/spetsialitet-magistratura-2022/",".content_info");
$content = $PageScrapper->normalizePath();
echo $content;
@endphp

View File

@ -6,15 +6,8 @@
<div class="col-10 text-center pb-4 ">
@php
use App\Services\PageScrapper;
$pageScrapper = new PageScrapper("https://mkgtu.ru/postuplenie/priemnaya-kampaniya/videomaterialy-dlya-postupayushchikh/", '<div class=["\']content_info["\']>');
$row = $pageScrapper->getHTML();
$content = $pageScrapper->normalizeURLFile($row);
$content = $pageScrapper->cutHTML($content,'/<footer(.*)<\/footer>/isU');
// $content = str_replace(
// '<img width="614" alt="Обложка Госуслуги_page-0001.jpg" src="/upload/medialibrary/c76/c761087fd6938bd8eb8708e9e036679e.jpg" height="346" title="Обложка Госуслуги_page-0001.jpg">',
// '<img width="614" alt="Обложка Госуслуги_page-0001.jpg" src="https://mkgtu.ru/upload/medialibrary/c76/c761087fd6938bd8eb8708e9e036679e.jpg" height="346" title="Обложка Госуслуги_page-0001.jpg" class="border border-3 rounded-3 border-secondary">',
// $content);
$PageScrapper = new PageScrapper("https://mkgtu.ru/postuplenie/priemnaya-kampaniya/videomaterialy-dlya-postupayushchikh/",".content_info");
$content = $PageScrapper->normalizePath();
echo $content;
@endphp

View File

@ -6,10 +6,8 @@
<div class="col-10">
@php
use App\Services\PageScrapper;
$pageScrapper = new PageScrapper("https://www.mkgtu.ru/postuplenie/web-consultations/", '<div class=["\']content_info["\']>');
$row = $pageScrapper->getHTML();
$content = $pageScrapper->normalizeURLFile($row);
$content = $pageScrapper->cutHTML($content,'/<footer(.*)<\/footer>/isU');
$PageScrapper = new PageScrapper("https://www.mkgtu.ru/postuplenie/web-consultations/",".content_info");
$content = $PageScrapper->normalizePath();
echo $content;
@endphp

View File

@ -1,4 +1,5 @@
@extends('layouts.applicant-layout')
@section('content')
<style>
@ -36,14 +37,11 @@
<div class="col-10">
@php
use App\Services\PageScrapper;
$pageScrapper = new PageScrapper("https://mkgtu.ru/postuplenie/podat-dokumenty-onlayn/", '<div class=["\']content_info["\']>');
$row = $pageScrapper->getHTML();
$content = $pageScrapper->normalizeURLFile($row);
$content = $pageScrapper->cutHTML($content,'/<footer(.*)<\/footer>/isU');
$PageScrapper = new PageScrapper("https://mkgtu.ru/postuplenie/podat-dokumenty-onlayn/",".content_info");
$content = $PageScrapper->printHTML();
$content = $PageScrapper->normalizePath();
echo $content;
@endphp
</div>
</div>

View File

@ -6,35 +6,20 @@
<div class="col-10">
@php
use App\Services\PageScrapper;
$pageScrapper = new PageScrapper("https://mkgtu.ru/postuplenie/inostrannym-abiturientam/akademicheskaya-mobilnost-i-mezhdunarodnoe-sotrudnichestvo/akademicheskaya-mobilnost-i-fondy/", '<div class=["\']content_info["\']>');
$PageScrapper = new PageScrapper("https://mkgtu.ru/postuplenie/inostrannym-abiturientam/akademicheskaya-mobilnost-i-mezhdunarodnoe-sotrudnichestvo/akademicheskaya-mobilnost-i-fondy/",".content_info");
$PageScrapper2 = new PageScrapper("https://mkgtu.ru/postuplenie/inostrannym-abiturientam/akademicheskaya-mobilnost-i-mezhdunarodnoe-sotrudnichestvo/individualnaya-mobilnost-studentov-i-aspirantov/",".content_info");
$PageScrapper3 = new PageScrapper("https://mkgtu.ru/postuplenie/inostrannym-abiturientam/akademicheskaya-mobilnost-i-mezhdunarodnoe-sotrudnichestvo/kak-stat-uchastnikom-programm-akademicheskoy-mobilnosti/",".content_info");
$row = $pageScrapper->getHTML();
//$tmp = explode('<footer',$row); $row = $tmp[0];
$row = $pageScrapper->cutHTML($row,'/<footer(.*)<\/footer>/isU');
$content = '<div class=" fw-bolder fs-3 text-center py-5 lh-lg"> Академическая мобильность и фонды </div>';
$content .= $PageScrapper->normalizePath();
$content .= '<div class=" fw-bolder fs-3 text-center py-5 lh-lg"> Индивидуальная мобильность студентов и аспирантов </div>';
$content .= $PageScrapper2->normalizePath();
$content .= '<div class=" fw-bolder fs-3 text-center py-5 lh-lg"> Как стать участником программ академической мобильности </div>';
$content .= $PageScrapper3->normalizePath();
$pageScrapper2 = new PageScrapper("https://mkgtu.ru/postuplenie/inostrannym-abiturientam/akademicheskaya-mobilnost-i-mezhdunarodnoe-sotrudnichestvo/individualnaya-mobilnost-studentov-i-aspirantov/", '<div class=["\']content_info["\']>');
$row2 = $pageScrapper2->getHTML();
//$tmp = explode('<footer',$row2); $row2 = $tmp[0];
$row2 = $pageScrapper2->cutHTML($row2,'/<footer(.*)<\/footer>/isU');
$pageScrapper3 = new PageScrapper("https://mkgtu.ru/postuplenie/inostrannym-abiturientam/akademicheskaya-mobilnost-i-mezhdunarodnoe-sotrudnichestvo/kak-stat-uchastnikom-programm-akademicheskoy-mobilnosti/", '<div class=["\']content_info["\']>');
$row3 = $pageScrapper3->getHTML();
//$tmp = explode('<footer',$row3); $row3 = $tmp[0];
$row3 = $pageScrapper3->cutHTML($row3,'/<footer(.*)<\/footer>/isU');
$content = $row . $row2 . $row3;
//dd($content);
//$content = $pageScrapper->cutHTML($row,'/<footer(.*)<\/footer>/isU');
// $content = $pageScrapper->normalizeURLFile($content);
echo htmlspecialchars($content);
echo $content;
@endphp

View File

@ -1,15 +1,15 @@
@extends('layouts.applicant-layout')
@section('content')
<div class=" fw-bolder fs-1 text-center py-5 lh-lg"> Кафедры </div>
<div class="container py-4 d-flex justify-content-center">
<div class="col-10">
@php
use App\Services\PageScrapper;
$pageScrapper = new PageScrapper("https://mkgtu.ru/postuplenie/inostrannym-abiturientam/kafedry/", '<div class=["\']content_info["\']>');
$row = $pageScrapper->getHTML();
$content = $pageScrapper->normalizeURLFile($row);
$content = $pageScrapper->cutHTML($content,'/<footer(.*)<\/footer>/isU');
$PageScrapper = new PageScrapper("https://mkgtu.ru/postuplenie/inostrannym-abiturientam/kafedry/",".content_info");
$content = $PageScrapper->printHTML();
$content = $PageScrapper->normalizePath();
echo $content;
@endphp

View File

@ -1,15 +1,21 @@
@extends('layouts.applicant-layout')
@section('content')
<style>
a {
color: #005951;
}
</style>
<div class=" fw-bolder fs-1 text-center py-5 lh-lg"> Международная деятельность </div>
<div class="container py-4 d-flex justify-content-center">
<div class="container fs-5 pt-4 d-flex justify-content-center">
<div class="col-10">
@php
use App\Services\PageScrapper;
$pageScrapper = new PageScrapper("https://mkgtu.ru/postuplenie/inostrannym-abiturientam/mezhdunarodnaya-deyatelnost/", '<div class=["\']content_info["\']>');
$row = $pageScrapper->getHTML();
$content = $pageScrapper->normalizeURLFile($row);
$content = $pageScrapper->cutHTML($content,'/<footer(.*)<\/footer>/isU');
$PageScrapper = new PageScrapper("https://mkgtu.ru/postuplenie/inostrannym-abiturientam/mezhdunarodnaya-deyatelnost/",".content_info");
$content = $PageScrapper->printHTML();
$content = $PageScrapper->normalizePath();
echo $content;
@endphp
@ -17,3 +23,4 @@
</div>
</div>
@endsection

View File

@ -1,15 +1,19 @@
@extends('layouts.applicant-layout')
@section('content')
<style>
a {
color: #005951;
}
</style>
<div class=" fw-bolder fs-1 text-center py-5 lh-lg"> Общие сведения </div>
<div class="container py-4 d-flex justify-content-center">
<div class="container fs-5 pt-4 d-flex justify-content-center">
<div class="col-10">
@php
use App\Services\PageScrapper;
$pageScrapper = new PageScrapper("https://mkgtu.ru/postuplenie/inostrannym-abiturientam/obshchie-svedeniya/", '<div class=["\']content_info["\']>');
$row = $pageScrapper->getHTML();
$content = $pageScrapper->normalizeURLFile($row);
$content = $pageScrapper->cutHTML($content,'/<footer(.*)<\/footer>/isU');
$PageScrapper = new PageScrapper("https://mkgtu.ru/postuplenie/inostrannym-abiturientam/obshchie-svedeniya/",".content_info");
$content = $PageScrapper->printHTML();
$content = $PageScrapper->normalizePath();
echo $content;
@endphp
@ -17,3 +21,5 @@
</div>
</div>
@endsection

View File

@ -1,15 +1,15 @@
@extends('layouts.applicant-layout')
@section('content')
<div class=" fw-bolder fs-1 text-center py-5 lh-lg"> Центр Международного образования </div>
<div class="container py-4 d-flex justify-content-center">
<div class="col-10">
@php
use App\Services\PageScrapper;
$pageScrapper = new PageScrapper("https://mkgtu.ru/postuplenie/inostrannym-abiturientam/tsentr-mezhdunarodnogo-obrazovaniya/", '<div class=["\']content_info["\']>');
$row = $pageScrapper->getHTML();
$content = $pageScrapper->normalizeURLFile($row);
$content = $pageScrapper->cutHTML($content,'/<footer(.*)<\/footer>/isU');
$PageScrapper = new PageScrapper("https://mkgtu.ru/postuplenie/inostrannym-abiturientam/tsentr-mezhdunarodnogo-obrazovaniya/",".content_info");
$content = $PageScrapper->printHTML();
$content = $PageScrapper->normalizePath();
echo $content;
@endphp

View File

@ -1,5 +1,8 @@
@extends('layouts.applicant-layout')
@section('content')
<div class=" fw-bolder fs-1 text-center py-5 lh-lg"> Олимпиады для школьников </div>
{{-- TODO: fix this!!!! --}}
<div class="container py-4 d-flex justify-content-center" style="padding-left: 150px;" >
@ -7,20 +10,9 @@
<div class="col-10">
@php
use App\Services\PageScrapper;
$pageScrapper = new PageScrapper("https://mkgtu.ru/postuplenie/olimpiady-dlya-shkolnikov/", '<div class=["\']content_info["\']>');
$row = $pageScrapper->getHTML();
$content = $pageScrapper->normalizeURLFile($row);
$content = $pageScrapper->cutHTML($content,'/<footer(.*)<\/footer>/isU');
// $tmp = preg_match_all('/<p (.*)>/isU', "https://mkgtu.ru/postuplenie/olimpiady-dlya-shkolnikov/", $arr);
//
// $arr[0] = array_unique($arr[0]);
// $str = $arr[0][0];
$content = str_replace('<p style="text-align: left;">', '<p>', $content);
$PageScrapper = new PageScrapper("https://mkgtu.ru/postuplenie/olimpiady-dlya-shkolnikov/",".content_info");
$content = $PageScrapper->printHTML();
$content = $PageScrapper->normalizePath();
echo $content;
@endphp

View File

@ -5,7 +5,7 @@
<link rel="stylesheet" type="text/css" href="{{ URL::to('https://mkgtu.ru/sveden/assets/review/v1/common/css/vendor.css?1694691653153304') }}">
<link rel="stylesheet" type="text/css" href="{{ URL::to('https://mkgtu.ru/vikon/sveden/assets/review/v1/common/css/vendor.css?1706157854153304') }}">
<link rel="stylesheet" type="text/css" href="{{ URL::to('https://mkgtu.ru/vikon/sveden/assets/review/v1/common/css/sveden.css?170615785463681') }}">
<link rel="stylesheet" type="text/css" href="{{ URL::to('https://mkgtu.ru/dist/css/style.css?1705561951294124') }}">
@endsection
@section('content')
@ -15,10 +15,9 @@
<div class="col-10">
@php
use App\Services\PageScrapper;
$pageScrapper = new PageScrapper("https://mkgtu.ru/sveden/paid_edu/", '<div class=["\']content_info["\']>');
$row = $pageScrapper->getHTML();
$content = $pageScrapper->normalizeURLFile($row);
$content = $pageScrapper->cutHTML($content,'/<footer(.*)<\/footer>/isU');
$PageScrapper = new PageScrapper("https://mkgtu.ru/sveden/paid_edu/",".content_info");
$content = $PageScrapper->printHTML();
$content = $PageScrapper->normalizePath();
echo $content;
@endphp

View File

@ -2,15 +2,14 @@
@section('content')
<div class=" fw-bolder fs-1 text-center py-5 lh-lg"> Подготовительные курсы </div>
<div class="container pt-4 d-flex justify-content-center">
<div class="container fs-5 pt-4 d-flex justify-content-center">
<div class="col-10">
@php
use App\Services\PageScrapper;
$pageScrapper = new PageScrapper("https://mkgtu.ru/postuplenie/podgotovitelnye-kursy/", '<div class=["\']content_info["\']>');
$row = $pageScrapper->getHTML();
$content = $pageScrapper->normalizeURLFile($row);
$content = $pageScrapper->cutHTML($content,'/<footer(.*)<\/footer>/isU');
$PageScrapper = new PageScrapper("https://mkgtu.ru/postuplenie/podgotovitelnye-kursy/",".content_info");
$content = $PageScrapper->printHTML();
$content = $PageScrapper->normalizePath();
echo $content;
@endphp