From 96667231c2445e7b1c4d768acc9c4b92b598012e Mon Sep 17 00:00:00 2001 From: Vladimir Fomichev Date: Tue, 30 Sep 2025 17:34:23 +0300 Subject: [PATCH] =?utf8?q?=D0=9F=D1=80=D0=B0=D0=B2=D0=B8=D0=BB=D1=8C=D0=BD?= =?utf8?q?=D1=8B=D0=B9=20=D0=BF=D0=BE=D1=80=D1=8F=D0=B4=D0=BE=D0=BA=20?= =?utf8?q?=D0=BA=D0=B0=D1=80=D1=82=D0=B8=D0=BD=D0=BE=D0=BA=20=D0=B8=20?= =?utf8?q?=D0=BE=D1=87=D0=B8=D1=81=D1=82=D0=BA=D0=B0=20=D0=BF=D1=80=D0=B8?= =?utf8?q?=20=D0=BF=D0=BE=D0=B2=D1=82=D0=BE=D1=80=D0=BD=D0=BE=D0=B9=20?= =?utf8?q?=D0=B7=D0=B0=D0=B3=D1=80=D1=83=D0=B7=D0=BA=D0=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit --- erp24/controllers/MatrixErpController.php | 80 +++++--- erp24/services/ProductParserService.php | 212 +++++++++++++++++----- 2 files changed, 221 insertions(+), 71 deletions(-) diff --git a/erp24/controllers/MatrixErpController.php b/erp24/controllers/MatrixErpController.php index aec0f927..c82762b1 100644 --- a/erp24/controllers/MatrixErpController.php +++ b/erp24/controllers/MatrixErpController.php @@ -453,6 +453,7 @@ class MatrixErpController extends Controller if (!empty($matrixProduct['image_urls']) && is_array($matrixProduct['image_urls'])) { $imageUrls = $matrixProduct['image_urls']; + $this->purgeMatrixMedia($matrixProductProperty, ['foto']); foreach ($imageUrls as $imgUrl) { $imgUrl = trim((string)$imgUrl); if ($imgUrl === '' || isset($seen[$imgUrl])) { @@ -465,7 +466,6 @@ class MatrixErpController extends Controller if (!empty($info) && $info['fileType'] === 'image') { $this->saveMatrixMediaForMatrixProperty($info, 'foto', $matrixProductProperty, $adminId); } elseif (!empty($info)) { - // На случай, если по ссылке оказался не image — можно пропустить или тоже сохранять как 'foto' Yii::warning("Ожидали image, но получили {$info['fileType']} для {$imgUrl}"); } } catch (\Throwable $e) { @@ -481,9 +481,10 @@ class MatrixErpController extends Controller $seen[$videoUrl] = true; try { + 
$this->purgeMatrixMedia($matrixProductProperty, ['video']); $info = FileService::saveFromUrlToUploads($videoUrl, $adminId); if (!empty($info)) { - $name = ($info['fileType'] === 'video') ? 'video' : 'video'; + $name = 'video'; /* the former ternary returned 'video' in both branches */ $this->saveMatrixMediaForMatrixProperty($info, $name, $matrixProductProperty, $adminId); } } catch (\Throwable $e) { @@ -503,8 +504,6 @@ class MatrixErpController extends Controller $matrixProductProperty->flowwow_subcategory = $subcategory; $matrixProductProperty->yandex_category = "Цветы, букеты, композиции"; - - if (isset($matrixProduct['properties']['Размер']['ширина'])) $matrixProductProperty->width = (float)$matrixProduct['properties']['Размер']['ширина']; if (isset($matrixProduct['properties']['Размер']['высота'])) $matrixProductProperty->height = (float)$matrixProduct['properties']['Размер']['высота']; @@ -512,29 +511,56 @@ class MatrixErpController extends Controller return $matrixProductProperty->save(); } - private function saveMatrixMediaForMatrixProperty(array $info, string $name, $matrixProp, $adminId) { - // Files - $file = new Files(); - $file->created_at = date("Y-m-d H:i:s"); - $file->entity_id = $matrixProp->id; - $file->entity = "matrix_media"; - $file->file_type = $info['fileType']; // 'image' или 'video' - $file->url = $info['target_base_file']; - if(!$file->save()) { - Yii::error("Ошибка сохранения в файлы " . json_encode($file->getErrors(), JSON_UNESCAPED_UNICODE)); - } + private function saveMatrixMediaForMatrixProperty(array $info, string $name, $matrixProp, $adminId) + { + // Files + $file = new Files(); + $file->created_at = date("Y-m-d H:i:s"); + $file->entity_id = $matrixProp->id; + $file->entity = "matrix_media"; + $file->file_type = $info['fileType']; // 'image' or 'video' + $file->url = $info['target_base_file']; + if (!$file->save()) { + Yii::error("Ошибка сохранения в файлы " . 
json_encode($file->getErrors(), JSON_UNESCAPED_UNICODE)); + } - // MatrixErpMedia - $mm = new MatrixErpMedia(); - $mm->guid = $matrixProp->guid; - $mm->created_admin_id = $adminId; - $mm->date = date("Y-m-d H:i:s"); - $mm->created_at = date("Y-m-d H:i:s"); - $mm->file_id = $file->id; - $mm->name = $name; // 'foto' или 'video' - - if(!$mm->save()) { - Yii::error("Ошибка сохранения в медиа " . json_encode($mm->getErrors(), JSON_UNESCAPED_UNICODE)); - } + // MatrixErpMedia + $mm = new MatrixErpMedia(); + $mm->guid = $matrixProp->guid; + $mm->created_admin_id = $adminId; + $mm->date = date("Y-m-d H:i:s"); + $mm->created_at = date("Y-m-d H:i:s"); + $mm->file_id = $file->id; + $mm->name = $name; // 'foto' или 'video' + + if (!$mm->save()) { + Yii::error("Ошибка сохранения в медиа " . json_encode($mm->getErrors(), JSON_UNESCAPED_UNICODE)); } + } + + private function purgeMatrixMedia(MatrixErpProperty $matrixProp, array $names = ['foto','video']): void + { + $names = array_values(array_unique(array_filter($names))); + if (!$names) return; + + $fileIds = (new \yii\db\Query()) + ->select('mm.file_id') + ->from(MatrixErpMedia::tableName() . ' mm') + ->innerJoin(Files::tableName() . 
' f', 'f.id = mm.file_id') + ->where([ + 'mm.guid' => $matrixProp->guid, + 'mm.name' => $names, + 'f.entity' => 'matrix_media', + 'f.entity_id' => $matrixProp->id, + ]) + ->column(); + + $fileIds = array_map('intval', array_unique($fileIds)); + + MatrixErpMedia::deleteAll(['guid' => $matrixProp->guid, 'name' => $names]); + + if ($fileIds) { + Files::deleteAll(['id' => $fileIds, 'entity' => 'matrix_media', 'entity_id' => $matrixProp->id]); + } + } } \ No newline at end of file diff --git a/erp24/services/ProductParserService.php b/erp24/services/ProductParserService.php index 6bb84976..4d1a86d4 100644 --- a/erp24/services/ProductParserService.php +++ b/erp24/services/ProductParserService.php @@ -2,6 +2,7 @@ namespace yii_app\services; use DOMDocument; +use DOMElement; use DOMXPath; class ProductParserService { @@ -25,7 +26,7 @@ class ProductParserService { } return [ - 'name' => $this->extractName($xpath) ?: $this->extractOgTitle($xpath), // мягкий фолбэк + 'name' => $this->extractName($xpath) ?: $this->extractOgTitle($xpath), 'image_url' => $mainImage, 'description' => $this->extractDescription($xpath), 'properties' => $this->extractProperties($xpath), @@ -63,61 +64,184 @@ class ProductParserService { $node = $xpath->query("//meta[@property='og:title' or @name='og:title']/@content")->item(0); return $node ? 
trim($node->nodeValue) : ''; } - private function extractImageUrls(DOMXPath $xpath): array + private function extractImageUrls(DOMXPath $documentXPath): array { - $urls = []; - - $candidates = $xpath->query(" - //img[ - contains(@class,'main-image') or - contains(@class,'main-image-content') or - contains(@class,'swiper') or - contains(@class,'slide') or - contains(@class,'gallery') or - contains(@class,'product') or - contains(@class,'image') - ] | //source[@type='image/jpeg' or @type='image/webp'] - "); - - foreach ($candidates as $el) { - /** @var DOMElement $el */ - $src = $el->getAttribute('src') ?: $el->getAttribute('data-src'); - $srcset = $el->getAttribute('srcset') ?: $el->getAttribute('data-srcset'); - - if ($src) $urls[] = $src; - - if ($srcset) { - foreach (explode(',', $srcset) as $part) { - $u = trim(preg_replace('~\s+\d+[wx]$~', '', trim($part))); // отрезать " 524w" - if ($u) $urls[] = $u; + $imageUrls = []; + $dedupeByUrl = []; + $seenSlideIndexes = []; + + $pushUrl = function (string $rawUrl) use (&$imageUrls, &$dedupeByUrl) { + $normalizedUrl = $this->normalizeUrl($rawUrl); + if ($normalizedUrl === '' || isset($dedupeByUrl[$normalizedUrl])) { + return; + } + $dedupeByUrl[$normalizedUrl] = true; + $imageUrls[] = $normalizedUrl; + }; + + // Первый wrapper основного слайдера товара + $wrappers = $documentXPath->query("(//div[contains(@class,'product-slider')]//div[contains(@class,'product-detail-slider')]//div[contains(@class,'swiper-wrapper')])[1]"); + if ($wrappers && $wrappers->length) { + /** @var DOMElement $wrapper */ + $wrapper = $wrappers->item(0); + $localXPath = new DOMXPath($wrapper->ownerDocument); + + // PASS 1: Слайды без класса duplicate (основные) + $regularSlides = $localXPath->query(".//div[contains(@class,'swiper-slide') and not(contains(@class,'duplicate'))]", $wrapper); + if ($regularSlides && $regularSlides->length) { + /** @var DOMElement $slide */ + foreach ($regularSlides as $slide) { + $slideIndex = 
($slide->getAttribute('data-swiper-slide-index') !== '') ? $slide->getAttribute('data-swiper-slide-index') : 'obj:' . spl_object_id($slide); /* '0' is a valid slide index, so test for '' instead of truthiness; the 'obj:' prefix keeps fallback keys from colliding with numeric slide indexes */ + if (isset($seenSlideIndexes[$slideIndex])) { + continue; + } + + $candidateUrl = $this->extractSlotUrl($slide); + if (!$this->isCdnFlowersUrl($candidateUrl)) { + $candidateUrl = $this->extractNearestZoomBackground($slide) ?: $candidateUrl; + } + if ($candidateUrl && $this->normalizeUrl($candidateUrl) !== '') { + $seenSlideIndexes[$slideIndex] = true; + $pushUrl($candidateUrl); + } + } + } + + // PASS 2: duplicate slides — pick up only the indexes not seen in PASS 1 + $duplicateSlides = $localXPath->query(".//div[contains(@class,'swiper-slide') and contains(@class,'duplicate')]", $wrapper); + if ($duplicateSlides && $duplicateSlides->length) { + /** @var DOMElement $slide */ + foreach ($duplicateSlides as $slide) { + $slideIndex = $slide->getAttribute('data-swiper-slide-index'); + if ($slideIndex === '' || isset($seenSlideIndexes[$slideIndex])) { + continue; + } + + $candidateUrl = $this->extractSlotUrl($slide); + if (!$this->isCdnFlowersUrl($candidateUrl)) { + $candidateUrl = $this->extractNearestZoomBackground($slide) ?: $candidateUrl; + } + if ($candidateUrl && $this->normalizeUrl($candidateUrl) !== '') { + $seenSlideIndexes[$slideIndex] = true; + $pushUrl($candidateUrl); + } } } } - foreach ($xpath->query("//script[@type='application/ld+json']") as $script) { - $json = trim($script->nodeValue ?? ''); - if (!$json) continue; - $data = json_decode($json, true); - if (!is_array($data)) continue; - - $graphs = isset($data['@graph']) && is_array($data['@graph']) ? $data['@graph'] : [$data]; - foreach ($graphs as $node) { - if (!is_array($node)) continue; - if (($node['@type'] ?? 
'') === 'Product' && !empty($node['image'])) { - if (is_string($node['image'])) $urls[] = $node['image']; - if (is_array($node['image'])) $urls = array_merge($urls, array_values($node['image'])); + // Глобальный фолбэк: фон из js-image-zoom + if (count($imageUrls) === 0) { + $fallbackUrl = $this->extractZoomBackground($documentXPath); + if ($fallbackUrl) { + $pushUrl($fallbackUrl); + } + } + + return array_values($imageUrls); + } + + /** Нормализация и фильтрация URL */ + private function normalizeUrl(string $url): string + { + $url = trim($url); + if ($url === '') return ''; + + if (!$this->isCdnImageUrl($url)) return ''; + // Только фото товаров, без SEO + if (!preg_match('~/data/flowers/~i', $url) || preg_match('~/data/seo/.*\.webp(?:\?.*)?$~i', $url)) return ''; + // Удаляем миниатюры 262x262 + if (preg_match('~/data/flowers/262x262/~i', $url)) return ''; + // 524x524 -> 1000x1000 + $url = preg_replace('~(/data/flowers/)524x524(/)~i', '$1' . '1000x1000' . '$2', $url); + // Оставляем только 1000x1000 + if (!preg_match('~/data/flowers/1000x1000/~i', $url)) return ''; + + return $url; + } + + /** + * Достаёт URL из одного слайда: + * приоритет: