if (!empty($matrixProduct['image_urls']) && is_array($matrixProduct['image_urls'])) {
$imageUrls = $matrixProduct['image_urls'];
+ $this->purgeMatrixMedia($matrixProductProperty, ['foto']);
foreach ($imageUrls as $imgUrl) {
$imgUrl = trim((string)$imgUrl);
if ($imgUrl === '' || isset($seen[$imgUrl])) {
if (!empty($info) && $info['fileType'] === 'image') {
$this->saveMatrixMediaForMatrixProperty($info, 'foto', $matrixProductProperty, $adminId);
} elseif (!empty($info)) {
- // На случай, если по ссылке оказался не image — можно пропустить или тоже сохранять как 'foto'
Yii::warning("Ожидали image, но получили {$info['fileType']} для {$imgUrl}");
}
} catch (\Throwable $e) {
$seen[$videoUrl] = true;
try {
+ $this->purgeMatrixMedia($matrixProductProperty, ['video']);
$info = FileService::saveFromUrlToUploads($videoUrl, $adminId);
if (!empty($info)) {
- $name = ($info['fileType'] === 'video') ? 'video' : 'video';
+ $name = ($info['fileType'] === 'video') ? 'video' : 'video';
$this->saveMatrixMediaForMatrixProperty($info, $name, $matrixProductProperty, $adminId);
}
} catch (\Throwable $e) {
$matrixProductProperty->flowwow_subcategory = $subcategory;
$matrixProductProperty->yandex_category = "Цветы, букеты, композиции";
-
-
if (isset($matrixProduct['properties']['Размер']['ширина'])) $matrixProductProperty->width = (float)$matrixProduct['properties']['Размер']['ширина'];
if (isset($matrixProduct['properties']['Размер']['высота'])) $matrixProductProperty->height = (float)$matrixProduct['properties']['Размер']['высота'];
return $matrixProductProperty->save();
}
- private function saveMatrixMediaForMatrixProperty(array $info, string $name, $matrixProp, $adminId) {
- // Files
- $file = new Files();
- $file->created_at = date("Y-m-d H:i:s");
- $file->entity_id = $matrixProp->id;
- $file->entity = "matrix_media";
- $file->file_type = $info['fileType']; // 'image' или 'video'
- $file->url = $info['target_base_file'];
- if(!$file->save()) {
- Yii::error("Ошибка сохранения в файлы " . json_encode($file->getErrors(), JSON_UNESCAPED_UNICODE));
- }
+    /**
+     * Stores one downloaded media file for a matrix property: a Files row plus the
+     * MatrixErpMedia row that links it to the property's guid.
+     *
+     * Save failures are logged via Yii::error and are not thrown to the caller.
+     *
+     * @param array  $info       Download result; reads the 'fileType' and 'target_base_file' keys.
+     * @param string $name       Media slot name written to MatrixErpMedia::name ('foto' or 'video').
+     * @param mixed  $matrixProp Object whose id and guid identify the owning property.
+     * @param mixed  $adminId    Value written to MatrixErpMedia::created_admin_id.
+     */
+    private function saveMatrixMediaForMatrixProperty(array $info, string $name, $matrixProp, $adminId)
+    {
+        // One timestamp for both rows so they cannot straddle a second boundary.
+        $now = date("Y-m-d H:i:s");
+
+        // Files
+        $file = new Files();
+        $file->created_at = $now;
+        $file->entity_id = $matrixProp->id;
+        $file->entity = "matrix_media";
+        $file->file_type = $info['fileType']; // 'image' or 'video'
+        $file->url = $info['target_base_file'];
+        if (!$file->save()) {
+            Yii::error("Ошибка сохранения в файлы " . json_encode($file->getErrors(), JSON_UNESCAPED_UNICODE));
+            // No persisted Files row means there is no id to link; do not create a media row
+            // pointing at a null file_id.
+            return;
+        }
- // MatrixErpMedia
- $mm = new MatrixErpMedia();
- $mm->guid = $matrixProp->guid;
- $mm->created_admin_id = $adminId;
- $mm->date = date("Y-m-d H:i:s");
- $mm->created_at = date("Y-m-d H:i:s");
- $mm->file_id = $file->id;
- $mm->name = $name; // 'foto' или 'video'
-
- if(!$mm->save()) {
- Yii::error("Ошибка сохранения в медиа " . json_encode($mm->getErrors(), JSON_UNESCAPED_UNICODE));
- }
+        // MatrixErpMedia
+        $mm = new MatrixErpMedia();
+        $mm->guid = $matrixProp->guid;
+        $mm->created_admin_id = $adminId;
+        $mm->date = $now;
+        $mm->created_at = $now;
+        $mm->file_id = $file->id;
+        $mm->name = $name; // 'foto' or 'video'
+
+        if (!$mm->save()) {
+            Yii::error("Ошибка сохранения в медиа " . json_encode($mm->getErrors(), JSON_UNESCAPED_UNICODE));
}
+    }
+
+    /**
+     * Removes previously stored media of the given kinds for a matrix property.
+     *
+     * First collects the file ids referenced by the property's MatrixErpMedia rows (matched by
+     * guid and name) whose Files row is tagged entity "matrix_media" for this property id. Then
+     * deletes the MatrixErpMedia rows by guid and name, and finally the collected Files rows.
+     *
+     * @param MatrixErpProperty $matrixProp Property whose guid and id scope the deletion.
+     * @param array             $names      Media names to purge; empty values are ignored and an
+     *                                      empty list makes the call a no-op.
+     */
+    private function purgeMatrixMedia(MatrixErpProperty $matrixProp, array $names = ['foto','video']): void
+    {
+        $mediaNames = array_values(array_unique(array_filter($names)));
+        if (empty($mediaNames)) {
+            return;
+        }
+
+        $linkedFilesCondition = [
+            'mm.guid' => $matrixProp->guid,
+            'mm.name' => $mediaNames,
+            'f.entity' => 'matrix_media',
+            'f.entity_id' => $matrixProp->id,
+        ];
+
+        // The ids must be read before the media rows that reference them are deleted.
+        $fileIdQuery = new \yii\db\Query();
+        $fileIdQuery
+            ->select('mm.file_id')
+            ->from(MatrixErpMedia::tableName() . ' mm')
+            ->innerJoin(Files::tableName() . ' f', 'f.id = mm.file_id')
+            ->where($linkedFilesCondition);
+        $linkedFileIds = array_map('intval', array_unique($fileIdQuery->column()));
+
+        MatrixErpMedia::deleteAll(['guid' => $matrixProp->guid, 'name' => $mediaNames]);
+
+        if (!empty($linkedFileIds)) {
+            Files::deleteAll(['id' => $linkedFileIds, 'entity' => 'matrix_media', 'entity_id' => $matrixProp->id]);
+        }
+    }
}
\ No newline at end of file
namespace yii_app\services;
use DOMDocument;
+use DOMElement;
use DOMXPath;
class ProductParserService {
}
return [
- 'name' => $this->extractName($xpath) ?: $this->extractOgTitle($xpath), // мягкий фолбэк
+ 'name' => $this->extractName($xpath) ?: $this->extractOgTitle($xpath),
'image_url' => $mainImage,
'description' => $this->extractDescription($xpath),
'properties' => $this->extractProperties($xpath),
$node = $xpath->query("//meta[@property='og:title' or @name='og:title']/@content")->item(0);
return $node ? trim($node->nodeValue) : '';
}
- private function extractImageUrls(DOMXPath $xpath): array
+    /**
+     * Collects product photo URLs from the main product slider.
+     *
+     * Regular slides are read first; slides whose class contains "duplicate" are used only for
+     * slide indexes that produced no URL in the first pass. Every candidate goes through
+     * normalizeUrl(), so only URLs it accepts are returned, each at most once. If nothing was
+     * collected, the page-level zoom background is tried as a last resort.
+     *
+     * @param DOMXPath $documentXPath XPath bound to the parsed product page.
+     * @return string[] Normalised image URLs in discovery order.
+     */
+    private function extractImageUrls(DOMXPath $documentXPath): array
{
- $urls = [];
-
- $candidates = $xpath->query("
- //img[
- contains(@class,'main-image') or
- contains(@class,'main-image-content') or
- contains(@class,'swiper') or
- contains(@class,'slide') or
- contains(@class,'gallery') or
- contains(@class,'product') or
- contains(@class,'image')
- ] | //source[@type='image/jpeg' or @type='image/webp']
- ");
-
- foreach ($candidates as $el) {
- /** @var DOMElement $el */
- $src = $el->getAttribute('src') ?: $el->getAttribute('data-src');
- $srcset = $el->getAttribute('srcset') ?: $el->getAttribute('data-srcset');
-
- if ($src) $urls[] = $src;
-
- if ($srcset) {
- foreach (explode(',', $srcset) as $part) {
- $u = trim(preg_replace('~\s+\d+[wx]$~', '', trim($part))); // отрезать " 524w"
- if ($u) $urls[] = $u;
+        $imageUrls = [];
+        $dedupeByUrl = [];
+        $seenSlideIndexes = [];
+
+        // Appends a URL once, after normalisation; rejected or repeated URLs are ignored.
+        $pushUrl = function (string $rawUrl) use (&$imageUrls, &$dedupeByUrl) {
+            $normalizedUrl = $this->normalizeUrl($rawUrl);
+            if ($normalizedUrl === '' || isset($dedupeByUrl[$normalizedUrl])) {
+                return;
+            }
+            $dedupeByUrl[$normalizedUrl] = true;
+            $imageUrls[] = $normalizedUrl;
+        };
+
+        // First wrapper of the product's main slider
+        $wrappers = $documentXPath->query("(//div[contains(@class,'product-slider')]//div[contains(@class,'product-detail-slider')]//div[contains(@class,'swiper-wrapper')])[1]");
+        if ($wrappers && $wrappers->length) {
+            /** @var DOMElement $wrapper */
+            $wrapper = $wrappers->item(0);
+            $localXPath = new DOMXPath($wrapper->ownerDocument);
+
+            // PASS 1: slides without the "duplicate" class (the primary slides)
+            $regularSlides = $localXPath->query(".//div[contains(@class,'swiper-slide') and not(contains(@class,'duplicate'))]", $wrapper);
+            if ($regularSlides && $regularSlides->length) {
+                /** @var DOMElement $slide */
+                foreach ($regularSlides as $slide) {
+                    $slideIndexAttr = $slide->getAttribute('data-swiper-slide-index');
+                    // Compare against '' explicitly: "?:" treated the valid index "0" as missing.
+                    // The prefix keeps the object-id fallback from colliding with a real numeric
+                    // slide index used as an array key.
+                    $slideIndex = $slideIndexAttr !== '' ? $slideIndexAttr : 'node#' . spl_object_id($slide);
+                    if (isset($seenSlideIndexes[$slideIndex])) {
+                        continue;
+                    }
+
+                    $candidateUrl = $this->extractSlotUrl($slide);
+                    if (!$this->isCdnFlowersUrl($candidateUrl)) {
+                        $candidateUrl = $this->extractNearestZoomBackground($slide) ?: $candidateUrl;
+                    }
+                    // Mark the index as seen only when the slide really yielded an accepted URL,
+                    // so PASS 2 can still fill it from a duplicate slide.
+                    if ($candidateUrl && $this->normalizeUrl($candidateUrl) !== '') {
+                        $seenSlideIndexes[$slideIndex] = true;
+                        $pushUrl($candidateUrl);
+                    }
+                }
+            }
+
+            // PASS 2: duplicate slides — fill in the slide indexes that are still missing
+            $duplicateSlides = $localXPath->query(".//div[contains(@class,'swiper-slide') and contains(@class,'duplicate')]", $wrapper);
+            if ($duplicateSlides && $duplicateSlides->length) {
+                /** @var DOMElement $slide */
+                foreach ($duplicateSlides as $slide) {
+                    $slideIndex = $slide->getAttribute('data-swiper-slide-index');
+                    if ($slideIndex === '' || isset($seenSlideIndexes[$slideIndex])) {
+                        continue;
+                    }
+
+                    $candidateUrl = $this->extractSlotUrl($slide);
+                    if (!$this->isCdnFlowersUrl($candidateUrl)) {
+                        $candidateUrl = $this->extractNearestZoomBackground($slide) ?: $candidateUrl;
+                    }
+                    if ($candidateUrl && $this->normalizeUrl($candidateUrl) !== '') {
+                        $seenSlideIndexes[$slideIndex] = true;
+                        $pushUrl($candidateUrl);
+                    }
}
}
}
- foreach ($xpath->query("//script[@type='application/ld+json']") as $script) {
- $json = trim($script->nodeValue ?? '');
- if (!$json) continue;
- $data = json_decode($json, true);
- if (!is_array($data)) continue;
-
- $graphs = isset($data['@graph']) && is_array($data['@graph']) ? $data['@graph'] : [$data];
- foreach ($graphs as $node) {
- if (!is_array($node)) continue;
- if (($node['@type'] ?? '') === 'Product' && !empty($node['image'])) {
- if (is_string($node['image'])) $urls[] = $node['image'];
- if (is_array($node['image'])) $urls = array_merge($urls, array_values($node['image']));
+        // Global fallback: background image of the js-image-zoom element
+        if (count($imageUrls) === 0) {
+            $fallbackUrl = $this->extractZoomBackground($documentXPath);
+            if ($fallbackUrl) {
+                $pushUrl($fallbackUrl);
+            }
+        }
+
+        return array_values($imageUrls);
+    }
+
+    /**
+     * Normalises a candidate image URL and rejects anything that is not a full-size product photo.
+     *
+     * A URL is kept only if isCdnImageUrl() accepts it and its path contains /data/flowers/.
+     * SEO .webp assets and 262x262 thumbnails are dropped, 524x524 is rewritten to 1000x1000,
+     * and in the end only /data/flowers/1000x1000/ URLs survive.
+     *
+     * @param string $url Raw URL as found in the page.
+     * @return string The normalised URL, or '' when the URL is rejected.
+     */
+    private function normalizeUrl(string $url): string
+    {
+        $url = trim($url);
+        if ($url === '') return '';
+
+        if (!$this->isCdnImageUrl($url)) return '';
+        // Product photos only, no SEO assets
+        if (!preg_match('~/data/flowers/~i', $url) || preg_match('~/data/seo/.*\.webp(?:\?.*)?$~i', $url)) return '';
+        // Drop 262x262 thumbnails
+        if (preg_match('~/data/flowers/262x262/~i', $url)) return '';
+        // 524x524 -> 1000x1000.
+        // The braces are required: a bare "$1" directly followed by "1000x1000" is read by
+        // preg_replace as group 11, which is empty, so the "/data/flowers/" prefix was lost
+        // and every 524x524 URL ended up rejected by the check below.
+        $url = preg_replace('~(/data/flowers/)524x524(/)~i', '${1}1000x1000${2}', $url);
+        if ($url === null) return ''; // PCRE failure: treat the URL as unusable
+        // Keep 1000x1000 only
+        if (!preg_match('~/data/flowers/1000x1000/~i', $url)) return '';
+
+        return $url;
+    }
+
+    /**
+     * Picks the media URL of a single slide.
+     *
+     * Priority: poster of the first <video>, then data-src of the first <img>, then its src.
+     * The value is returned as found in the markup and is not validated here.
+     *
+     * @return string|null The first non-empty candidate, or null when the slide has none.
+     */
+    private function extractSlotUrl(DOMElement $slideElement): ?string
+    {
+        $firstVideo = $slideElement->getElementsByTagName('video')->item(0);
+        if ($firstVideo !== null) {
+            $poster = $firstVideo->getAttribute('poster');
+            if ($poster) {
+                return $poster;
+            }
+        }
+
+        $firstImage = $slideElement->getElementsByTagName('img')->item(0);
+        if ($firstImage !== null) {
+            $lazySource = $firstImage->getAttribute('data-src');
+            $imageSource = $lazySource ?: $firstImage->getAttribute('src');
+            if ($imageSource) {
+                return $imageSource;
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Looks up the zoom-pane background image inside the closest block whose class contains
+     * "main-image", walking up from the slide itself.
+     *
+     * Only the first such block is inspected; if it yields no URL the search stops.
+     *
+     * @return string|null URL parsed from the zoom element's inline style, or null if not found.
+     */
+    private function extractNearestZoomBackground(DOMElement $contextSlide): ?string
+    {
+        $scopedXPath = new DOMXPath($contextSlide->ownerDocument);
+
+        for (
+            $node = $contextSlide;
+            $node && $node->nodeType === XML_ELEMENT_NODE;
+            $node = $node->parentNode instanceof DOMElement ? $node->parentNode : null
+        ) {
+            $classAttr = $node->attributes ? $node->getAttribute('class') : '';
+            if ($classAttr && strpos($classAttr, 'main-image') !== false) {
+                $zoomStyles = $scopedXPath->query(".//div[@id='js-image-zoom' or contains(@class,'product-detail-image-zoom')]/@style", $node);
+                if ($zoomStyles && $zoomStyles->length) {
+                    $backgroundUrl = $this->parseBackgroundImageUrl($zoomStyles->item(0)->nodeValue ?? '');
+                    if ($backgroundUrl) {
+                        return $backgroundUrl;
+                    }
}
+                break;
}
}
+        return null;
+    }
- $urls = array_values(array_unique(array_filter(array_map('trim', $urls))));
- $urls = array_values(array_filter($urls, function ($u) {
- return (bool)preg_match('~^https?://content\d*\.flowwow-images\.com/.+\.(?:jpe?g|png|webp|gif)(?:\?.*)?$~i', $u);
- }));
+    /**
+     * Page-wide fallback: reads the background image of the first #js-image-zoom or
+     * .product-detail-image-zoom element that carries a style attribute.
+     *
+     * @return string|null URL parsed from that inline style, or null when nothing matches.
+     */
+    private function extractZoomBackground(DOMXPath $documentXPath): ?string
+    {
+        $zoomStyles = $documentXPath->query("//div[@id='js-image-zoom' or contains(@class,'product-detail-image-zoom')]/@style");
+        if (!$zoomStyles || !$zoomStyles->length) {
+            return null;
+        }
+
+        return $this->parseBackgroundImageUrl($zoomStyles->item(0)->nodeValue ?? '');
+    }
- return $urls;
+    /**
+     * Extracts the http(s) URL from a "background-image: url(...)" CSS declaration.
+     *
+     * The URL may be unquoted or wrapped in matching single or double quotes.
+     *
+     * @return string|null The URL, or null when the declaration contains no such value.
+     */
+    private function parseBackgroundImageUrl(string $styleDeclaration): ?string
+    {
+        $found = preg_match(
+            '~background-image:\s*url\((["\']?)(?<u>https?://[^)\'"]+)\1\)~i',
+            $styleDeclaration,
+            $captured
+        );
+
+        return $found ? $captured['u'] : null;
+    }
+
+    /**
+     * Tells whether the URL is an http(s) link to a jpg/jpeg/png/webp/gif file on a
+     * content*.flowwow-images.com host (an optional query string is allowed).
+     */
+    private function isCdnImageUrl(?string $url): bool
+    {
+        // Null and empty values are never valid; everything else must match the CDN pattern.
+        return $url
+            && preg_match('~^https?://content\d*\.flowwow-images\.com/.+\.(?:jpe?g|png|webp|gif)(?:\?.*)?$~i', $url) === 1;
}
+    /**
+     * Tells whether the URL is a CDN image (see isCdnImageUrl) located under /data/flowers/.
+     */
+    private function isCdnFlowersUrl(?string $url): bool
+    {
+        return $this->isCdnImageUrl($url)
+            && preg_match('~/data/flowers/~i', $url) === 1;
+    }
+
+
+
private function extractDescription(DOMXPath $xpath): string
{
$expression =