--- /dev/null
+<?php
+
+declare(strict_types=1);
+
+namespace yii_app\services\automark;
+
+/**
+ * Matches a new product name against an already labelled corpus using
+ * bag-of-words cosine similarity and copies the attributes of the closest row.
+ */
+class SimilarityMatcher
+{
+    /**
+     * Words removed during tokenization.
+     *
+     * Single-letter entries can never equal a token (tokens are 2+ letters);
+     * they are kept so the list stays complete if that limit ever changes.
+     */
+    private const STOP_WORDS = ['и', 'в', 'на', 'с', 'по', 'для', 'из', 'от', 'до', 'за', 'при', 'под'];
+
+    /**
+     * Find the best match for a product name in a labelled corpus.
+     *
+     * @param string $name   Name of the new product.
+     * @param array  $corpus List of rows shaped like
+     *                       ['name' => ..., 'category' => ..., 'species' => ..., ...].
+     *
+     * @return ParseResult|null Attributes of the most similar row (the earliest row wins a tie),
+     *                          or null when the corpus is empty, the name yields no tokens,
+     *                          or no row shares a token with the name.
+     */
+    public function findBestMatch(string $name, array $corpus): ?ParseResult
+    {
+        if ($corpus === []) {
+            return null;
+        }
+
+        $queryTokens = self::tokenize($name);
+        if ($queryTokens === []) {
+            return null;
+        }
+
+        // Starting at 0.0 with a strict ">" means only rows with a positive
+        // similarity can be selected, and the first of several equal rows is kept.
+        $bestScore = 0.0;
+        $bestItem = null;
+
+        foreach ($corpus as $item) {
+            $itemName = $item['name'] ?? '';
+            if (!is_string($itemName)) {
+                // A malformed row must not abort matching for the whole corpus.
+                continue;
+            }
+
+            $score = $this->cosineSimilarity($queryTokens, self::tokenize($itemName));
+            if ($score > $bestScore) {
+                $bestScore = $score;
+                $bestItem = $item;
+            }
+        }
+
+        if ($bestItem === null) {
+            return null;
+        }
+
+        return new ParseResult(
+            category: $bestItem['category'] ?? null,
+            subcategory: $bestItem['subcategory'] ?? null,
+            species: $bestItem['species'] ?? null,
+            sort: $bestItem['sort'] ?? null,
+            type: $bestItem['type'] ?? null,
+            size: isset($bestItem['size']) ? (int) $bestItem['size'] : null,
+            color: $bestItem['color'] ?? null,
+            confidence: round($bestScore, 4),
+            method: 'similarity',
+        );
+    }
+
+    /**
+     * Tokenize a product name: lower-case it, drop size markers, keep only
+     * runs of 2+ Cyrillic/Latin letters and remove stop words.
+     *
+     * @return string[] Tokens in their original order (duplicates preserved).
+     */
+    public static function tokenize(string $text): array
+    {
+        $text = mb_strtolower($text);
+
+        // Strip size markers such as "50см" or "70 cm". The negative lookahead
+        // keeps words that merely start with those letters intact, so
+        // "5 смесь" is not mangled into "есь". preg_replace() returns null on
+        // a PCRE error; fall back to the unmodified text in that case.
+        $text = preg_replace('/\d+\s*(?:см|cm)(?![а-яёa-z])/u', '', $text) ?? $text;
+
+        if (preg_match_all('/[а-яёa-z]{2,}/u', $text, $matches) === false) {
+            return [];
+        }
+
+        $tokens = array_filter(
+            $matches[0] ?? [],
+            static fn (string $token): bool => !in_array($token, self::STOP_WORDS, true),
+        );
+
+        // array_filter() preserves keys; re-index so callers get a plain list.
+        return array_values($tokens);
+    }
+
+    /**
+     * Cosine similarity between two token lists treated as term-frequency vectors.
+     *
+     * @param string[] $a
+     * @param string[] $b
+     *
+     * @return float Value in [0, 1] up to floating-point rounding; 0.0 when either list is empty.
+     */
+    private function cosineSimilarity(array $a, array $b): float
+    {
+        $vecA = array_count_values($a);
+        $vecB = array_count_values($b);
+
+        $normA = 0.0;
+        foreach ($vecA as $count) {
+            $normA += $count * $count;
+        }
+
+        $normB = 0.0;
+        foreach ($vecB as $count) {
+            $normB += $count * $count;
+        }
+
+        if ($normA === 0.0 || $normB === 0.0) {
+            return 0.0;
+        }
+
+        // Only terms present in both vectors contribute to the dot product.
+        $dotProduct = 0.0;
+        foreach ($vecA as $term => $count) {
+            $dotProduct += $count * ($vecB[$term] ?? 0);
+        }
+
+        return $dotProduct / (sqrt($normA) * sqrt($normB));
+    }
+}
--- /dev/null
+<?php
+
+declare(strict_types=1);
+
+namespace tests\unit\services\automark;
+
+use Codeception\Test\Unit;
+use yii_app\services\automark\SimilarityMatcher;
+use yii_app\services\automark\ParseResult;
+
+/**
+ * Unit tests for SimilarityMatcher: corpus matching, confidence ordering and tokenization.
+ *
+ * @covers \yii_app\services\automark\SimilarityMatcher
+ */
+class SimilarityMatcherTest extends Unit
+{
+    private SimilarityMatcher $matcher;
+
+    /**
+     * Create a fresh matcher before every test.
+     *
+     * Codeception's `_before()` hook is used instead of overriding setUp(),
+     * so the base class's own per-test setup is not bypassed.
+     */
+    protected function _before(): void
+    {
+        $this->matcher = new SimilarityMatcher();
+    }
+
+    /**
+     * Small labelled corpus: two roses and one chrysanthemum.
+     */
+    private function makeCorpus(): array
+    {
+        return [
+            ['name' => 'Роза красная 50см Premium', 'category' => 'Срезы', 'subcategory' => null, 'species' => 'Роза', 'sort' => 'Premium', 'type' => null, 'size' => 50, 'color' => 'Красная'],
+            ['name' => 'Роза белая 60см Экстра', 'category' => 'Срезы', 'subcategory' => null, 'species' => 'Роза', 'sort' => 'Экстра', 'type' => null, 'size' => 60, 'color' => 'Белая'],
+            ['name' => 'Хризантема белая 70 см', 'category' => 'Срезы', 'subcategory' => null, 'species' => 'Хризантема', 'sort' => null, 'type' => null, 'size' => 70, 'color' => 'Белая'],
+        ];
+    }
+
+    /**
+     * A name close to a corpus row yields a result carrying that row's attributes.
+     */
+    public function testFindsBestMatchForSimilarName(): void
+    {
+        $result = $this->matcher->findBestMatch('Роза красная 50 Premium', $this->makeCorpus());
+
+        $this->assertInstanceOf(ParseResult::class, $result);
+        $this->assertSame('Срезы', $result->category);
+        $this->assertSame('Роза', $result->species);
+        $this->assertSame('similarity', $result->method);
+        $this->assertGreaterThan(0.0, $result->confidence);
+    }
+
+    /**
+     * An empty corpus can never produce a match.
+     */
+    public function testReturnsNullForEmptyCorpus(): void
+    {
+        $result = $this->matcher->findBestMatch('Роза красная', []);
+
+        $this->assertNull($result);
+    }
+
+    /**
+     * A near-identical name scores higher than an unrelated one
+     * (an unrelated name may produce no match at all, treated as 0.0).
+     */
+    public function testConfidenceIsHigherForCloseMatch(): void
+    {
+        $corpus = $this->makeCorpus();
+        $closeMatch = $this->matcher->findBestMatch('Роза красная 50 Premium', $corpus);
+        $farMatch = $this->matcher->findBestMatch('Нечто совсем другое', $corpus);
+
+        // Fail with a clear assertion instead of a null dereference below.
+        $this->assertInstanceOf(ParseResult::class, $closeMatch);
+        $this->assertGreaterThan($farMatch?->confidence ?? 0.0, $closeMatch->confidence);
+    }
+
+    /**
+     * tokenize() lower-cases the words and drops the "50см" size marker.
+     */
+    public function testTokenizePublicMethod(): void
+    {
+        $tokens = SimilarityMatcher::tokenize('Роза Premium 50см');
+
+        $this->assertSame(['роза', 'premium'], $tokens);
+    }
+}