Skip to content

Elasticsearch Integration

DealAI.lt uses Elasticsearch 7.x as its full-text search engine, providing fast, relevant search results across 60,000+ products with Lithuanian language support.

Server Details:

  • Host: 91.99.113.45
  • Port: 9200
  • Index: products
  • Version: Elasticsearch 7.x

Purpose: Proper handling of Lithuanian language text

Configuration:

{
"settings": {
"analysis": {
"analyzer": {
"lithuanian": {
"type": "snowball",
"language": "Lithuanian"
}
},
"filter": {
"lithuanian_stop": {
"type": "stop",
"stopwords": "_lithuanian_"
},
"lithuanian_stemmer": {
"type": "stemmer",
"language": "lithuanian"
}
}
},
"number_of_shards": 3,
"number_of_replicas": 1
}
}

Complete Mapping:

{
"mappings": {
"properties": {
"id": {
"type": "integer"
},
"title": {
"type": "text",
"analyzer": "lithuanian",
"fields": {
"keyword": {
"type": "keyword"
},
"raw": {
"type": "keyword"
}
}
},
"brand": {
"type": "keyword",
"fields": {
"text": {
"type": "text",
"analyzer": "lithuanian"
}
}
},
"description": {
"type": "text",
"analyzer": "lithuanian"
},
"price": {
"type": "float"
},
"list_price": {
"type": "float"
},
"discount_percentage": {
"type": "float"
},
"availability": {
"type": "keyword"
},
"category": {
"type": "keyword"
},
"category_path": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"sku": {
"type": "keyword"
},
"ean": {
"type": "keyword"
},
"image_url": {
"type": "keyword",
"index": false
},
"product_url": {
"type": "keyword",
"index": false
},
"specifications": {
"type": "object",
"enabled": false
},
"created_at": {
"type": "date"
},
"updated_at": {
"type": "date"
}
}
}
}

File: /inc/elasticsearch.php

require 'vendor/autoload.php';
use Elasticsearch\ClientBuilder;
/**
 * Return the shared Elasticsearch client, building it on first use.
 *
 * The instance is held in a function-local static, so every later call
 * returns the object created by the first one. The single host entry is
 * assembled from the ELASTICSEARCH_HOST and ELASTICSEARCH_PORT constants,
 * which must be defined before the first call.
 *
 * @return mixed Whatever ClientBuilder::create()->setHosts(...)->build() yields.
 */
function get_elasticsearch_client() {
    static $client = null;

    // Fast path: already built by an earlier call.
    if ($client !== null) {
        return $client;
    }

    $hosts = [ELASTICSEARCH_HOST . ':' . ELASTICSEARCH_PORT];
    $builder = ClientBuilder::create()->setHosts($hosts);
    $client = $builder->build();

    return $client;
}

Basic Search:

/**
 * Run a paginated product search against the "products" index.
 *
 * The query clause comes from build_search_query() and the sort clause from
 * build_sort_params(); highlighting is requested for title, description
 * and brand with default highlighter settings.
 *
 * @param mixed $query    Free-text search string (may be empty).
 * @param array $filters  Filter values forwarded to build_search_query()
 *                        and build_sort_params().
 * @param int   $page     1-based page number. Values below 1 are treated as 1
 *                        so the computed offset can never be negative.
 * @param int   $per_page Page size. Negative values are treated as 0.
 *
 * @return mixed The raw response returned by the client's search() call.
 */
function elasticsearch_search($query, $filters = [], $page = 1, $per_page = 20) {
    $client = get_elasticsearch_client();

    // Clamp paging input: a page of 0 or less (or a negative size) would
    // otherwise yield a negative from/size, which Elasticsearch rejects.
    $page = max(1, (int)$page);
    $per_page = max(0, (int)$per_page);
    $from = ($page - 1) * $per_page;

    $params = [
        'index' => 'products',
        'body' => [
            'from' => $from,
            'size' => $per_page,
            'query' => build_search_query($query, $filters),
            'highlight' => [
                'fields' => [
                    // Empty objects (not empty arrays) so they serialise as {}.
                    'title' => new \stdClass(),
                    'description' => new \stdClass(),
                    'brand' => new \stdClass()
                ]
            ],
            'sort' => build_sort_params($filters)
        ]
    ];

    return $client->search($params);
}

Query Builder:

/**
 * Build the bool query used for product search.
 *
 * The free-text part goes into "must" (scored); category, price range,
 * brand and availability go into "filter" (not scored).
 *
 * @param mixed $query   Free-text search string; skipped when empty.
 * @param array $filters Optional keys: category, min_price, max_price,
 *                       brand (single value or array), availability.
 *                       Empty values are ignored.
 *
 * @return array ['bool' => ['must' => [...], 'filter' => [...]]]
 */
function build_search_query($query, $filters) {
    $must = [];
    $filter = [];

    // Full-text search
    if (!empty($query)) {
        $must[] = [
            'multi_match' => [
                'query' => $query,
                'fields' => ['title^3', 'brand^2', 'description', 'sku'],
                'fuzziness' => 'AUTO',
                'operator' => 'or'
            ]
        ];
    }

    // Category filter
    if (!empty($filters['category'])) {
        $filter[] = [
            'term' => [
                'category' => $filters['category']
            ]
        ];
    }

    // Price range. Only numeric bounds are forwarded: a non-numeric value
    // would reach a range query on a float field and fail the whole search.
    $range = [];
    foreach (['min_price' => 'gte', 'max_price' => 'lte'] as $key => $operator) {
        if (!empty($filters[$key]) && is_numeric($filters[$key])) {
            $range[$operator] = (float)$filters[$key];
        }
    }
    if ($range !== []) {
        $filter[] = [
            'range' => [
                'price' => $range
            ]
        ];
    }

    // Brand filter. array_values() re-indexes the list so that an array with
    // gaps or string keys still serialises as a JSON array, not an object.
    if (!empty($filters['brand'])) {
        $filter[] = [
            'terms' => [
                'brand' => array_values((array)$filters['brand'])
            ]
        ];
    }

    // Availability filter
    if (!empty($filters['availability'])) {
        $filter[] = [
            'term' => [
                'availability' => $filters['availability']
            ]
        ];
    }

    return [
        'bool' => [
            'must' => $must,
            'filter' => $filter
        ]
    ];
}

Faceted Search:

/**
 * Describe the aggregations used for faceted navigation.
 *
 * Returns five named aggregations: term buckets for category (up to 50),
 * brand (up to 100) and availability, fixed price bands, and summary
 * statistics over the price field.
 *
 * @return array Aggregation definitions keyed by aggregation name.
 */
function elasticsearch_aggregations() {
    // Price bands: open-ended at both extremes.
    $price_bands = [
        ['to' => 50],
        ['from' => 50, 'to' => 100],
        ['from' => 100, 'to' => 250],
        ['from' => 250, 'to' => 500],
        ['from' => 500],
    ];

    $aggs = [];
    $aggs['categories'] = ['terms' => ['field' => 'category', 'size' => 50]];
    $aggs['brands'] = ['terms' => ['field' => 'brand', 'size' => 100]];
    $aggs['price_ranges'] = ['range' => ['field' => 'price', 'ranges' => $price_bands]];
    $aggs['availability'] = ['terms' => ['field' => 'availability']];
    $aggs['price_stats'] = ['stats' => ['field' => 'price']];

    return $aggs;
}

Phase 1: Initialize

  • Clear sync status flags
  • Prepare index
  • Set up state management

Phase 2: Bulk Sync

  • Process products in batches (500/batch)
  • Transform data for Elasticsearch
  • Index documents via Bulk API
  • Track progress and errors

Phase 3: Incremental

  • Monitor for changes
  • Sync only updated products
  • Maintain real-time freshness

File: /scripts/elasticsearch-auto-sync.php

/**
 * Index a batch of products into the "products" index via the Bulk API.
 *
 * Each product contributes two consecutive body entries: an "index" action
 * line carrying the target index and document id, followed by the document
 * produced by transform_product_for_elasticsearch().
 *
 * @param iterable $products Product rows; each must expose an 'id' key.
 *
 * @return mixed The result of process_bulk_response() on success, or false
 *               when the bulk call throws (the message is written to the
 *               error log).
 */
function bulk_index_products($products) {
    $params = ['body' => []];

    foreach ($products as $product) {
        // Index action
        $params['body'][] = [
            'index' => [
                '_index' => 'products',
                '_id' => $product['id']
            ]
        ];
        // Document data
        $params['body'][] = transform_product_for_elasticsearch($product);
    }

    // An empty batch is not an error. Sending it anyway makes Elasticsearch
    // reject the request for having no body, which used to be logged as a
    // failure and reported as false. Report an empty, error-free bulk result
    // instead, in the same shape the Bulk API returns.
    // NOTE(review): assumes process_bulk_response() accepts the standard
    // bulk response shape (took / errors / items) — confirm.
    if ($params['body'] === []) {
        return process_bulk_response(['took' => 0, 'errors' => false, 'items' => []]);
    }

    $client = get_elasticsearch_client();

    try {
        $response = $client->bulk($params);
        return process_bulk_response($response);
    } catch (Exception $e) {
        error_log("Bulk indexing error: " . $e->getMessage());
        return false;
    }
}
/**
 * Convert a product row into the document stored in Elasticsearch.
 *
 * Numeric fields are cast (id to int, prices to float), the discount is
 * derived through calculate_discount(), the image is picked with
 * get_first_image(), and the specifications JSON string is decoded into an
 * associative array. All other fields are copied through unchanged.
 *
 * NOTE(review): created_at / updated_at are passed through as-is; confirm
 * the source values are in a format the index's date fields accept.
 *
 * @param array $product Source row. Reads: id, title, brand, description,
 *                       price, list_price, availability, category_name,
 *                       category_path, sku, ean, image_urls, product_url,
 *                       specifications, created_at, updated_at.
 *
 * @return array Document keyed by the index field names.
 */
function transform_product_for_elasticsearch($product) {
    $doc = [];

    // Identity and searchable text
    $doc['id'] = (int)$product['id'];
    $doc['title'] = $product['title'];
    $doc['brand'] = $product['brand'];
    $doc['description'] = $product['description'];

    // Pricing
    $doc['price'] = (float)$product['price'];
    $doc['list_price'] = (float)$product['list_price'];
    $doc['discount_percentage'] = calculate_discount($product);

    // Classification and identifiers (the row's category_name feeds "category")
    $doc['availability'] = $product['availability'];
    $doc['category'] = $product['category_name'];
    $doc['category_path'] = $product['category_path'];
    $doc['sku'] = $product['sku'];
    $doc['ean'] = $product['ean'];

    // Links and free-form data
    $doc['image_url'] = get_first_image($product['image_urls']);
    $doc['product_url'] = $product['product_url'];
    $doc['specifications'] = json_decode($product['specifications'], true);

    // Timestamps
    $doc['created_at'] = $product['created_at'];
    $doc['updated_at'] = $product['updated_at'];

    return $doc;
}

Automatically handles typos:

'fuzziness' => 'AUTO' // Allowed edit distance scales with term length: very short terms must match exactly, longer terms tolerate up to 2 edits

Shows matched terms:

'highlight' => [
'pre_tags' => ['<mark>'],
'post_tags' => ['</mark>'],
'fields' => [
'title' => ['fragment_size' => 150],
'description' => ['fragment_size' => 200]
]
]

Auto-complete functionality:

/**
 * Fetch auto-complete suggestions for a typed prefix.
 *
 * Issues a completion suggester named "product-suggest" against the
 * "products" index, asking for up to 10 entries and tolerating a
 * fuzziness of 1.
 *
 * NOTE(review): the suggester targets the field "title.suggest", but the
 * index mapping shown in this document defines only "keyword" and "raw"
 * sub-fields under "title" — confirm a completion-typed "suggest"
 * sub-field exists in the live mapping.
 *
 * @param mixed $prefix Text typed so far.
 *
 * @return mixed The raw response returned by the client's search() call.
 */
function elasticsearch_suggestions($prefix) {
    $completion = [
        'field' => 'title.suggest',
        'size' => 10,
        'fuzzy' => ['fuzziness' => 1],
    ];

    $suggester = [
        'prefix' => $prefix,
        'completion' => $completion,
    ];

    $request = [
        'index' => 'products',
        'body' => [
            'suggest' => ['product-suggest' => $suggester],
        ],
    ];

    return get_elasticsearch_client()->search($request);
}
/**
 * Run a product search through the WordPress object cache.
 *
 * Results are stored for 300 seconds under "es_search_<hash>"; a cache hit
 * is returned without contacting Elasticsearch.
 *
 * @param string $query_hash   Caller-computed hash identifying the search.
 * @param mixed  $query_params Either a plain search string, or an array
 *                             bundling the search arguments.
 *                             NOTE(review): the array keys are assumed to be
 *                             'query', 'filters', 'page' and 'per_page',
 *                             mirroring elasticsearch_search()'s parameters —
 *                             confirm against callers.
 *
 * @return mixed Cached or fresh result of elasticsearch_search().
 */
function cached_elasticsearch_search($query_hash, $query_params) {
    $cache_key = 'es_search_' . $query_hash;

    $cached = wp_cache_get($cache_key);
    if ($cached !== false) {
        return $cached;
    }

    if (is_array($query_params)) {
        // Unpack the bundle. Passing the whole array as the first argument
        // would make it the free-text query and drop filters and paging.
        $results = elasticsearch_search(
            $query_params['query'] ?? '',
            $query_params['filters'] ?? [],
            $query_params['page'] ?? 1,
            $query_params['per_page'] ?? 20
        );
    } else {
        // A scalar is treated as the search string itself.
        $results = elasticsearch_search($query_params);
    }

    wp_cache_set($cache_key, $results, '', 300); // 5 min cache
    return $results;
}
  • Put exact-match criteria (category, brand, availability, price range) in filter context rather than in scored queries
  • Limit result size appropriately
  • Use pagination for large result sets
  • Minimize aggregation complexity
Terminal window
curl -X GET "http://91.99.113.45:9200/_cluster/health?pretty"
Terminal window
curl -X GET "http://91.99.113.45:9200/products/_stats?pretty"
/**
 * Write a query to the error log when it ran longer than a threshold.
 *
 * Nothing is logged when the execution time is at or below the threshold.
 *
 * @param mixed     $query          Query that was executed; JSON-encoded into the log line.
 * @param int|float $execution_time Measured duration in milliseconds.
 * @param int|float $threshold_ms   Durations strictly above this are logged.
 *                                  Defaults to 500, the previously fixed value.
 *
 * @return void
 */
function elasticsearch_log_slow_queries($query, $execution_time, $threshold_ms = 500) {
    if ($execution_time <= $threshold_ms) {
        return;
    }

    error_log("Slow Elasticsearch query ({$execution_time}ms): " . json_encode($query));
}