Improve search result based on field boost in elasticsearch - vb.net

I am using ElasticSearch 1.7 first time and I have setup weight based on fields. It might change as per requirement. I am getting result from my query but issue is that if I change field weight dramatically then I can't see that much effect on records. Please check my below query and let me know if I am doing anything wrong.
ElasticSearch Query :
{
"from": 0,
"size": 10,
"highlight": {
"pre_tags": [
"<b>"
],
"post_tags": [
"</b>"
],
"fields": {
"title": {},
"description": {}
}
},
"query": {
"function_score": {
"query": {
"query_string": {
"query": "any keyword",
"fields": [
"fullText",
"title^100",
"authors^4",
"pubYear^4",
"publisher^4",
"abstract^2",
"documentTypeName^2",
"topic^6",
"topicSynonym^6"
"quality_value^6",
"domain^2"
],
"default_operator": "AND",
"analyze_wildcard": true
}
},
"score_mode": "sum",
"boost_mode": "sum",
"max_boost": 100
}
}
}
Sample Data:
{
"took": 44,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1465,
"max_score": 14.961364,
"hits": [
{
"_index": "snData",
"_type": "report",
"_id": "159",
"_score": 14.961364,
"_source": {
"str_ID": "159",
"topic": [
"strategy",
"Consumer-targeted strategy"
],
"topicSynonym": [
"assistance",
"coping",
"coping strategies",
"encouragement",
"support"
],
"fullText": "Background: As the incidence and prevalence of prostate cancer continue to rise, the number of men needing help and support to assist them in coping with disease and treatment-related symptoms and their psychosocial effects is likely to increase.",
"quality_value": 1,
"ID": 24034,
"title": "Psychosocial interventions for men with prostate cancer",
"authors": "Parahoo K E Noyes",
"pubYear": "2013",
"publisher": "",
"abstractEN": "Background: As the incidence and prevalence of prostate cancer continue to rise, the number of men needing help and support to assist them in coping with disease and treatment-related symptoms and their psychosocial effects is likely to increase.",
"uniqueID": "",
"documentTypeName": "Review of effects",
"viewCount": 28,
},
"highlight": {
"title": [
"Interventions for men with prostate <b>cancer</b>"
],
"abstract": [
"Background: As the incidence and prevalence of prostate <b>cancer</b> continue to rise, the number of men"
]
}
}
]
}
}
Use Case : If I change weight of quality_value or of anyone else then it should change result based on field weight. I am not sure whether my query is correct or I am missing anything. I am using ElasticSearch 1.7.

Related

Search including special characters in MongoDB Atlas

I faced with the issue when I try to search for several words including a special character (section sign "§").
Example: AB § 32.
I want all words "AB", "32" and symbol "§" to be included in found documents.
In some cases document can be found, in some not.
If my document contains the following text then search finds it:
Lagrum: 32 § 1 mom. första stycket a) kommunalskattelagen (1928:370) AB
But if document contains this text then search doesn't find:
Lagrum: 32 § 1 mom. första stycket AB
For symbol "§" I use UT8-encoding "\xc2\xa7".
Index uses "lucene.swedish" analyzer.
"Content": [
{
"analyzer": "lucene.swedish",
"minGrams": 4,
"tokenization": "nGram",
"type": "autocomplete"
},
{
"analyzer": "lucene.swedish",
"type": "string"
}
]
Query looks like:
{
"index": "test_index",
"compound": {
"filter": [
{
"text": {
"query": [
"111111111111"
],
"path": "ProductId"
}
},
],
"must": [
{
"autocomplete": {
"query": [
"AB"
],
"path": "Content"
}
},
{
"autocomplete": {
"query": [
"\xc2\xa7",
],
"path": "Content"
}
},
{
"autocomplete": {
"query": [
"32"
],
"path": "Content"
}
}
],
},
"count": {
"type": "lowerBound",
"threshold": 500
}
}
The question is what is wrong with the search and how can I get a correct result (return both above mentioned documents) ?
Focusing only on the content field, here is an index definition that should work for your requirements. The docs are here. Let me know if this works for you.
{
"mappings": {
"dynamic": false,
"fields": {
"content": [
{
"type": "autocomplete",
"tokenization": "nGram",
"minGrams": 4,
"maxGrams": 7,
"foldDiacritics": false,
"analyzer": "lucene.whitespace"
},
{
"analyzer": "lucene.swedish",
"type": "string"
}
]
}
}
}

azure search exact match of file name not returning exact results

I am indexing all the file names into the index. But when I search with exact file name in the search query it is returning all other file names also. below is my index definition.
{
"fields": [
{
"name": "id",
"type": "Edm.String",
"facetable": true,
"filterable": true,
"key": true,
"retrievable": true,
"searchable": false,
"sortable": false,
"analyzer": null,
"indexAnalyzer": null,
"searchAnalyzer": null,
"synonymMaps": [],
"fields": []
},
{
"name": "FileName",
"type": "Edm.String",
"facetable": false,
"filterable": false,
"key": false,
"retrievable": true,
"searchable": true,
"sortable": false,
"analyzer": "keyword-analyzer",
"indexAnalyzer": null,
"searchAnalyzer": null,
"synonymMaps": [],
"fields": []
}
],
"scoringProfiles": [],
"defaultScoringProfile": null,
"corsOptions": null,
"analyzers": [
{
"name": "keyword-analyzer",
"#odata.type": "#Microsoft.Azure.Search.CustomAnalyzer",
"charFilters": [],
"tokenizer": "keyword_v2",
"tokenFilters": ["lowercase", "my_asciifolding", "my_word_delimiter"]
}
],
"tokenFilters": [
{
"#odata.type": "#Microsoft.Azure.Search.AsciiFoldingTokenFilter",
"name": "my_asciifolding",
"preserveOriginal": true
},
{
"#odata.type": "#Microsoft.Azure.Search.WordDelimiterTokenFilter",
"name": "my_word_delimiter",
"generateWordParts": true,
"generateNumberParts": false,
"catenateWords": false,
"catenateNumbers": false,
"catenateAll": false,
"splitOnCaseChange": true,
"preserveOriginal": true,
"splitOnNumerics": true,
"stemEnglishPossessive": false,
"protectedWords": []
}
],
"#odata.etag": "\"0x8D6FB2F498F9AD2\""
}
Below is my sample data
{
"value": [
{
"id": "1",
"FileName": "SamplePSDFile_1psd2680.psd"
},
{
"id": "2",
"FileName": "SamplePSDFile-1psd260.psd"
},
{
"id": "3",
"FileName": "SamplePSDFile_1psd2689.psd"
},
{
"id": "4",
"FileName": "SamplePSDFile-1psdxx2680.psd"
}
]
}
Below is the Analyze API results
{
"tokens": [
{
"token": "samplepsdfile_1psd2689.psd",
"startOffset": 0,
"endOffset": 26,
"position": 0
},
{
"token": "samplepsdfile",
"startOffset": 0,
"endOffset": 13,
"position": 0
},
{
"token": "psd",
"startOffset": 15,
"endOffset": 18,
"position": 1
},
{
"token": "psd",
"startOffset": 23,
"endOffset": 26,
"position": 2
}
]
}
When I search with the keyword "SamplePSDFile_1psd2689.psd", Azure search returning three records in the results instead of only document 3. Below is my search query and the results.
?search="SamplePSDFile_1psd2689.psd"&api-version=2019-05-06&$count=true&queryType=full&searchMode=All
{
"#odata.count": 3,
"value": [
{
"#search.score": 2.3387241,
"id": "2",
"FileName": "SamplePSDFile-1psd260.psd"
},
{
"#search.score": 2.2493405,
"id": "3",
"FileName": "SamplePSDFile_1psd2689.psd"
},
{
"#search.score": 2.2493405,
"id": "1",
"FileName": "SamplePSDFile_1psd2680.psd"
}
]
}
How I can achieve my expected results. I tried with and without double quotes around the keyword all other options, but no luck. What I am doing wrong here in this case?
Some body suggested to use $filter, but that field wasn't filterable in our case.
Please help me on this.
If you are looking for exact match then you probably don't want any analyzer involved. Give it a try with this line
"analyzer": "keyword-analyzer"
changed to
"analyzer": null
If you need to be able to do exact match on the field and also support partial keyword searches then you need to index the field twice with different names. Maybe append “Exact” to the exact match field name and don’t use an analyzer for that one. The name without exact can have an analyzer. Then search on the field using the right field name index depending on the type of search.

Duplicate elements in AWS cloudwatch Embedded metrics

I am trying to log my service request.
First I try to get the service from my partner, upon failure I
try the same from my vendor, hence I need to add the same metrics under two different dimensions.
Following is my log structure, apparently, this is wrong as JSON does not support duplicate elements,
and AWS picks only the latest value in case of duplicates elements.
Kindly suggest the right way of doing this.
{
"_aws": {
"Timestamp": 1574109732004,
"CloudWatchMetrics": [{
"Namespace": "NameSpace1",
"Dimensions": [["Partner"]],
"Metrics": [{
"Name": "requestCount",
"Unit": "Count"
}, {
"Name": "requestFailure",
"Unit": "Count"
}, {
"Name": "responseTime",
"Unit": "Milliseconds"
}]
},
{
"Namespace": "NameSpace1",
"Dimensions": [["vendor"]],
"Metrics": [{
"Name": "requestCount",
"Unit": "Count"
}, {
"Name": "requestSuccess",
"Unit": "Count"
}, {
"Name": "responseTime",
"Unit": "Milliseconds"
}]
}]
},
"Partner": "partnerName",
"requestCount": 1,
"requestFailure": 1,
"responseTime": 1,
"vendor": "vendorName",
"requestCount": 2,
"requestSuccess": 2,
"responseTime": 2,
}
This will give you metrics separated by partner and vendor:
{
"Partner": "partnerName",
"vendor": "vendorName",
"_aws": {
"Timestamp": 1577179437354,
"CloudWatchMetrics": [
{
"Dimensions": [
[
"Partner"
],
[
"vendor"
]
],
"Metrics": [
{
"Name": "requestCount",
"Unit": "Count"
},
{
"Name": "requestFailure",
"Unit": "Count"
},
{
"Name": "requestSuccess",
"Unit": "Count"
},
{
"Name": "responseTime",
"Unit": "Milliseconds"
}
],
"Namespace": "NameSpace1"
}
]
},
"requestCount": 1,
"requestFailure": 1,
"requestSuccess": 1,
"responseTime": 2
}
Note that this will duplicate the metrics between the two dimensions (if partner registers failure it will be registered on the vendor failure metric also). If you need to avoid this, you can either:
have metric names specific to each type (like partnerRequestFailure and vendorRequestFailure)
or you need to publish separate json, one for partner and one for vendor.

eBay API issues - cannot publish an offer

All of the following is being performed in eBay's API sandbox.
I am attempting to list an item by using the inventory API. Specifically, I have created an inventory item and a relevant offer for that item. When I make a POST request to the publish offer endpoint, I get the following error:
{
"errors": [
{
"errorId": 25016,
"domain": "API_INVENTORY",
"subdomain": "Selling",
"category": "REQUEST",
"message": "The title value is invalid. Seller Provided Title Value is missing."
},
{
"errorId": 25002,
"domain": "API_INVENTORY",
"subdomain": "Selling",
"category": "REQUEST",
"message": "A user error has occurred. The duration \"GTC\" day(s) is not available for this listing type, or invalid for category \"49996\".",
"parameters": [
{
"name": "0",
"value": "GTC"
},
{
"name": "1",
"value": "49996"
}
]
}
]
}
I can't see any reference in any of the API documentation to a "Seller Provided Title". The duration error is also confusing as the API says it only supports "GTC" listings. The product has a title so it must be in reference to something else.
My inventory item is as follows:
{
"sku": "13725",
"product": {
"title": "Harley Davidson bike",
"aspects": {
"Year": [
"2016"
],
"Model": [
"Road Glide Special"
],
"Manufacurer": [
"Harley-Davidson®"
],
"Type": [
"Touring"
],
"For Sale By": [
"Dealer"
],
"Vehicle Title": [
"Clear"
],
"Mileage": [
"13393"
],
"VIN (Vehicle Identification Number)": [
"1HD1KTM10GB627264"
],
"Color": [
"Black Quartz"
]
},
"description": "Item description goes here",
"imageUrls": [
"https://dw4i9za0jmiyk.cloudfront.net/2018/01/12/pre_ic60e5df584b870c3d2a55c86800eede_70618b24eb08.jpg"
]
},
"condition": "USED_EXCELLENT",
"availability": {
"pickupAtLocationAvailability": [
{
"quantity": 1,
"merchantLocationKey": "425",
"availabilityType": "IN_STOCK",
"fulfillmentTime": {
"value": 1,
"unit": "DAY"
}
}
]
}
}
And my offer object is as follows:
{
"offerId": "5852159010",
"sku": "13725",
"marketplaceId": "EBAY_MOTORS",
"format": "FIXED_PRICE",
"availableQuantity": 0,
"pricingSummary": {
"price": {
"value": "18294.0",
"currency": "USD"
}
},
"listingPolicies": {
"paymentPolicyId": "5807565000",
"fulfillmentPolicyId": "5806186000"
},
"categoryId": "49996",
"merchantLocationKey": "425",
"tax": {
"applyTax": false
},
"status": "UNPUBLISHED",
"eBayPlusEligible": false
}
I had similar issues on sandbox, and came to the conculsion it was broken.
They also have some limits on only certain categories working.
Have you tried it agains the live API, I have found this to be far more reliable, ignoring the fact doing development work live is dangerous!
For your info here is my working code offer:
inventory_template = {
"availability": {
"shipToLocationAvailability": {
"quantity": product.quantity_available
}
},
"condition": "NEW",
"product": {
"aspects": {spec.name: [spec.value] for spec in product.specifics},
"brand": product.product_brand,
"description": product.product_description,
"imageUrls": [
"https://ebay.mydomain.co.uk/{}".format(img.image_link) for img in product.images],
"mpn": product.product_mpn,
"title": product.product_title,
"upc": [
product.product_upc,
],
"ean": [
product.product_ean,
],
# "epid": "string"
},
"sku": sku,
}
offer_body = {
"availableQuantity": offer.available_quantity,
"categoryId": offer.category_id,
"listingDescription": html,
"listingPolicies": {
"paymentPolicyId": offer.payment_policy_id,
"returnPolicyId": offer.return_policy_id,
"fulfillmentPolicyId": offer.fulfillment_policy_id,
},
"merchantLocationKey": offer.merchant_location_key,
"pricingSummary": {
"price": {
"value": offer.summary_price_value,
"currency": offer.summary_price_currency
}
},
"sku": offer.sku,
"marketplaceId": offer.marketplace_id,
"format": offer.format
}
the offer.available_quantity etc are items from my database, its the structure I'm showing.

ElasticSearch - return the complete value of a facet for a query

I've recently started using ElasticSearch. I try to complete some use cases. I have a problem for one of them.
I have indexed some users with their full name (e.g. "Jean-Paul Gautier", "Jean De La Fontaine").
I try to get all the full names responding to some query.
For example, I want the 100 most frequent full names beggining by "J"
{
"query": {
"query_string" : { "query": "full_name:J*" } }
},
"facets":{
"name":{
"terms":{
"field": "full_name",
"size":100
}
}
}
}
The result I get is all the words of the full names : "Jean", "Paul", "Gautier", "De", "La", "Fontaine".
How to get "Jean-Paul Gautier" and "Jean De La Fontaine" (all the full_name values begging by 'J') ? The "post_filter" option is not doing this, it only restrict this above subset.
I have to configure "how works" this full_name facet
I have to add some options to this current query
I have to do some "mapping" (very obscure for the moment)
Thanks
You just need to set "index": "not_analyzed" on the field, and you will be able to get back the full, unmodified field values in your facet.
Typically, it's nice to have one version of the field that isn't analyzed (for faceting) and another that is (for searching). The "multi_field" field type is useful for this.
So in this case, I can define a mapping as follows:
curl -XPUT "http://localhost:9200/test_index/" -d'
{
"mappings": {
"people": {
"properties": {
"full_name": {
"type": "multi_field",
"fields": {
"untouched": {
"type": "string",
"index": "not_analyzed"
},
"full_name": {
"type": "string"
}
}
}
}
}
}
}'
Here we have two sub-fields. The one with the same name as the parent will be the default, so if you search against the "full_name" field, Elasticsearch will actually use "full_name.full_name". "full_name.untouched" will give you the facet results you want.
So next I add two documents:
curl -XPUT "http://localhost:9200/test_index/people/1" -d'
{
"full_name": "Jean-Paul Gautier"
}'
curl -XPUT "http://localhost:9200/test_index/people/2" -d'
{
"full_name": "Jean De La Fontaine"
}'
And then I can facet on each field to see what is returned:
curl -XPOST "http://localhost:9200/test_index/_search" -d'
{
"size": 0,
"facets": {
"name_terms": {
"terms": {
"field": "full_name"
}
},
"name_untouched": {
"terms": {
"field": "full_name.untouched",
"size": 100
}
}
}
}'
and I get back the following:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0,
"hits": []
},
"facets": {
"name_terms": {
"_type": "terms",
"missing": 0,
"total": 7,
"other": 0,
"terms": [
{
"term": "jean",
"count": 2
},
{
"term": "paul",
"count": 1
},
{
"term": "la",
"count": 1
},
{
"term": "gautier",
"count": 1
},
{
"term": "fontaine",
"count": 1
},
{
"term": "de",
"count": 1
}
]
},
"name_untouched": {
"_type": "terms",
"missing": 0,
"total": 2,
"other": 0,
"terms": [
{
"term": "Jean-Paul Gautier",
"count": 1
},
{
"term": "Jean De La Fontaine",
"count": 1
}
]
}
}
}
As you can see, the analyzed field returns single-word, lower-cased tokens (when you don't specify an analyzer, the standard analyzer is used), and the un-analyzed sub-field returns the unmodified original text.
Here is a runnable example you can play with:
http://sense.qbox.io/gist/7abc063e2611846011dd874648fd1b77450b19a5
Try altering the mapping for "full_name":
"properties": {
"full_name": {
"type": "string",
"index": "not_analyzed"
}
...
}
not_analyzed means that it will be kept as is, capitals, spaces, dashes etc, so that "Jean De La Fontaine" will stay findable and not be tokenized into "Jean" "De" "La" "Fontaine"
You can experiment with different analyzers using the api
Notice what the standard one does to a mulit part name:
GET /_analyze?analyzer=standard
{'Jean Claude Van Dame'}
{
"tokens": [
{
"token": "jean",
"start_offset": 2,
"end_offset": 6,
"type": "<ALPHANUM>",
"position": 1
},
{
"token": "claude",
"start_offset": 7,
"end_offset": 13,
"type": "<ALPHANUM>",
"position": 2
},
{
"token": "van",
"start_offset": 14,
"end_offset": 17,
"type": "<ALPHANUM>",
"position": 3
},
{
"token": "dame",
"start_offset": 18,
"end_offset": 22,
"type": "<ALPHANUM>",
"position": 4
}
]
}