improve nixos options search (#55)

Co-authored-by: Rok Garbas <rok@garbas.si>
This commit is contained in:
adisbladis 2020-06-04 00:33:54 +01:00 committed by GitHub
parent 8896ea9e1c
commit 0a6369ea86
Failed to generate hash of commit
5 changed files with 176 additions and 36 deletions

View file

@ -29,7 +29,7 @@ click_log.basic_config(logger)
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
INDEX_SCHEMA_VERSION = 1
INDEX_SCHEMA_VERSION = 2
ANALYSIS = {
"analyzer": {
"nixAttrName": {
@ -37,6 +37,16 @@ ANALYSIS = {
"tokenizer": "nix_attrname",
"filter": ["lowercase", "nix_stopwords"],
},
"nixOptionName": {
"type": "custom",
"tokenizer": "nix_option_name",
"filter": ["lowercase"],
},
"nixOptionNameGranular": {
"type": "custom",
"tokenizer": "nix_option_name_granular",
"filter": ["lowercase"],
},
},
"tokenizer": {
"nix_attrname": {
@ -60,6 +70,31 @@ ANALYSIS = {
]
),
},
"nix_option_name": {
"type": "pattern",
"pattern": "[.]",
},
# Lower priority (virtualHost -> [virtual, host])
"nix_option_name_granular": {
"type": "pattern",
# Split on attrname separators (_, ., -) and on camelCase boundaries
"pattern": "|".join(
[
"[_.-]", # Common separators like underscores, dots and dashes
# Camelcase tokenizer adapted from
# https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pattern-analyzer.html
"".join(
[
"(?<=[\\p{L}&&[^\\p{Lu}]])" # lower case
"(?=\\p{Lu})", # followed by upper case
"|",
"(?<=\\p{Lu})" # or upper case
"(?=\\p{Lu}[\\p{L}&&[^\\p{Lu}]])", # followed by lower case
]
),
]
),
},
},
"filter": {
"nix_stopwords": {
@ -99,7 +134,20 @@ MAPPING = {
"package_position": {"type": "text"},
"package_homepage": {"type": "keyword"},
# Options fields
"option_name": {"type": "keyword"},
"option_name": {
"type": "text",
"analyzer": "nixOptionName",
"fielddata": True,
"fields": {
"raw": {
"type": "keyword"
},
"granular": {
"type": "text",
"analyzer": "nixOptionNameGranular",
},
},
},
"option_description": {"type": "text"},
"option_type": {"type": "keyword"},
"option_default": {"type": "text"},
@ -290,12 +338,13 @@ def ensure_index(es, index, mapping):
def create_index_name(channel, evaluation):
return (
f"latest-{channel}",
f"latest-{INDEX_SCHEMA_VERSION}-{channel}",
f"evaluation-{INDEX_SCHEMA_VERSION}-{channel}-{evaluation['revisions_since_start']}-{evaluation['git_revision']}",
)
def update_alias(es, name, index):
if es.indices.exists_alias(name=name):
indexes = set(es.indices.get_alias(name=name).keys())
# indexes to remove from alias
@ -310,6 +359,8 @@ def update_alias(es, name, index):
if actions:
es.indices.update_aliases({"actions": actions})
else:
es.indices.put_alias(index=index, name=name)
indexes = ", ".join(es.indices.get_alias(name=name).keys())
logger.debug(f"'{name}' alias now points to '{indexes}' index")

View file

@ -4,7 +4,6 @@ module Main exposing (main)
import Browser
import Browser.Navigation
import Search
import Html
exposing
( Html
@ -29,6 +28,7 @@ import Page.Options
import Page.Packages
import RemoteData
import Route
import Search
import Url
@ -37,7 +37,8 @@ import Url
type alias Flags =
{ elasticsearchUrl : String
{ elasticsearchMappingSchemaVersion : Int
, elasticsearchUrl : String
, elasticsearchUsername : String
, elasticsearchPassword : String
}
@ -70,6 +71,7 @@ init flags url navKey =
, url = url
, elasticsearch =
Search.Options
flags.elasticsearchMappingSchemaVersion
flags.elasticsearchUrl
flags.elasticsearchUsername
flags.elasticsearchPassword

View file

@ -221,20 +221,85 @@ makeRequestBody query from size =
-- example query for "nginx"
-- {
-- "from": 0,
-- "size": 10,
-- "size": 1000,
-- "query": {
-- "bool": {
-- "filter": {
-- "match": {
-- "type": "package"
-- },
-- },
-- "must": {
-- "bool": {
-- "should": [
-- {
-- "term": {
-- "option_name.raw": {
-- "value": "nginx",
-- "boost": 2.0
-- }
-- }
-- },
-- {
-- "term": {
-- "option_name": {
-- "value": "nginx",
-- "boost": 1.0
-- }
-- }
-- },
-- {
-- "term": {
-- "option_name.granular": {
-- "value": "nginx",
-- "boost": 0.6
-- }
-- }
-- },
-- {
-- "term": {
-- "option_description": {
-- "value": "nginx",
-- "boost": 0.3
-- }
-- }
-- }
-- ]
-- }
-- },
-- "filter": [
-- {
-- "match": {
-- "type": "option"
-- }
-- }
-- ]
-- }
-- },
-- "rescore" : {
-- "window_size": 1000,
-- "query" : {
-- "score_mode": "total",
-- "rescore_query" : {
-- "function_score" : {
-- "script_score": {
-- "script": {
-- "source": "
-- int i = 1;
-- for (token in doc['option_name.raw'][0].splitOnToken('.')) {
-- if (token == \"nginx\") {
-- return 10000 - (i * 100);
-- }
-- i++;
-- }
-- return 10;
-- "
-- }
-- }
-- }
-- }
-- }
-- }
-- }
let
stringIn name value =
[ ( name, Json.Encode.string value ) ]
listIn name type_ value =
[ ( name, Json.Encode.list type_ value ) ]
@ -254,24 +319,44 @@ makeRequestBody query from size =
)
]
in
[ ( "option_name", 2.0 )
[ ( "option_name.raw", 2.0 )
, ( "option_name", 1.0 )
, ( "option_name.granular", 0.6 )
, ( "option_description", 0.3 )
]
|> List.map encodeTerm
|> listIn "should" Json.Encode.object
|> objectIn "bool"
|> objectIn "must"
|> ([ ( "type", Json.Encode.string "option" ) ]
|> objectIn "match"
|> objectIn "filter"
|> List.append
[ ( "filter"
, Json.Encode.object
[ ( "match"
, Json.Encode.object
[ ( "type", Json.Encode.string "option" )
]
)
]
)
]
|> objectIn "bool"
|> objectIn "query"
|> List.append
("""int i = 1;
for (token in doc['option_name.raw'][0].splitOnToken('.')) {
if (token == '"""
++ query
++ """') {
return 10000 - (i * 100);
}
i++;
}
return 10;
"""
|> stringIn "source"
|> objectIn "script"
|> objectIn "script_score"
|> objectIn "function_score"
|> objectIn "rescore_query"
|> List.append ("total" |> stringIn "score_mode")
|> objectIn "query"
|> List.append [ ( "window_size", Json.Encode.int 1000 ) ]
|> objectIn "rescore"
)
|> List.append
[ ( "from", Json.Encode.int from )
, ( "size", Json.Encode.int size )
@ -290,7 +375,7 @@ makeRequest :
makeRequest options channel query from size =
Search.makeRequest
(makeRequestBody query from size)
("latest-nixos-" ++ channel)
("latest-" ++ String.fromInt options.mappingSchemaVersion ++ "-nixos-" ++ channel)
decodeResultItemSource
options
query

View file

@ -429,7 +429,8 @@ viewPager outMsg model result path =
type alias Options =
{ url : String
{ mappingSchemaVersion : Int
, url : String
, username : String
, password : String
}

View file

@ -5,6 +5,7 @@ require("./index.scss");
const {Elm} = require('./Main');
Elm.Main.init({flags: {
elasticsearchMappingSchemaVersion: process.env.ELASTICSEARCH_MAPPING_SCHEMA_VERSION || 2,
elasticsearchUrl: process.env.ELASTICSEARCH_URL || 'https://nixos-search-5886075189.us-east-1.bonsaisearch.net:443',
elasticsearchUsername : process.env.ELASTICSEARCH_USERNAME || 'z3ZFJ6y2mR',
elasticsearchPassword : process.env.ELASTICSEARCH_PASSWORD || 'ds8CEvALPf9pui7XG'