improve nixos options search (#55)
Co-authored-by: Rok Garbas <rok@garbas.si>
parent 8896ea9e1c
commit 0a6369ea86
@@ -29,7 +29,7 @@ click_log.basic_config(logger)
 CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
 
-INDEX_SCHEMA_VERSION = 1
+INDEX_SCHEMA_VERSION = 2
 
 ANALYSIS = {
     "analyzer": {
         "nixAttrName": {
@@ -37,6 +37,16 @@ ANALYSIS = {
             "tokenizer": "nix_attrname",
             "filter": ["lowercase", "nix_stopwords"],
         },
+        "nixOptionName": {
+            "type": "custom",
+            "tokenizer": "nix_option_name",
+            "filter": ["lowercase"],
+        },
+        "nixOptionNameGranular": {
+            "type": "custom",
+            "tokenizer": "nix_option_name_granular",
+            "filter": ["lowercase"],
+        },
     },
     "tokenizer": {
         "nix_attrname": {
@@ -60,6 +70,31 @@ ANALYSIS = {
                 ]
             ),
         },
+        "nix_option_name": {
+            "type": "pattern",
+            "pattern": "[.]",
+        },
+        # Lower priority (virtualHost -> [virtual, host])
+        "nix_option_name_granular": {
+            "type": "pattern",
+            # Split on attrname separators like _, .
+            "pattern": "|".join(
+                [
+                    "[_.-]",  # Common separators like underscores, dots and dashes
+                    # Camelcase tokenizer adapted from
+                    # https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pattern-analyzer.html
+                    "".join(
+                        [
+                            "(?<=[\\p{L}&&[^\\p{Lu}]])"  # lower case
+                            "(?=\\p{Lu})",  # followed by upper case
+                            "|",
+                            "(?<=\\p{Lu})"  # or upper case
+                            "(?=\\p{Lu}[\\p{L}&&[^\\p{Lu}]])",  # followed by lower case
+                        ]
+                    ),
+                ]
+            ),
+        },
     },
     "filter": {
         "nix_stopwords": {
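Note: the granular pattern above is a Java regex evaluated by Elasticsearch's pattern tokenizer; the \p{L} classes and the && class intersection have no direct equivalent in Python's re module. A rough ASCII-only Python sketch of the intended splitting, for illustration:

    import re

    # ASCII approximation of nix_option_name_granular: split on _ . - and on
    # camelCase boundaries; the "lowercase" filter then lowercases the tokens.
    separators = "[_.-]"
    camel = "(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])"
    granular = re.compile(f"{separators}|{camel}")

    print(granular.split("services.nginx.virtualHosts"))
    # ['services', 'nginx', 'virtual', 'Hosts']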
@@ -99,7 +134,20 @@ MAPPING = {
         "package_position": {"type": "text"},
         "package_homepage": {"type": "keyword"},
         # Options fields
-        "option_name": {"type": "keyword"},
+        "option_name": {
+            "type": "text",
+            "analyzer": "nixOptionName",
+            "fielddata": True,
+            "fields": {
+                "raw": {
+                    "type": "keyword"
+                },
+                "granular": {
+                    "type": "text",
+                    "analyzer": "nixOptionNameGranular",
+                },
+            },
+        },
         "option_description": {"type": "text"},
         "option_type": {"type": "keyword"},
         "option_default": {"type": "text"},
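The multi-field mapping exposes three views of the same option name: option_name.raw (exact keyword), option_name (dot-split via nixOptionName), and option_name.granular (camelCase/underscore-split). A minimal sketch of the weighted query this enables, mirroring the boosts used by the frontend further down (the index name and the commented-out search call are illustrative):

    query = {
        "bool": {
            "filter": [{"match": {"type": "option"}}],
            "must": {
                "bool": {
                    "should": [
                        {"term": {"option_name.raw": {"value": "nginx", "boost": 2.0}}},
                        {"term": {"option_name": {"value": "nginx", "boost": 1.0}}},
                        {"term": {"option_name.granular": {"value": "nginx", "boost": 0.6}}},
                        {"term": {"option_description": {"value": "nginx", "boost": 0.3}}},
                    ]
                }
            },
        }
    }
    # es.search(index="latest-2-nixos-unstable", body={"query": query})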
@@ -290,12 +338,13 @@ def ensure_index(es, index, mapping):
 
 
 def create_index_name(channel, evaluation):
     return (
-        f"latest-{channel}",
+        f"latest-{INDEX_SCHEMA_VERSION}-{channel}",
         f"evaluation-{INDEX_SCHEMA_VERSION}-{channel}-{evaluation['revisions_since_start']}-{evaluation['git_revision']}",
     )
 
 
 def update_alias(es, name, index):
+    if es.indices.exists_alias(name=name):
         indexes = set(es.indices.get_alias(name=name).keys())
 
         # indexes to remove from alias
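With the schema version folded into both names, one importer run now produces names along these lines (the evaluation values are made up for illustration):

    evaluation = {"revisions_since_start": 12345, "git_revision": "abc1234"}  # made-up values
    alias_name, index_name = create_index_name("nixos-unstable", evaluation)
    print(alias_name)   # latest-2-nixos-unstable
    print(index_name)   # evaluation-2-nixos-unstable-12345-abc1234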
@@ -310,6 +359,8 @@ def update_alias(es, name, index):
 
         if actions:
             es.indices.update_aliases({"actions": actions})
+    else:
+        es.indices.put_alias(index=index, name=name)
 
     indexes = ", ".join(es.indices.get_alias(name=name).keys())
     logger.debug(f"'{name}' alias now points to '{indexes}' index")
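The new else branch lets the first import of a channel bootstrap the alias instead of assuming it already exists; later imports keep re-pointing it. A sketch of the intended sequence (the index names here are made up):

    # first run: the alias is missing, so put_alias creates it
    update_alias(es, "latest-2-nixos-unstable", "evaluation-2-nixos-unstable-12345-abc1234")
    # later runs: the alias exists, so update_aliases swaps it to the newest evaluation index
    update_alias(es, "latest-2-nixos-unstable", "evaluation-2-nixos-unstable-12346-def5678")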
@@ -4,7 +4,6 @@ module Main exposing (main)
 
 import Browser
 import Browser.Navigation
-import Search
 import Html
     exposing
         ( Html
@@ -29,6 +28,7 @@ import Page.Options
 import Page.Packages
 import RemoteData
 import Route
+import Search
 import Url
 
 
@@ -37,7 +37,8 @@ import Url
 
 
 type alias Flags =
-    { elasticsearchUrl : String
+    { elasticsearchMappingSchemaVersion : Int
+    , elasticsearchUrl : String
     , elasticsearchUsername : String
     , elasticsearchPassword : String
     }
@@ -70,6 +71,7 @@ init flags url navKey =
             , url = url
             , elasticsearch =
                 Search.Options
+                    flags.elasticsearchMappingSchemaVersion
                     flags.elasticsearchUrl
                     flags.elasticsearchUsername
                     flags.elasticsearchPassword
@@ -221,20 +221,85 @@ makeRequestBody query from size =
     -- example query for "python"
     -- {
     --   "from": 0,
-    --   "size": 10,
+    --   "size": 1000,
     --   "query": {
     --     "bool": {
-    --       "filter": {
-    --         "match": {
-    --           "type": "package"
-    --         },
-    --       },
+    --       "must": {
+    --         "bool": {
     --           "should": [
+    --             {
+    --               "term": {
+    --                 "option_name.raw": {
+    --                   "value": "nginx",
+    --                   "boost": 2.0
+    --                 }
+    --               }
+    --             },
+    --             {
+    --               "term": {
+    --                 "option_name": {
+    --                   "value": "nginx",
+    --                   "boost": 1.0
+    --                 }
+    --               }
+    --             },
+    --             {
+    --               "term": {
+    --                 "option_name.granular": {
+    --                   "value": "nginx",
+    --                   "boost": 0.6
+    --                 }
+    --               }
+    --             },
+    --             {
+    --               "term": {
+    --                 "option_description": {
+    --                   "value": "nginx",
+    --                   "boost": 0.3
+    --                 }
+    --               }
+    --             }
     --           ]
     --         }
+    --       },
+    --       "filter": [
+    --         {
+    --           "match": {
+    --             "type": "option"
+    --           }
+    --         }
+    --       ]
+    --     }
+    --   },
+    --   "rescore" : {
+    --     "window_size": 500,
+    --     "query" : {
+    --       "score_mode": "total",
+    --       "rescore_query" : {
+    --         "function_score" : {
+    --           "script_score": {
+    --             "script": {
+    --               "source": "
+    --                 int i = 1;
+    --                 for (token in doc['option_name.raw'][0].splitOnToken('.')) {
+    --                   if (token == \"nginx\") {
+    --                     return 10000 - (i * 100);
+    --                   }
+    --                   i++;
+    --                 }
+    --                 return 10;
+    --               "
+    --             }
+    --           }
+    --         }
+    --       }
+    --     }
+    --   }
     -- }
     -- }
     let
+        stringIn name value =
+            [ ( name, Json.Encode.string value ) ]
+
         listIn name type_ value =
             [ ( name, Json.Encode.list type_ value ) ]
@@ -254,24 +319,44 @@ makeRequestBody query from size =
                 )
             ]
     in
-    [ ( "option_name", 2.0 )
+    [ ( "option_name.raw", 2.0 )
+    , ( "option_name", 1.0 )
+    , ( "option_name.granular", 0.6 )
     , ( "option_description", 0.3 )
     ]
         |> List.map encodeTerm
         |> listIn "should" Json.Encode.object
+        |> objectIn "bool"
+        |> objectIn "must"
+        |> ([ ( "type", Json.Encode.string "option" ) ]
+                |> objectIn "match"
+                |> objectIn "filter"
                 |> List.append
-            [ ( "filter"
-              , Json.Encode.object
-                    [ ( "match"
-                      , Json.Encode.object
-                            [ ( "type", Json.Encode.string "option" )
-                            ]
            )
-                    ]
-              )
-            ]
         |> objectIn "bool"
         |> objectIn "query"
+        |> List.append
+            ("""int i = 1;
+            for (token in doc['option_name.raw'][0].splitOnToken('.')) {
+                if (token == '"""
+             ++ query
+             ++ """') {
+                return 10000 - (i * 100);
+            }
+            i++;
+            }
+            return 10;
+            """
+                |> stringIn "source"
+                |> objectIn "script"
+                |> objectIn "script_score"
+                |> objectIn "function_score"
+                |> objectIn "rescore_query"
+                |> List.append ("total" |> stringIn "score_mode")
+                |> objectIn "query"
+                |> List.append [ ( "window_size", Json.Encode.int 1000 ) ]
+                |> objectIn "rescore"
+            )
         |> List.append
             [ ( "from", Json.Encode.int from )
             , ( "size", Json.Encode.int size )
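The rescore script favours options whose dotted name actually contains the query term, ranking earlier path segments higher. The same rule written out in plain Python, for illustration only (the function name and example option paths are made up):

    def rescore_score(option_name: str, query: str) -> int:
        # Mirrors the Painless script: the earlier the matching segment, the higher the score.
        i = 1
        for token in option_name.split("."):
            if token == query:
                return 10000 - (i * 100)
            i += 1
        return 10

    print(rescore_score("services.nginx.enable", "nginx"))       # 9800
    print(rescore_score("services.postgresql.enable", "nginx"))  # 10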
@@ -290,7 +375,7 @@ makeRequest :
 makeRequest options channel query from size =
     Search.makeRequest
         (makeRequestBody query from size)
-        ("latest-nixos-" ++ channel)
+        ("latest-" ++ String.fromInt options.mappingSchemaVersion ++ "-nixos-" ++ channel)
         decodeResultItemSource
         options
         query
@@ -429,7 +429,8 @@ viewPager outMsg model result path =
 
 
 type alias Options =
-    { url : String
+    { mappingSchemaVersion : Int
+    , url : String
     , username : String
     , password : String
     }
@@ -5,6 +5,7 @@ require("./index.scss");
 const {Elm} = require('./Main');
 
 Elm.Main.init({flags: {
+  elasticsearchMappingSchemaVersion: process.env.ELASTICSEARCH_MAPPING_SCHEMA_VERSION || 2,
   elasticsearchUrl: process.env.ELASTICSEARCH_URL || 'https://nixos-search-5886075189.us-east-1.bonsaisearch.net:443',
   elasticsearchUsername : process.env.ELASTICSEARCH_USERNAME || 'z3ZFJ6y2mR',
   elasticsearchPassword : process.env.ELASTICSEARCH_PASSWORD || 'ds8CEvALPf9pui7XG'