improve nixos options search (#55)
Co-authored-by: Rok Garbas <rok@garbas.si>
This commit is contained in:
parent
8896ea9e1c
commit
0a6369ea86
|
@ -29,7 +29,7 @@ click_log.basic_config(logger)
|
|||
|
||||
|
||||
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
INDEX_SCHEMA_VERSION = 1
|
||||
INDEX_SCHEMA_VERSION = 2
|
||||
ANALYSIS = {
|
||||
"analyzer": {
|
||||
"nixAttrName": {
|
||||
|
@ -37,6 +37,16 @@ ANALYSIS = {
|
|||
"tokenizer": "nix_attrname",
|
||||
"filter": ["lowercase", "nix_stopwords"],
|
||||
},
|
||||
"nixOptionName": {
|
||||
"type": "custom",
|
||||
"tokenizer": "nix_option_name",
|
||||
"filter": ["lowercase"],
|
||||
},
|
||||
"nixOptionNameGranular": {
|
||||
"type": "custom",
|
||||
"tokenizer": "nix_option_name_granular",
|
||||
"filter": ["lowercase"],
|
||||
},
|
||||
},
|
||||
"tokenizer": {
|
||||
"nix_attrname": {
|
||||
|
@ -60,6 +70,31 @@ ANALYSIS = {
|
|||
]
|
||||
),
|
||||
},
|
||||
"nix_option_name": {
|
||||
"type": "pattern",
|
||||
"pattern": "[.]",
|
||||
},
|
||||
# Lower priority (virtualHost -> [virtual, host])
|
||||
"nix_option_name_granular": {
|
||||
"type": "pattern",
|
||||
# Split on attrname separators like _, .
|
||||
"pattern": "|".join(
|
||||
[
|
||||
"[_.-]", # Common separators like underscores, dots and dashes
|
||||
# Camelcase tokenizer adapted from
|
||||
# https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pattern-analyzer.html
|
||||
"".join(
|
||||
[
|
||||
"(?<=[\\p{L}&&[^\\p{Lu}]])" # lower case
|
||||
"(?=\\p{Lu})", # followed by upper case
|
||||
"|",
|
||||
"(?<=\\p{Lu})" # or upper case
|
||||
"(?=\\p{Lu}[\\p{L}&&[^\\p{Lu}]])", # followed by lower case
|
||||
]
|
||||
),
|
||||
]
|
||||
),
|
||||
},
|
||||
},
|
||||
"filter": {
|
||||
"nix_stopwords": {
|
||||
|
@ -99,7 +134,20 @@ MAPPING = {
|
|||
"package_position": {"type": "text"},
|
||||
"package_homepage": {"type": "keyword"},
|
||||
# Options fields
|
||||
"option_name": {"type": "keyword"},
|
||||
"option_name": {
|
||||
"type": "text",
|
||||
"analyzer": "nixOptionName",
|
||||
"fielddata": True,
|
||||
"fields": {
|
||||
"raw": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"granular": {
|
||||
"type": "text",
|
||||
"analyzer": "nixOptionNameGranular",
|
||||
},
|
||||
},
|
||||
},
|
||||
"option_description": {"type": "text"},
|
||||
"option_type": {"type": "keyword"},
|
||||
"option_default": {"type": "text"},
|
||||
|
@ -290,26 +338,29 @@ def ensure_index(es, index, mapping):
|
|||
|
||||
def create_index_name(channel, evaluation):
|
||||
return (
|
||||
f"latest-{channel}",
|
||||
f"latest-{INDEX_SCHEMA_VERSION}-{channel}",
|
||||
f"evaluation-{INDEX_SCHEMA_VERSION}-{channel}-{evaluation['revisions_since_start']}-{evaluation['git_revision']}",
|
||||
)
|
||||
|
||||
|
||||
def update_alias(es, name, index):
|
||||
indexes = set(es.indices.get_alias(name=name).keys())
|
||||
if es.indices.exists_alias(name=name):
|
||||
indexes = set(es.indices.get_alias(name=name).keys())
|
||||
|
||||
# indexes to remove from alias
|
||||
actions = [
|
||||
{"remove": {"index": item, "alias": name}}
|
||||
for item in indexes.difference(set([index]))
|
||||
]
|
||||
# indexes to remove from alias
|
||||
actions = [
|
||||
{"remove": {"index": item, "alias": name}}
|
||||
for item in indexes.difference(set([index]))
|
||||
]
|
||||
|
||||
# add index if it does not exist in alias
|
||||
if index not in indexes:
|
||||
actions.append({"add": {"index": index, "alias": name}})
|
||||
# add index if it does not exist in alias
|
||||
if index not in indexes:
|
||||
actions.append({"add": {"index": index, "alias": name}})
|
||||
|
||||
if actions:
|
||||
es.indices.update_aliases({"actions": actions})
|
||||
if actions:
|
||||
es.indices.update_aliases({"actions": actions})
|
||||
else:
|
||||
es.indices.put_alias(index=index, name=name)
|
||||
|
||||
indexes = ", ".join(es.indices.get_alias(name=name).keys())
|
||||
logger.debug(f"'{name}' alias now points to '{indexes}' index")
|
||||
|
|
|
@ -4,7 +4,6 @@ module Main exposing (main)
|
|||
|
||||
import Browser
|
||||
import Browser.Navigation
|
||||
import Search
|
||||
import Html
|
||||
exposing
|
||||
( Html
|
||||
|
@ -29,6 +28,7 @@ import Page.Options
|
|||
import Page.Packages
|
||||
import RemoteData
|
||||
import Route
|
||||
import Search
|
||||
import Url
|
||||
|
||||
|
||||
|
@ -37,7 +37,8 @@ import Url
|
|||
|
||||
|
||||
type alias Flags =
|
||||
{ elasticsearchUrl : String
|
||||
{ elasticsearchMappingSchemaVersion : Int
|
||||
, elasticsearchUrl : String
|
||||
, elasticsearchUsername : String
|
||||
, elasticsearchPassword : String
|
||||
}
|
||||
|
@ -70,6 +71,7 @@ init flags url navKey =
|
|||
, url = url
|
||||
, elasticsearch =
|
||||
Search.Options
|
||||
flags.elasticsearchMappingSchemaVersion
|
||||
flags.elasticsearchUrl
|
||||
flags.elasticsearchUsername
|
||||
flags.elasticsearchPassword
|
||||
|
|
|
@ -221,20 +221,85 @@ makeRequestBody query from size =
|
|||
-- example query for "nginx"
|
||||
-- {
|
||||
-- "from": 0,
|
||||
-- "size": 10,
|
||||
-- "size": 1000,
|
||||
-- "query": {
|
||||
-- "bool": {
|
||||
-- "filter": {
|
||||
-- "match": {
|
||||
-- "type": "package"
|
||||
-- },
|
||||
-- "must": {
|
||||
-- "bool": {
|
||||
-- "should": [
|
||||
-- {
|
||||
-- "term": {
|
||||
-- "option_name.raw": {
|
||||
-- "value": "nginx",
|
||||
-- "boost": 2.0
|
||||
-- }
|
||||
-- }
|
||||
-- },
|
||||
-- {
|
||||
-- "term": {
|
||||
-- "option_name": {
|
||||
-- "value": "nginx",
|
||||
-- "boost": 1.0
|
||||
-- }
|
||||
-- }
|
||||
-- },
|
||||
-- {
|
||||
-- "term": {
|
||||
-- "option_name.granular": {
|
||||
-- "value": "nginx",
|
||||
-- "boost": 0.6
|
||||
-- }
|
||||
-- }
|
||||
-- },
|
||||
-- {
|
||||
-- "term": {
|
||||
-- "option_description": {
|
||||
-- "value": "nginx",
|
||||
-- "boost": 0.3
|
||||
-- }
|
||||
-- }
|
||||
-- }
|
||||
-- ]
|
||||
-- }
|
||||
-- },
|
||||
-- "should": [
|
||||
-- "filter": [
|
||||
-- {
|
||||
-- "match": {
|
||||
-- "type": "option"
|
||||
-- }
|
||||
-- }
|
||||
-- ]
|
||||
-- }
|
||||
-- },
|
||||
-- "rescore" : {
|
||||
-- "window_size": 500,
|
||||
-- "query" : {
|
||||
-- "score_mode": "total",
|
||||
-- "rescore_query" : {
|
||||
-- "function_score" : {
|
||||
-- "script_score": {
|
||||
-- "script": {
|
||||
-- "source": "
|
||||
-- int i = 1;
|
||||
-- for (token in doc['option_name.raw'][0].splitOnToken('.')) {
|
||||
-- if (token == \"nginx\") {
|
||||
-- return 10000 - (i * 100);
|
||||
-- }
|
||||
-- i++;
|
||||
-- }
|
||||
-- return 10;
|
||||
-- "
|
||||
-- }
|
||||
-- }
|
||||
-- }
|
||||
-- }
|
||||
-- }
|
||||
-- }
|
||||
-- }
|
||||
let
|
||||
stringIn name value =
|
||||
[ ( name, Json.Encode.string value ) ]
|
||||
|
||||
listIn name type_ value =
|
||||
[ ( name, Json.Encode.list type_ value ) ]
|
||||
|
||||
|
@ -254,24 +319,44 @@ makeRequestBody query from size =
|
|||
)
|
||||
]
|
||||
in
|
||||
[ ( "option_name", 2.0 )
|
||||
[ ( "option_name.raw", 2.0 )
|
||||
, ( "option_name", 1.0 )
|
||||
, ( "option_name.granular", 0.6 )
|
||||
, ( "option_description", 0.3 )
|
||||
]
|
||||
|> List.map encodeTerm
|
||||
|> listIn "should" Json.Encode.object
|
||||
|> List.append
|
||||
[ ( "filter"
|
||||
, Json.Encode.object
|
||||
[ ( "match"
|
||||
, Json.Encode.object
|
||||
[ ( "type", Json.Encode.string "option" )
|
||||
]
|
||||
)
|
||||
]
|
||||
)
|
||||
]
|
||||
|> objectIn "bool"
|
||||
|> objectIn "must"
|
||||
|> ([ ( "type", Json.Encode.string "option" ) ]
|
||||
|> objectIn "match"
|
||||
|> objectIn "filter"
|
||||
|> List.append
|
||||
)
|
||||
|> objectIn "bool"
|
||||
|> objectIn "query"
|
||||
|> List.append
|
||||
("""int i = 1;
|
||||
for (token in doc['option_name.raw'][0].splitOnToken('.')) {
|
||||
if (token == '"""
|
||||
++ query
|
||||
++ """') {
|
||||
return 10000 - (i * 100);
|
||||
}
|
||||
i++;
|
||||
}
|
||||
return 10;
|
||||
"""
|
||||
|> stringIn "source"
|
||||
|> objectIn "script"
|
||||
|> objectIn "script_score"
|
||||
|> objectIn "function_score"
|
||||
|> objectIn "rescore_query"
|
||||
|> List.append ("total" |> stringIn "score_mode")
|
||||
|> objectIn "query"
|
||||
|> List.append [ ( "window_size", Json.Encode.int 1000 ) ]
|
||||
|> objectIn "rescore"
|
||||
)
|
||||
|> List.append
|
||||
[ ( "from", Json.Encode.int from )
|
||||
, ( "size", Json.Encode.int size )
|
||||
|
@ -290,7 +375,7 @@ makeRequest :
|
|||
makeRequest options channel query from size =
|
||||
Search.makeRequest
|
||||
(makeRequestBody query from size)
|
||||
("latest-nixos-" ++ channel)
|
||||
("latest-" ++ String.fromInt options.mappingSchemaVersion ++ "-nixos-" ++ channel)
|
||||
decodeResultItemSource
|
||||
options
|
||||
query
|
||||
|
|
|
@ -429,7 +429,8 @@ viewPager outMsg model result path =
|
|||
|
||||
|
||||
type alias Options =
|
||||
{ url : String
|
||||
{ mappingSchemaVersion : Int
|
||||
, url : String
|
||||
, username : String
|
||||
, password : String
|
||||
}
|
||||
|
|
|
@ -5,6 +5,7 @@ require("./index.scss");
|
|||
const {Elm} = require('./Main');
|
||||
|
||||
Elm.Main.init({flags: {
|
||||
elasticsearchMappingSchemaVersion: process.env.ELASTICSEARCH_MAPPING_SCHEMA_VERSION || 2,
|
||||
elasticsearchUrl: process.env.ELASTICSEARCH_URL || 'https://nixos-search-5886075189.us-east-1.bonsaisearch.net:443',
|
||||
elasticsearchUsername : process.env.ELASTICSEARCH_USERNAME || 'z3ZFJ6y2mR',
|
||||
elasticsearchPassword : process.env.ELASTICSEARCH_PASSWORD || 'ds8CEvALPf9pui7XG'
|
||||
|
|
Loading…
Reference in a new issue