improve nixos options search (#55)

Co-authored-by: Rok Garbas <rok@garbas.si>
This commit is contained in:
adisbladis 2020-06-04 00:33:54 +01:00 committed by GitHub
parent 8896ea9e1c
commit 0a6369ea86
Failed to generate hash of commit
5 changed files with 176 additions and 36 deletions

View file

@ -29,7 +29,7 @@ click_log.basic_config(logger)
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
INDEX_SCHEMA_VERSION = 1 INDEX_SCHEMA_VERSION = 2
ANALYSIS = { ANALYSIS = {
"analyzer": { "analyzer": {
"nixAttrName": { "nixAttrName": {
@ -37,6 +37,16 @@ ANALYSIS = {
"tokenizer": "nix_attrname", "tokenizer": "nix_attrname",
"filter": ["lowercase", "nix_stopwords"], "filter": ["lowercase", "nix_stopwords"],
}, },
"nixOptionName": {
"type": "custom",
"tokenizer": "nix_option_name",
"filter": ["lowercase"],
},
"nixOptionNameGranular": {
"type": "custom",
"tokenizer": "nix_option_name_granular",
"filter": ["lowercase"],
},
}, },
"tokenizer": { "tokenizer": {
"nix_attrname": { "nix_attrname": {
@ -60,6 +70,31 @@ ANALYSIS = {
] ]
), ),
}, },
"nix_option_name": {
"type": "pattern",
"pattern": "[.]",
},
# Lower priority (virtualHost -> [virtual, host])
"nix_option_name_granular": {
"type": "pattern",
# Split on attrname separators like _, .
"pattern": "|".join(
[
"[_.-]", # Common separators like underscores, dots and dashes
# Camelcase tokenizer adapted from
# https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pattern-analyzer.html
"".join(
[
"(?<=[\\p{L}&&[^\\p{Lu}]])" # lower case
"(?=\\p{Lu})", # followed by upper case
"|",
"(?<=\\p{Lu})" # or upper case
"(?=\\p{Lu}[\\p{L}&&[^\\p{Lu}]])", # followed by lower case
]
),
]
),
},
}, },
"filter": { "filter": {
"nix_stopwords": { "nix_stopwords": {
@ -99,7 +134,20 @@ MAPPING = {
"package_position": {"type": "text"}, "package_position": {"type": "text"},
"package_homepage": {"type": "keyword"}, "package_homepage": {"type": "keyword"},
# Options fields # Options fields
"option_name": {"type": "keyword"}, "option_name": {
"type": "text",
"analyzer": "nixOptionName",
"fielddata": True,
"fields": {
"raw": {
"type": "keyword"
},
"granular": {
"type": "text",
"analyzer": "nixOptionNameGranular",
},
},
},
"option_description": {"type": "text"}, "option_description": {"type": "text"},
"option_type": {"type": "keyword"}, "option_type": {"type": "keyword"},
"option_default": {"type": "text"}, "option_default": {"type": "text"},
@ -290,26 +338,29 @@ def ensure_index(es, index, mapping):
def create_index_name(channel, evaluation): def create_index_name(channel, evaluation):
return ( return (
f"latest-{channel}", f"latest-{INDEX_SCHEMA_VERSION}-{channel}",
f"evaluation-{INDEX_SCHEMA_VERSION}-{channel}-{evaluation['revisions_since_start']}-{evaluation['git_revision']}", f"evaluation-{INDEX_SCHEMA_VERSION}-{channel}-{evaluation['revisions_since_start']}-{evaluation['git_revision']}",
) )
def update_alias(es, name, index): def update_alias(es, name, index):
indexes = set(es.indices.get_alias(name=name).keys()) if es.indices.exists_alias(name=name):
indexes = set(es.indices.get_alias(name=name).keys())
# indexes to remove from alias # indexes to remove from alias
actions = [ actions = [
{"remove": {"index": item, "alias": name}} {"remove": {"index": item, "alias": name}}
for item in indexes.difference(set([index])) for item in indexes.difference(set([index]))
] ]
# add index if it does not exist in alias # add index if it does not exist in alias
if index not in indexes: if index not in indexes:
actions.append({"add": {"index": index, "alias": name}}) actions.append({"add": {"index": index, "alias": name}})
if actions: if actions:
es.indices.update_aliases({"actions": actions}) es.indices.update_aliases({"actions": actions})
else:
es.indices.put_alias(index=index, name=name)
indexes = ", ".join(es.indices.get_alias(name=name).keys()) indexes = ", ".join(es.indices.get_alias(name=name).keys())
logger.debug(f"'{name}' alias now points to '{indexes}' index") logger.debug(f"'{name}' alias now points to '{indexes}' index")

View file

@ -4,7 +4,6 @@ module Main exposing (main)
import Browser import Browser
import Browser.Navigation import Browser.Navigation
import Search
import Html import Html
exposing exposing
( Html ( Html
@ -29,6 +28,7 @@ import Page.Options
import Page.Packages import Page.Packages
import RemoteData import RemoteData
import Route import Route
import Search
import Url import Url
@ -37,7 +37,8 @@ import Url
type alias Flags = type alias Flags =
{ elasticsearchUrl : String { elasticsearchMappingSchemaVersion : Int
, elasticsearchUrl : String
, elasticsearchUsername : String , elasticsearchUsername : String
, elasticsearchPassword : String , elasticsearchPassword : String
} }
@ -70,6 +71,7 @@ init flags url navKey =
, url = url , url = url
, elasticsearch = , elasticsearch =
Search.Options Search.Options
flags.elasticsearchMappingSchemaVersion
flags.elasticsearchUrl flags.elasticsearchUrl
flags.elasticsearchUsername flags.elasticsearchUsername
flags.elasticsearchPassword flags.elasticsearchPassword

View file

@ -221,20 +221,85 @@ makeRequestBody query from size =
-- example query for "python" -- example query for "python"
-- { -- {
-- "from": 0, -- "from": 0,
-- "size": 10, -- "size": 1000,
-- "query": { -- "query": {
-- "bool": { -- "bool": {
-- "filter": { -- "must": {
-- "match": { -- "bool": {
-- "type": "package" -- "should": [
-- }, -- {
-- "term": {
-- "option_name.raw": {
-- "value": "nginx",
-- "boost": 2.0
-- }
-- }
-- },
-- {
-- "term": {
-- "option_name": {
-- "value": "nginx",
-- "boost": 1.0
-- }
-- }
-- },
-- {
-- "term": {
-- "option_name.granular": {
-- "value": "nginx",
-- "boost": 0.6
-- }
-- }
-- },
-- {
-- "term": {
-- "option_description": {
-- "value": "nginx",
-- "boost": 0.3
-- }
-- }
-- }
-- ]
-- }
-- }, -- },
-- "should": [ -- "filter": [
-- {
-- "match": {
-- "type": "option"
-- }
-- }
-- ] -- ]
-- } -- }
-- },
-- "rescore" : {
-- "window_size": 500,
-- "query" : {
-- "score_mode": "total",
-- "rescore_query" : {
-- "function_score" : {
-- "script_score": {
-- "script": {
-- "source": "
-- int i = 1;
-- for (token in doc['option_name.raw'][0].splitOnToken('.')) {
-- if (token == \"nginx\") {
-- return 10000 - (i * 100);
-- }
-- i++;
-- }
-- return 10;
-- "
-- }
-- }
-- }
-- }
-- }
-- } -- }
-- } -- }
let let
stringIn name value =
[ ( name, Json.Encode.string value ) ]
listIn name type_ value = listIn name type_ value =
[ ( name, Json.Encode.list type_ value ) ] [ ( name, Json.Encode.list type_ value ) ]
@ -254,24 +319,44 @@ makeRequestBody query from size =
) )
] ]
in in
[ ( "option_name", 2.0 ) [ ( "option_name.raw", 2.0 )
, ( "option_name", 1.0 )
, ( "option_name.granular", 0.6 )
, ( "option_description", 0.3 ) , ( "option_description", 0.3 )
] ]
|> List.map encodeTerm |> List.map encodeTerm
|> listIn "should" Json.Encode.object |> listIn "should" Json.Encode.object
|> List.append |> objectIn "bool"
[ ( "filter" |> objectIn "must"
, Json.Encode.object |> ([ ( "type", Json.Encode.string "option" ) ]
[ ( "match" |> objectIn "match"
, Json.Encode.object |> objectIn "filter"
[ ( "type", Json.Encode.string "option" ) |> List.append
] )
)
]
)
]
|> objectIn "bool" |> objectIn "bool"
|> objectIn "query" |> objectIn "query"
|> List.append
("""int i = 1;
for (token in doc['option_name.raw'][0].splitOnToken('.')) {
if (token == '"""
++ query
++ """') {
return 10000 - (i * 100);
}
i++;
}
return 10;
"""
|> stringIn "source"
|> objectIn "script"
|> objectIn "script_score"
|> objectIn "function_score"
|> objectIn "rescore_query"
|> List.append ("total" |> stringIn "score_mode")
|> objectIn "query"
|> List.append [ ( "window_size", Json.Encode.int 1000 ) ]
|> objectIn "rescore"
)
|> List.append |> List.append
[ ( "from", Json.Encode.int from ) [ ( "from", Json.Encode.int from )
, ( "size", Json.Encode.int size ) , ( "size", Json.Encode.int size )
@ -290,7 +375,7 @@ makeRequest :
makeRequest options channel query from size = makeRequest options channel query from size =
Search.makeRequest Search.makeRequest
(makeRequestBody query from size) (makeRequestBody query from size)
("latest-nixos-" ++ channel) ("latest-" ++ String.fromInt options.mappingSchemaVersion ++ "-nixos-" ++ channel)
decodeResultItemSource decodeResultItemSource
options options
query query

View file

@ -429,7 +429,8 @@ viewPager outMsg model result path =
type alias Options = type alias Options =
{ url : String { mappingSchemaVersion : Int
, url : String
, username : String , username : String
, password : String , password : String
} }

View file

@ -5,6 +5,7 @@ require("./index.scss");
const {Elm} = require('./Main'); const {Elm} = require('./Main');
Elm.Main.init({flags: { Elm.Main.init({flags: {
elasticsearchMappingSchemaVersion: process.env.ELASTICSEARCH_MAPPING_SCHEMA_VERSION || 2,
elasticsearchUrl: process.env.ELASTICSEARCH_URL || 'https://nixos-search-5886075189.us-east-1.bonsaisearch.net:443', elasticsearchUrl: process.env.ELASTICSEARCH_URL || 'https://nixos-search-5886075189.us-east-1.bonsaisearch.net:443',
elasticsearchUsername : process.env.ELASTICSEARCH_USERNAME || 'z3ZFJ6y2mR', elasticsearchUsername : process.env.ELASTICSEARCH_USERNAME || 'z3ZFJ6y2mR',
elasticsearchPassword : process.env.ELASTICSEARCH_PASSWORD || 'ds8CEvALPf9pui7XG' elasticsearchPassword : process.env.ELASTICSEARCH_PASSWORD || 'ds8CEvALPf9pui7XG'