Options search: schema version 2 index with analyzed option names.

* scripts/import-channel: bump INDEX_SCHEMA_VERSION to 2, add the
  nixOptionName / nixOptionNameGranular analyzers and tokenizers, map
  option_name as analyzed text with "raw" and "granular" sub-fields, put the
  schema version into the "latest-*" alias name, and create the alias with
  put_alias when it does not exist yet.
* src/Page/Options.elm: query option_name.raw, option_name,
  option_name.granular and option_description, then rescore with a Painless
  script. The search term is passed through script "params" instead of being
  concatenated into the script source.
* src/Main.elm, src/Search.elm, src/index.js: thread the mapping schema
  version through the Elm flags. The flag is declared as Int in Elm, so
  index.js parses the environment variable to an integer.

diff --git a/scripts/import-channel b/scripts/import-channel
index 480d22b..c39ee99 100755
--- a/scripts/import-channel
+++ b/scripts/import-channel
@@ -29,7 +29,7 @@ click_log.basic_config(logger)
 
 
 CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
-INDEX_SCHEMA_VERSION = 1
+INDEX_SCHEMA_VERSION = 2
 ANALYSIS = {
     "analyzer": {
         "nixAttrName": {
@@ -37,6 +37,16 @@ ANALYSIS = {
             "tokenizer": "nix_attrname",
             "filter": ["lowercase", "nix_stopwords"],
         },
+        "nixOptionName": {
+            "type": "custom",
+            "tokenizer": "nix_option_name",
+            "filter": ["lowercase"],
+        },
+        "nixOptionNameGranular": {
+            "type": "custom",
+            "tokenizer": "nix_option_name_granular",
+            "filter": ["lowercase"],
+        },
     },
     "tokenizer": {
         "nix_attrname": {
@@ -60,6 +70,31 @@ ANALYSIS = {
                 ]
             ),
         },
+        "nix_option_name": {
+            "type": "pattern",
+            "pattern": "[.]",
+        },
+        # Lower priority (virtualHost -> [virtual, host])
+        "nix_option_name_granular": {
+            "type": "pattern",
+            # Split on attrname separators like _, .
+            "pattern": "|".join(
+                [
+                    "[_.-]",  # Common separators like underscores, dots and dashes
+                    # Camelcase tokenizer adapted from
+                    # https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pattern-analyzer.html
+                    "".join(
+                        [
+                            "(?<=[\\p{L}&&[^\\p{Lu}]])"  # lower case
+                            "(?=\\p{Lu})",  # followed by upper case
+                            "|",
+                            "(?<=\\p{Lu})"  # or upper case
+                            "(?=\\p{Lu}[\\p{L}&&[^\\p{Lu}]])",  # followed by lower case
+                        ]
+                    ),
+                ]
+            ),
+        },
     },
     "filter": {
         "nix_stopwords": {
@@ -99,7 +134,20 @@ MAPPING = {
         "package_position": {"type": "text"},
         "package_homepage": {"type": "keyword"},
         # Options fields
-        "option_name": {"type": "keyword"},
+        "option_name": {
+            "type": "text",
+            "analyzer": "nixOptionName",
+            "fielddata": True,
+            "fields": {
+                "raw": {
+                    "type": "keyword"
+                },
+                "granular": {
+                    "type": "text",
+                    "analyzer": "nixOptionNameGranular",
+                },
+            },
+        },
         "option_description": {"type": "text"},
         "option_type": {"type": "keyword"},
         "option_default": {"type": "text"},
@@ -290,26 +338,29 @@ def ensure_index(es, index, mapping):
 
 def create_index_name(channel, evaluation):
     return (
-        f"latest-{channel}",
+        f"latest-{INDEX_SCHEMA_VERSION}-{channel}",
         f"evaluation-{INDEX_SCHEMA_VERSION}-{channel}-{evaluation['revisions_since_start']}-{evaluation['git_revision']}",
     )
 
 
 def update_alias(es, name, index):
-    indexes = set(es.indices.get_alias(name=name).keys())
+    if es.indices.exists_alias(name=name):
+        indexes = set(es.indices.get_alias(name=name).keys())
 
-    # indexes to remove from alias
-    actions = [
-        {"remove": {"index": item, "alias": name}}
-        for item in indexes.difference(set([index]))
-    ]
+        # indexes to remove from alias
+        actions = [
+            {"remove": {"index": item, "alias": name}}
+            for item in indexes.difference(set([index]))
+        ]
 
-    # add index if does not exists in alias
-    if index not in indexes:
-        actions.append({"add": {"index": index, "alias": name}})
+        # add index if it does not exist in alias
+        if index not in indexes:
+            actions.append({"add": {"index": index, "alias": name}})
 
-    if actions:
-        es.indices.update_aliases({"actions": actions})
+        if actions:
+            es.indices.update_aliases({"actions": actions})
+    else:
+        es.indices.put_alias(index=index, name=name)
 
     indexes = ", ".join(es.indices.get_alias(name=name).keys())
     logger.debug(f"'{name}' alias now points to '{indexes}' index")
diff --git a/src/Main.elm b/src/Main.elm
index d41e3f7..b354407 100644
--- a/src/Main.elm
+++ b/src/Main.elm
@@ -4,7 +4,6 @@ module Main exposing (main)
 
 import Browser
 import Browser.Navigation
-import Search
 import Html
     exposing
         ( Html
@@ -29,6 +28,7 @@ import Page.Options
 import Page.Packages
 import RemoteData
 import Route
+import Search
 import Url
 
 
@@ -37,7 +37,8 @@ import Url
 
 
 type alias Flags =
-    { elasticsearchUrl : String
+    { elasticsearchMappingSchemaVersion : Int
+    , elasticsearchUrl : String
     , elasticsearchUsername : String
     , elasticsearchPassword : String
     }
@@ -70,6 +71,7 @@ init flags url navKey =
             , url = url
             , elasticsearch =
                 Search.Options
+                    flags.elasticsearchMappingSchemaVersion
                     flags.elasticsearchUrl
                     flags.elasticsearchUsername
                     flags.elasticsearchPassword
diff --git a/src/Page/Options.elm b/src/Page/Options.elm
index 1a29869..2cff438 100644
--- a/src/Page/Options.elm
+++ b/src/Page/Options.elm
@@ -221,20 +221,86 @@ makeRequestBody query from size =
     -- example query for "python"
     -- {
     --   "from": 0,
-    --   "size": 10,
+    --   "size": 1000,
     --   "query": {
     --     "bool": {
-    --       "filter": {
-    --         "match": {
-    --           "type": "package"
-    --         },
+    --       "must": {
+    --         "bool": {
+    --           "should": [
+    --             {
+    --               "term": {
+    --                 "option_name.raw": {
+    --                   "value": "nginx",
+    --                   "boost": 2.0
+    --                 }
+    --               }
+    --             },
+    --             {
+    --               "term": {
+    --                 "option_name": {
+    --                   "value": "nginx",
+    --                   "boost": 1.0
+    --                 }
+    --               }
+    --             },
+    --             {
+    --               "term": {
+    --                 "option_name.granular": {
+    --                   "value": "nginx",
+    --                   "boost": 0.6
+    --                 }
+    --               }
+    --             },
+    --             {
+    --               "term": {
+    --                 "option_description": {
+    --                   "value": "nginx",
+    --                   "boost": 0.3
+    --                 }
+    --               }
+    --             }
+    --           ]
+    --         }
     --       },
-    --       "should": [
+    --       "filter": [
+    --         {
+    --           "match": {
+    --             "type": "option"
+    --           }
+    --         }
     --       ]
     --     }
+    --   },
+    --   "rescore" : {
+    --     "window_size": 1000,
+    --     "query" : {
+    --       "score_mode": "total",
+    --       "rescore_query" : {
+    --         "function_score" : {
+    --           "script_score": {
+    --             "script": {
+    --               "source": "
+    --                 int i = 1;
+    --                 for (token in doc['option_name.raw'][0].splitOnToken('.')) {
+    --                   if (token == params.query) {
+    --                     return 10000 - (i * 100);
+    --                   }
+    --                   i++;
+    --                 }
+    --                 return 10;
+    --               ",
+    --               "params": { "query": "nginx" }
+    --             }
+    --           }
+    --         }
+    --       }
+    --     }
     --   }
     -- }
     let
+        stringIn name value =
+            [ ( name, Json.Encode.string value ) ]
+
         listIn name type_ value =
             [ ( name, Json.Encode.list type_ value ) ]
 
@@ -254,24 +320,44 @@ makeRequestBody query from size =
               )
             ]
     in
-    [ ( "option_name", 2.0 )
+    [ ( "option_name.raw", 2.0 )
+    , ( "option_name", 1.0 )
+    , ( "option_name.granular", 0.6 )
     , ( "option_description", 0.3 )
     ]
         |> List.map encodeTerm
         |> listIn "should" Json.Encode.object
-        |> List.append
-            [ ( "filter"
-              , Json.Encode.object
-                    [ ( "match"
-                      , Json.Encode.object
-                            [ ( "type", Json.Encode.string "option" )
-                            ]
-                      )
-                    ]
-              )
-            ]
+        |> objectIn "bool"
+        |> objectIn "must"
+        |> ([ ( "type", Json.Encode.string "option" ) ]
+                |> objectIn "match"
+                |> objectIn "filter"
+                |> List.append
+           )
         |> objectIn "bool"
         |> objectIn "query"
+        -- The search term goes in script "params", never into the script source.
+        |> List.append
+            ("""int i = 1;
+                for (token in doc['option_name.raw'][0].splitOnToken('.')) {
+                    if (token == params.query) {
+                        return 10000 - (i * 100);
+                    }
+                    i++;
+                }
+                return 10;
+                """
+                |> stringIn "source"
+                |> List.append (query |> stringIn "query" |> objectIn "params")
+                |> objectIn "script"
+                |> objectIn "script_score"
+                |> objectIn "function_score"
+                |> objectIn "rescore_query"
+                |> List.append ("total" |> stringIn "score_mode")
+                |> objectIn "query"
+                |> List.append [ ( "window_size", Json.Encode.int 1000 ) ]
+                |> objectIn "rescore"
+            )
         |> List.append
             [ ( "from", Json.Encode.int from )
             , ( "size", Json.Encode.int size )
@@ -290,7 +376,7 @@ makeRequest :
 makeRequest options channel query from size =
     Search.makeRequest
         (makeRequestBody query from size)
-        ("latest-nixos-" ++ channel)
+        ("latest-" ++ String.fromInt options.mappingSchemaVersion ++ "-nixos-" ++ channel)
         decodeResultItemSource
         options
         query
diff --git a/src/Search.elm b/src/Search.elm
index cd1aaf7..c7ab04b 100644
--- a/src/Search.elm
+++ b/src/Search.elm
@@ -429,7 +429,8 @@ viewPager outMsg model result path =
 
 
 type alias Options =
-    { url : String
+    { mappingSchemaVersion : Int
+    , url : String
     , username : String
     , password : String
     }
diff --git a/src/index.js b/src/index.js
index 2396618..89db813 100644
--- a/src/index.js
+++ b/src/index.js
@@ -5,6 +5,8 @@ require("./index.scss");
 const {Elm} = require('./Main');
 
 Elm.Main.init({flags: {
+  // The Elm side declares this flag as Int; environment values are strings, so parse explicitly.
+  elasticsearchMappingSchemaVersion: Number.parseInt(process.env.ELASTICSEARCH_MAPPING_SCHEMA_VERSION, 10) || 2,
   elasticsearchUrl: process.env.ELASTICSEARCH_URL || 'https://nixos-search-5886075189.us-east-1.bonsaisearch.net:443',
   elasticsearchUsername : process.env.ELASTICSEARCH_USERNAME || 'z3ZFJ6y2mR',
   elasticsearchPassword : process.env.ELASTICSEARCH_PASSWORD || 'ds8CEvALPf9pui7XG'