From ae670990dd46205d9c4a4e193b1550a6615e1a41 Mon Sep 17 00:00:00 2001 From: Rok Garbas Date: Wed, 3 Jun 2020 21:02:12 +0200 Subject: [PATCH] use one index per channel (#54) --- scripts/import-channel | 183 ++++++++++++-------------- src/Main.elm | 6 +- src/Page/Options.elm | 113 +++++++++++++--- src/Page/Packages.elm | 177 +++++++++++++++++++++---- src/{ElasticSearch.elm => Search.elm} | 118 +---------------- 5 files changed, 338 insertions(+), 259 deletions(-) rename src/{ElasticSearch.elm => Search.elm} (81%) diff --git a/scripts/import-channel b/scripts/import-channel index 9fdcbb0..7877230 100755 --- a/scripts/import-channel +++ b/scripts/import-channel @@ -69,43 +69,51 @@ ANALYSIS = { }, }, } -PACKAGES_MAPPING = dict( - properties=dict( - attr_name=dict( - type="text", analyzer="nixAttrName", fields={"raw": {"type": "keyword"}}, - ), - attr_set=dict(type="keyword"), - pname=dict(type="keyword"), - pversion=dict(type="keyword"), - description=dict(type="text"), - longDescription=dict(type="text"), - license=dict( - type="nested", - properties=dict(fullName=dict(type="text"), url=dict(type="text"),), - ), - maintainers=dict( - type="nested", - properties=dict( - name=dict(type="text"), - email=dict(type="text"), - github=dict(type="text"), - ), - ), - platforms=dict(type="keyword"), - position=dict(type="text"), - homepage=dict(type="keyword"), - ), -) -OPTIONS_MAPPING = dict( - properties=dict( - option_name=dict(type="keyword"), - description=dict(type="text"), - type=dict(type="keyword"), - default=dict(type="text"), - example=dict(type="text"), - source=dict(type="keyword"), - ), -) +MAPPING = { + "properties": { + "type": {"type": "keyword"}, + # Package fields + "package_attr_name": { + "type": "text", + "analyzer": "nixAttrName", + "fields": { + "raw": { + "type": "keyword" + }, + }, + }, + "package_attr_set": {"type": "keyword"}, + "package_pname": {"type": "keyword"}, + "package_pversion": {"type": "keyword"}, + "package_description": {"type": "text"}, + "package_longDescription": {"type": "text"}, + "package_license": { + "type": "nested", + "properties": { + "fullName": {"type": "text"}, + "url": {"type": "text"}, + }, + }, + "package_maintainers": { + "type": "nested", + "properties": { + "name": {"type": "text"}, + "email": {"type": "text"}, + "github": {"type": "text"}, + }, + }, + "package_platforms": {"type": "keyword"}, + "package_position": {"type": "text"}, + "package_homepage": {"type": "keyword"}, + # Options fields + "option_name": {"type": "keyword"}, + "option_description": {"type": "text"}, + "option_type": {"type": "keyword"}, + "option_default": {"type": "text"}, + "option_example": {"type": "text"}, + "option_source": {"type": "keyword"}, + }, +} def get_last_evaluation(channel): @@ -213,21 +221,20 @@ def get_packages(evaluation): ): attr_set = None - doc = dict( - id=attr_name, - attr_name=attr_name, - attr_set=attr_set, - pname=data["pname"], - pversion=data["version"], - description=data["meta"].get("description"), - longDescription=data["meta"].get("longDescription", ""), - license=licenses, - maintainers=maintainers, - platforms=[i for i in platforms if i], - position=position, - homepage=data["meta"].get("homepage"), + yield dict( + type="package", + package_attr_name=attr_name, + package_attr_set=attr_set, + package_pname=data["pname"], + package_pversion=data["version"], + package_description=data["meta"].get("description"), + package_longDescription=data["meta"].get("longDescription", ""), + package_license=licenses, + package_maintainers=maintainers, + package_platforms=[i for i in platforms if i], + package_position=position, + package_homepage=data["meta"].get("homepage"), ) - yield doc logger.debug(f"get_packages: Found {len(packages)} packages") return len(packages), gen @@ -259,13 +266,13 @@ def get_options(evaluation): ): example = str(example["text"]) yield dict( - id=name, + type="option", option_name=name, - description=option.get("description"), - type=option.get("type"), - default=str(option.get("default")), - example=str(example), - source=option.get("declarations", [None])[0], + option_description=option.get("description"), + option_type=option.get("type"), + option_default=str(option.get("default")), + option_example=str(example), + option_source=option.get("declarations", [None])[0], ) return len(options), gen @@ -288,10 +295,10 @@ def ensure_index(es, index, mapping): return True -def ensure_index_name(type_, channel, evaluation): +def create_index_name(channel, evaluation): return ( - f"latest-{channel}-{type_}", - f"evaluation-{INDEX_SCHEMA_VERSION}-{channel}-{evaluation['revisions_since_start']}-{evaluation['git_revision']}-{type_}", + f"latest-{channel}", + f"evaluation-{INDEX_SCHEMA_VERSION}-{channel}-{evaluation['revisions_since_start']}-{evaluation['git_revision']}", ) @@ -300,6 +307,19 @@ def update_alias(es, name, index): logger.debug(f"'{name}' alias now points to '{index}' index") +def write(unit, es, index_name, number_of_items, item_generator): + if number_of_items: + click.echo(f"Indexing {unit}...") + progress = tqdm.tqdm(unit=unit, total=number_of_items) + successes = 0 + for ok, action in elasticsearch.helpers.streaming_bulk( + client=es, index=index_name, actions=item_generator() + ): + progress.update(1) + successes += ok + click.echo(f"Indexed {successes}/{number_of_items} {unit}") + + @click.command() @click.option("-u", "--es-url", help="Elasticsearch connection url") @click.option("-c", "--channel", help="NixOS channel name") @@ -320,44 +340,13 @@ def main(es_url, channel, verbose): es = elasticsearch.Elasticsearch([es_url]) # ensure indexes exist - options_alias, options_index = ensure_index_name("options", channel, evaluation) - packages_alias, packages_index = ensure_index_name("packages", channel, evaluation) - packages_index_created = ensure_index(es, packages_index, PACKAGES_MAPPING) - options_index_created = ensure_index(es, options_index, OPTIONS_MAPPING) + alias_name, index_name = create_index_name(channel, evaluation) + index_created = ensure_index(es, index_name, MAPPING) - # write packages - if packages_index_created: - number_of_packages, gen_packages = get_packages(evaluation) - if number_of_packages: - click.echo("Indexing packages...") - progress = tqdm.tqdm(unit="packages", total=number_of_packages) - successes = 0 - for ok, action in elasticsearch.helpers.streaming_bulk( - client=es, index=packages_index, actions=gen_packages() - ): - progress.update(1) - successes += ok - click.echo("Indexed %d/%d packages" % (successes, number_of_packages)) - - # write options - if options_index_created: - number_of_options, gen_options = get_options(evaluation) - if number_of_options: - click.echo("Indexing options...") - progress = tqdm.tqdm(unit="options", total=number_of_options) - successes = 0 - for ok, action in elasticsearch.helpers.streaming_bulk( - client=es, index=options_index, actions=gen_options() - ): - progress.update(1) - successes += ok - print("Indexed %d/%d options" % (successes, number_of_options)) - - # update alias - if packages_index_created: - update_alias(es, packages_alias, packages_index) - if options_index_created: - update_alias(es, options_alias, options_index) + if index_created: + write("packages", es, index_name, *get_packages(evaluation)) + write("options", es, index_name, *get_options(evaluation)) + update_alias(es, alias_name, index_name) if __name__ == "__main__": diff --git a/src/Main.elm b/src/Main.elm index a924a3a..d41e3f7 100644 --- a/src/Main.elm +++ b/src/Main.elm @@ -4,7 +4,7 @@ module Main exposing (main) import Browser import Browser.Navigation -import ElasticSearch +import Search import Html exposing ( Html @@ -46,7 +46,7 @@ type alias Flags = type alias Model = { navKey : Browser.Navigation.Key , url : Url.Url - , elasticsearch : ElasticSearch.Options + , elasticsearch : Search.Options , page : Page } @@ -69,7 +69,7 @@ init flags url navKey = { navKey = navKey , url = url , elasticsearch = - ElasticSearch.Options + Search.Options flags.elasticsearchUrl flags.elasticsearchUsername flags.elasticsearchPassword diff --git a/src/Page/Options.elm b/src/Page/Options.elm index 596fc58..1a29869 100644 --- a/src/Page/Options.elm +++ b/src/Page/Options.elm @@ -9,7 +9,6 @@ module Page.Options exposing ) import Browser.Navigation -import ElasticSearch import Html exposing ( Html @@ -41,7 +40,10 @@ import Html.Events ) import Html.Parser import Html.Parser.Util +import Http import Json.Decode +import Json.Encode +import Search @@ -49,11 +51,11 @@ import Json.Decode type alias Model = - ElasticSearch.Model ResultItemSource + Search.Model ResultItemSource type alias ResultItemSource = - { option_name : String + { name : String , description : String , type_ : String , default : String @@ -70,7 +72,7 @@ init : -> Maybe Int -> ( Model, Cmd Msg ) init = - ElasticSearch.init + Search.init @@ -78,7 +80,7 @@ init = type Msg - = SearchMsg (ElasticSearch.Msg ResultItemSource) + = SearchMsg (Search.Msg ResultItemSource) update : Browser.Navigation.Key -> Msg -> Model -> ( Model, Cmd Msg ) @@ -87,7 +89,7 @@ update navKey msg model = SearchMsg subMsg -> let ( newModel, newCmd ) = - ElasticSearch.update "options" navKey subMsg model + Search.update "options" navKey subMsg model in ( newModel, Cmd.map SearchMsg newCmd ) @@ -98,7 +100,7 @@ update navKey msg model = view : Model -> Html Msg view model = - ElasticSearch.view + Search.view "options" "Search NixOS options" model @@ -108,7 +110,7 @@ view model = viewSuccess : Maybe String - -> ElasticSearch.Result ResultItemSource + -> Search.Result ResultItemSource -> Html Msg viewSuccess showDetailsFor result = div [ class "search-result" ] @@ -130,7 +132,7 @@ viewSuccess showDetailsFor result = viewResultItem : Maybe String - -> ElasticSearch.ResultItem ResultItemSource + -> Search.ResultItem ResultItemSource -> List (Html Msg) viewResultItem showDetailsFor item = let @@ -142,14 +144,14 @@ viewResultItem showDetailsFor item = else [] in - tr [ onClick (SearchMsg (ElasticSearch.ShowDetails item.id)) ] - [ td [] [ text item.source.option_name ] + tr [ onClick (SearchMsg (Search.ShowDetails item.id)) ] + [ td [] [ text item.source.name ] ] :: packageDetails viewResultItemDetails : - ElasticSearch.ResultItem ResultItemSource + Search.ResultItem ResultItemSource -> Html Msg viewResultItemDetails item = let @@ -209,17 +211,86 @@ viewResultItemDetails item = -- API +makeRequestBody : + String + -> Int + -> Int + -> Http.Body +makeRequestBody query from size = + -- Prefix Query + -- example query for "python" + -- { + -- "from": 0, + -- "size": 10, + -- "query": { + -- "bool": { + -- "filter": { + -- "match": { + -- "type": "package" + -- }, + -- }, + -- "should": [ + -- ] + -- } + -- } + -- } + let + listIn name type_ value = + [ ( name, Json.Encode.list type_ value ) ] + + objectIn name value = + [ ( name, Json.Encode.object value ) ] + + encodeTerm ( name, boost ) = + [ ( "term" + , Json.Encode.object + [ ( name + , Json.Encode.object + [ ( "value", Json.Encode.string query ) + , ( "boost", Json.Encode.float boost ) + ] + ) + ] + ) + ] + in + [ ( "option_name", 2.0 ) + , ( "option_description", 0.3 ) + ] + |> List.map encodeTerm + |> listIn "should" Json.Encode.object + |> List.append + [ ( "filter" + , Json.Encode.object + [ ( "match" + , Json.Encode.object + [ ( "type", Json.Encode.string "option" ) + ] + ) + ] + ) + ] + |> objectIn "bool" + |> objectIn "query" + |> List.append + [ ( "from", Json.Encode.int from ) + , ( "size", Json.Encode.int size ) + ] + |> Json.Encode.object + |> Http.jsonBody + + makeRequest : - ElasticSearch.Options + Search.Options -> String -> String -> Int -> Int -> Cmd Msg makeRequest options channel query from size = - ElasticSearch.makeRequest - "option_name" - ("latest-nixos-" ++ channel ++ "-options") + Search.makeRequest + (makeRequestBody query from size) + ("latest-nixos-" ++ channel) decodeResultItemSource options query @@ -236,8 +307,8 @@ decodeResultItemSource : Json.Decode.Decoder ResultItemSource decodeResultItemSource = Json.Decode.map6 ResultItemSource (Json.Decode.field "option_name" Json.Decode.string) - (Json.Decode.field "description" Json.Decode.string) - (Json.Decode.field "type" Json.Decode.string) - (Json.Decode.field "default" Json.Decode.string) - (Json.Decode.field "example" Json.Decode.string) - (Json.Decode.field "source" Json.Decode.string) + (Json.Decode.field "option_description" Json.Decode.string) + (Json.Decode.field "option_type" Json.Decode.string) + (Json.Decode.field "option_default" Json.Decode.string) + (Json.Decode.field "option_example" Json.Decode.string) + (Json.Decode.field "option_source" Json.Decode.string) diff --git a/src/Page/Packages.elm b/src/Page/Packages.elm index b6e711b..a80470c 100644 --- a/src/Page/Packages.elm +++ b/src/Page/Packages.elm @@ -9,7 +9,6 @@ module Page.Packages exposing ) import Browser.Navigation -import ElasticSearch import Html exposing ( Html @@ -39,8 +38,11 @@ import Html.Events exposing ( onClick ) +import Http import Json.Decode import Json.Decode.Pipeline +import Json.Encode +import Search @@ -48,7 +50,7 @@ import Json.Decode.Pipeline type alias Model = - ElasticSearch.Model ResultItemSource + Search.Model ResultItemSource type alias ResultItemSource = @@ -86,7 +88,7 @@ init : -> Maybe Int -> ( Model, Cmd Msg ) init = - ElasticSearch.init + Search.init @@ -94,7 +96,7 @@ init = type Msg - = SearchMsg (ElasticSearch.Msg ResultItemSource) + = SearchMsg (Search.Msg ResultItemSource) update : Browser.Navigation.Key -> Msg -> Model -> ( Model, Cmd Msg ) @@ -103,7 +105,7 @@ update navKey msg model = SearchMsg subMsg -> let ( newModel, newCmd ) = - ElasticSearch.update "packages" navKey subMsg model + Search.update "packages" navKey subMsg model in ( newModel, Cmd.map SearchMsg newCmd ) @@ -114,7 +116,7 @@ update navKey msg model = view : Model -> Html Msg view model = - ElasticSearch.view + Search.view "packages" "Search NixOS packages" model @@ -124,7 +126,7 @@ view model = viewSuccess : Maybe String - -> ElasticSearch.Result ResultItemSource + -> Search.Result ResultItemSource -> Html Msg viewSuccess showDetailsFor result = div [ class "search-result" ] @@ -149,7 +151,7 @@ viewSuccess showDetailsFor result = viewResultItem : Maybe String - -> ElasticSearch.ResultItem ResultItemSource + -> Search.ResultItem ResultItemSource -> List (Html Msg) viewResultItem showDetailsFor item = let @@ -161,7 +163,7 @@ viewResultItem showDetailsFor item = else [] in - tr [ onClick (SearchMsg (ElasticSearch.ShowDetails item.id)) ] + tr [ onClick (SearchMsg (Search.ShowDetails item.id)) ] [ td [] [ text item.source.attr_name ] , td [] [ text item.source.pname ] , td [] [ text item.source.pversion ] @@ -171,7 +173,7 @@ viewResultItem showDetailsFor item = viewResultItemDetails : - ElasticSearch.ResultItem ResultItemSource + Search.ResultItem ResultItemSource -> Html Msg viewResultItemDetails item = let @@ -277,17 +279,144 @@ viewResultItemDetails item = -- API +makeRequestBody : + String + -> Int + -> Int + -> Http.Body +makeRequestBody query from size = + -- Prefix Query + -- example query for "python" + -- { + -- "from": 0, + -- "size": 10, + -- "query": { + -- "bool": { + -- "filter": { + -- "match": { + -- "type": "package" + -- } + -- }, + -- "must": { + -- "bool": { + -- "should": [ + -- { + -- "multi_match": { + -- "query": "python", + -- "boost": 1, + -- "fields": [ + -- "package_attr_name.raw", + -- "package_attr_name" + -- ], + -- "type": "most_fields" + -- } + -- }, + -- { + -- "term": { + -- "type": { + -- "value": "package", + -- "boost": 0 + -- } + -- } + -- }, + -- { + -- "term": { + -- "package_pname": { + -- "value": "python", + -- "boost": 2 + -- } + -- } + -- }, + -- { + -- "term": { + -- "package_pversion": { + -- "value": "python", + -- "boost": 0.2 + -- } + -- } + -- }, + -- { + -- "term": { + -- "package_description": { + -- "value": "python", + -- "boost": 0.3 + -- } + -- } + -- }, + -- { + -- "term": { + -- "package_longDescription": { + -- "value": "python", + -- "boost": 0.1 + -- } + -- } + -- } + -- ] + -- } + -- } + -- } + -- } + -- } + let + listIn name type_ value = + [ ( name, Json.Encode.list type_ value ) ] + + objectIn name value = + [ ( name, Json.Encode.object value ) ] + + encodeTerm ( name, boost ) = + [ ( "value", Json.Encode.string query ) + , ( "boost", Json.Encode.float boost ) + ] + |> objectIn name + |> objectIn "term" + in + [ ( "package_pname", 2.0 ) + , ( "package_pversion", 0.2 ) + , ( "package_description", 0.3 ) + , ( "package_longDescription", 0.1 ) + ] + |> List.map encodeTerm + |> List.append + [ [ "package_attr_name.raw" + , "package_attr_name" + ] + |> listIn "fields" Json.Encode.string + |> List.append + [ ( "query", Json.Encode.string query ) + , ( "boost", Json.Encode.float 1.0 ) + ] + |> objectIn "multi_match" + ] + |> listIn "should" Json.Encode.object + |> objectIn "bool" + |> objectIn "must" + |> ([ ( "type", Json.Encode.string "package" ) ] + |> objectIn "match" + |> objectIn "filter" + |> List.append + ) + |> objectIn "bool" + |> objectIn "query" + |> List.append + [ ( "from", Json.Encode.int from ) + , ( "size", Json.Encode.int size ) + ] + |> Json.Encode.object + |> Http.jsonBody + + makeRequest : - ElasticSearch.Options + Search.Options -> String -> String -> Int -> Int -> Cmd Msg makeRequest options channel query from size = - ElasticSearch.makeRequest - "attr_name" - ("latest-nixos-" ++ channel ++ "-packages") + Search.makeRequest + (makeRequestBody query from size) + ("latest-nixos-" ++ channel) decodeResultItemSource options query @@ -303,16 +432,16 @@ makeRequest options channel query from size = decodeResultItemSource : Json.Decode.Decoder ResultItemSource decodeResultItemSource = Json.Decode.succeed ResultItemSource - |> Json.Decode.Pipeline.required "attr_name" Json.Decode.string - |> Json.Decode.Pipeline.required "pname" Json.Decode.string - |> Json.Decode.Pipeline.required "pversion" Json.Decode.string - |> Json.Decode.Pipeline.required "description" (Json.Decode.nullable Json.Decode.string) - |> Json.Decode.Pipeline.required "longDescription" (Json.Decode.nullable Json.Decode.string) - |> Json.Decode.Pipeline.required "license" (Json.Decode.list decodeResultPackageLicense) - |> Json.Decode.Pipeline.required "maintainers" (Json.Decode.list decodeResultPackageMaintainer) - |> Json.Decode.Pipeline.required "platforms" (Json.Decode.list Json.Decode.string) - |> Json.Decode.Pipeline.required "position" (Json.Decode.nullable Json.Decode.string) - |> Json.Decode.Pipeline.required "homepage" (Json.Decode.nullable Json.Decode.string) + |> Json.Decode.Pipeline.required "package_attr_name" Json.Decode.string + |> Json.Decode.Pipeline.required "package_pname" Json.Decode.string + |> Json.Decode.Pipeline.required "package_pversion" Json.Decode.string + |> Json.Decode.Pipeline.required "package_description" (Json.Decode.nullable Json.Decode.string) + |> Json.Decode.Pipeline.required "package_longDescription" (Json.Decode.nullable Json.Decode.string) + |> Json.Decode.Pipeline.required "package_license" (Json.Decode.list decodeResultPackageLicense) + |> Json.Decode.Pipeline.required "package_maintainers" (Json.Decode.list decodeResultPackageMaintainer) + |> Json.Decode.Pipeline.required "package_platforms" (Json.Decode.list Json.Decode.string) + |> Json.Decode.Pipeline.required "package_position" (Json.Decode.nullable Json.Decode.string) + |> Json.Decode.Pipeline.required "package_homepage" (Json.Decode.nullable Json.Decode.string) decodeResultPackageLicense : Json.Decode.Decoder ResultPackageLicense diff --git a/src/ElasticSearch.elm b/src/Search.elm similarity index 81% rename from src/ElasticSearch.elm rename to src/Search.elm index 0cf819a..cd1aaf7 100644 --- a/src/ElasticSearch.elm +++ b/src/Search.elm @@ -1,4 +1,4 @@ -module ElasticSearch exposing +module Search exposing ( Model , Msg(..) , Options @@ -435,118 +435,8 @@ type alias Options = } -makeRequestBody : - String - -> String - -> Int - -> Int - -> Http.Body -makeRequestBody field query from size = - -- Prefix Query - -- example query for "python" - -- { - -- "from": 0, - -- "size": 10, - -- "query": { - -- "bool": { - -- "should": [ - -- { - -- "multi_match": { - -- "query": "python", - -- "boost": 1, - -- "fields": [ - -- "attr_name.raw", - -- "attr_name" - -- ], - -- "type": "most_fields" - -- } - -- }, - -- { - -- "term": { - -- "pname": { - -- "value": "python", - -- "boost": 2 - -- } - -- } - -- }, - -- { - -- "term": { - -- "pversion": { - -- "value": "python", - -- "boost": 0.2 - -- } - -- } - -- }, - -- { - -- "term": { - -- "description": { - -- "value": "python", - -- "boost": 0.3 - -- } - -- } - -- }, - -- { - -- "term": { - -- "longDescription": { - -- "value": "python", - -- "boost": 0.1 - -- } - -- } - -- } - -- ] - -- } - -- } - -- } - let - listIn name type_ value = - [ ( name, Json.Encode.list type_ value ) ] - - objectIn name value = - [ ( name, Json.Encode.object value ) ] - - encodeTerm ( name, boost ) = - [ ( "term" - , Json.Encode.object - [ ( name - , Json.Encode.object - [ ( "value", Json.Encode.string query ) - , ( "boost", Json.Encode.float boost ) - ] - ) - ] - ) - ] - in - [ ( "pname", 2.0 ) - , ( "pversion", 0.2 ) - , ( "description", 0.3 ) - , ( "longDescription", 0.1 ) - ] - |> List.map encodeTerm - |> List.append - [ [ "attr_name.raw" - , "attr_name" - ] - |> listIn "fields" Json.Encode.string - |> List.append - [ ( "query", Json.Encode.string query ) - , ( "boost", Json.Encode.float 1.0 ) - ] - |> objectIn "multi_match" - ] - |> listIn "should" Json.Encode.object - |> objectIn "bool" - |> objectIn "query" - |> List.append - [ ( "from", Json.Encode.int from ) - , ( "size", Json.Encode.int size ) - ] - |> Json.Encode.object - |> Http.jsonBody - - makeRequest : - String + Http.Body -> String -> Json.Decode.Decoder a -> Options @@ -554,14 +444,14 @@ makeRequest : -> Int -> Int -> Cmd (Msg a) -makeRequest field index decodeResultItemSource options query from size = +makeRequest body index decodeResultItemSource options query from size = Http.riskyRequest { method = "POST" , headers = [ Http.header "Authorization" ("Basic " ++ Base64.encode (options.username ++ ":" ++ options.password)) ] , url = options.url ++ "/" ++ index ++ "/_search" - , body = makeRequestBody field query from size + , body = body , expect = Http.expectJson (RemoteData.fromResult >> QueryResponse)