use one index per channel (#54)

This commit is contained in:
Rok Garbas 2020-06-03 21:02:12 +02:00 committed by GitHub
parent fda96bda40
commit ae670990dd
Failed to generate hash of commit
5 changed files with 338 additions and 259 deletions

View file

@ -69,43 +69,51 @@ ANALYSIS = {
},
},
}
# Elasticsearch field mapping for package documents (one entry per field
# of the dict yielded by get_packages).
PACKAGES_MAPPING = {
    "properties": {
        # Analysed with the custom "nixAttrName" analyzer; the "raw"
        # sub-field keeps the unanalysed value as a keyword.
        "attr_name": {
            "type": "text",
            "analyzer": "nixAttrName",
            "fields": {"raw": {"type": "keyword"}},
        },
        "attr_set": {"type": "keyword"},
        "pname": {"type": "keyword"},
        "pversion": {"type": "keyword"},
        "description": {"type": "text"},
        "longDescription": {"type": "text"},
        "license": {
            "type": "nested",
            "properties": {
                "fullName": {"type": "text"},
                "url": {"type": "text"},
            },
        },
        "maintainers": {
            "type": "nested",
            "properties": {
                "name": {"type": "text"},
                "email": {"type": "text"},
                "github": {"type": "text"},
            },
        },
        "platforms": {"type": "keyword"},
        "position": {"type": "text"},
        "homepage": {"type": "keyword"},
    },
}
# Elasticsearch field mapping for option documents (one entry per field
# of the dict yielded by get_options).
OPTIONS_MAPPING = {
    "properties": {
        "option_name": {"type": "keyword"},
        "description": {"type": "text"},
        "type": {"type": "keyword"},
        "default": {"type": "text"},
        "example": {"type": "text"},
        "source": {"type": "keyword"},
    },
}
# Single Elasticsearch mapping shared by package and option documents.
# Both kinds live in the same index, so every field name carries a
# "package_" or "option_" prefix (except "option_name") and "type" tells
# the two kinds apart.
MAPPING = {
    "properties": {
        # Document kind: "package" or "option".
        "type": {"type": "keyword"},
        # --- Package fields ---
        # Analysed with the custom "nixAttrName" analyzer; the "raw"
        # sub-field keeps the unanalysed value as a keyword.
        "package_attr_name": {
            "type": "text",
            "analyzer": "nixAttrName",
            "fields": {"raw": {"type": "keyword"}},
        },
        "package_attr_set": {"type": "keyword"},
        "package_pname": {"type": "keyword"},
        "package_pversion": {"type": "keyword"},
        "package_description": {"type": "text"},
        "package_longDescription": {"type": "text"},
        "package_license": {
            "type": "nested",
            "properties": {
                "fullName": {"type": "text"},
                "url": {"type": "text"},
            },
        },
        "package_maintainers": {
            "type": "nested",
            "properties": {
                "name": {"type": "text"},
                "email": {"type": "text"},
                "github": {"type": "text"},
            },
        },
        "package_platforms": {"type": "keyword"},
        "package_position": {"type": "text"},
        "package_homepage": {"type": "keyword"},
        # --- Option fields ---
        "option_name": {"type": "keyword"},
        "option_description": {"type": "text"},
        "option_type": {"type": "keyword"},
        "option_default": {"type": "text"},
        "option_example": {"type": "text"},
        "option_source": {"type": "keyword"},
    },
}
def get_last_evaluation(channel):
@ -213,21 +221,20 @@ def get_packages(evaluation):
):
attr_set = None
doc = dict(
id=attr_name,
attr_name=attr_name,
attr_set=attr_set,
pname=data["pname"],
pversion=data["version"],
description=data["meta"].get("description"),
longDescription=data["meta"].get("longDescription", ""),
license=licenses,
maintainers=maintainers,
platforms=[i for i in platforms if i],
position=position,
homepage=data["meta"].get("homepage"),
yield dict(
type="package",
package_attr_name=attr_name,
package_attr_set=attr_set,
package_pname=data["pname"],
package_pversion=data["version"],
package_description=data["meta"].get("description"),
package_longDescription=data["meta"].get("longDescription", ""),
package_license=licenses,
package_maintainers=maintainers,
package_platforms=[i for i in platforms if i],
package_position=position,
package_homepage=data["meta"].get("homepage"),
)
yield doc
logger.debug(f"get_packages: Found {len(packages)} packages")
return len(packages), gen
@ -259,13 +266,13 @@ def get_options(evaluation):
):
example = str(example["text"])
yield dict(
id=name,
type="option",
option_name=name,
description=option.get("description"),
type=option.get("type"),
default=str(option.get("default")),
example=str(example),
source=option.get("declarations", [None])[0],
option_description=option.get("description"),
option_type=option.get("type"),
option_default=str(option.get("default")),
option_example=str(example),
option_source=option.get("declarations", [None])[0],
)
return len(options), gen
@ -288,10 +295,10 @@ def ensure_index(es, index, mapping):
return True
def ensure_index_name(type_, channel, evaluation):
def create_index_name(channel, evaluation):
return (
f"latest-{channel}-{type_}",
f"evaluation-{INDEX_SCHEMA_VERSION}-{channel}-{evaluation['revisions_since_start']}-{evaluation['git_revision']}-{type_}",
f"latest-{channel}",
f"evaluation-{INDEX_SCHEMA_VERSION}-{channel}-{evaluation['revisions_since_start']}-{evaluation['git_revision']}",
)
@ -300,6 +307,19 @@ def update_alias(es, name, index):
logger.debug(f"'{name}' alias now points to '{index}' index")
def write(unit, es, index_name, number_of_items, item_generator):
    """Bulk-index generated documents into ``index_name`` with a progress bar.

    Args:
        unit: Human-readable label for the items (e.g. "packages"); used in
            the echoed messages and as the progress bar unit.
        es: Elasticsearch client handed to ``streaming_bulk``.
        index_name: Name of the index the documents are written to.
        number_of_items: Expected number of documents. When falsy, nothing
            is indexed and nothing is printed.
        item_generator: Zero-argument callable returning the iterable of
            documents to index; only called when there is work to do.
    """
    if not number_of_items:
        return

    click.echo(f"Indexing {unit}...")
    successes = 0
    # The context manager closes the progress bar even when streaming_bulk
    # raises part-way through, so the terminal is not left with a dangling bar.
    with tqdm.tqdm(unit=unit, total=number_of_items) as progress:
        for ok, _ in elasticsearch.helpers.streaming_bulk(
            client=es, index=index_name, actions=item_generator()
        ):
            progress.update(1)
            successes += ok
    click.echo(f"Indexed {successes}/{number_of_items} {unit}")
@click.command()
@click.option("-u", "--es-url", help="Elasticsearch connection url")
@click.option("-c", "--channel", help="NixOS channel name")
@ -320,44 +340,13 @@ def main(es_url, channel, verbose):
es = elasticsearch.Elasticsearch([es_url])
# ensure indexes exist
options_alias, options_index = ensure_index_name("options", channel, evaluation)
packages_alias, packages_index = ensure_index_name("packages", channel, evaluation)
packages_index_created = ensure_index(es, packages_index, PACKAGES_MAPPING)
options_index_created = ensure_index(es, options_index, OPTIONS_MAPPING)
alias_name, index_name = create_index_name(channel, evaluation)
index_created = ensure_index(es, index_name, MAPPING)
# write packages
if packages_index_created:
number_of_packages, gen_packages = get_packages(evaluation)
if number_of_packages:
click.echo("Indexing packages...")
progress = tqdm.tqdm(unit="packages", total=number_of_packages)
successes = 0
for ok, action in elasticsearch.helpers.streaming_bulk(
client=es, index=packages_index, actions=gen_packages()
):
progress.update(1)
successes += ok
click.echo("Indexed %d/%d packages" % (successes, number_of_packages))
# write options
if options_index_created:
number_of_options, gen_options = get_options(evaluation)
if number_of_options:
click.echo("Indexing options...")
progress = tqdm.tqdm(unit="options", total=number_of_options)
successes = 0
for ok, action in elasticsearch.helpers.streaming_bulk(
client=es, index=options_index, actions=gen_options()
):
progress.update(1)
successes += ok
print("Indexed %d/%d options" % (successes, number_of_options))
# update alias
if packages_index_created:
update_alias(es, packages_alias, packages_index)
if options_index_created:
update_alias(es, options_alias, options_index)
if index_created:
write("packages", es, index_name, *get_packages(evaluation))
write("options", es, index_name, *get_options(evaluation))
update_alias(es, alias_name, index_name)
if __name__ == "__main__":

View file

@ -4,7 +4,7 @@ module Main exposing (main)
import Browser
import Browser.Navigation
import ElasticSearch
import Search
import Html
exposing
( Html
@ -46,7 +46,7 @@ type alias Flags =
type alias Model =
{ navKey : Browser.Navigation.Key
, url : Url.Url
, elasticsearch : ElasticSearch.Options
, elasticsearch : Search.Options
, page : Page
}
@ -69,7 +69,7 @@ init flags url navKey =
{ navKey = navKey
, url = url
, elasticsearch =
ElasticSearch.Options
Search.Options
flags.elasticsearchUrl
flags.elasticsearchUsername
flags.elasticsearchPassword

View file

@ -9,7 +9,6 @@ module Page.Options exposing
)
import Browser.Navigation
import ElasticSearch
import Html
exposing
( Html
@ -41,7 +40,10 @@ import Html.Events
)
import Html.Parser
import Html.Parser.Util
import Http
import Json.Decode
import Json.Encode
import Search
@ -49,11 +51,11 @@ import Json.Decode
type alias Model =
ElasticSearch.Model ResultItemSource
Search.Model ResultItemSource
type alias ResultItemSource =
{ option_name : String
{ name : String
, description : String
, type_ : String
, default : String
@ -70,7 +72,7 @@ init :
-> Maybe Int
-> ( Model, Cmd Msg )
init =
ElasticSearch.init
Search.init
@ -78,7 +80,7 @@ init =
type Msg
= SearchMsg (ElasticSearch.Msg ResultItemSource)
= SearchMsg (Search.Msg ResultItemSource)
update : Browser.Navigation.Key -> Msg -> Model -> ( Model, Cmd Msg )
@ -87,7 +89,7 @@ update navKey msg model =
SearchMsg subMsg ->
let
( newModel, newCmd ) =
ElasticSearch.update "options" navKey subMsg model
Search.update "options" navKey subMsg model
in
( newModel, Cmd.map SearchMsg newCmd )
@ -98,7 +100,7 @@ update navKey msg model =
view : Model -> Html Msg
view model =
ElasticSearch.view
Search.view
"options"
"Search NixOS options"
model
@ -108,7 +110,7 @@ view model =
viewSuccess :
Maybe String
-> ElasticSearch.Result ResultItemSource
-> Search.Result ResultItemSource
-> Html Msg
viewSuccess showDetailsFor result =
div [ class "search-result" ]
@ -130,7 +132,7 @@ viewSuccess showDetailsFor result =
viewResultItem :
Maybe String
-> ElasticSearch.ResultItem ResultItemSource
-> Search.ResultItem ResultItemSource
-> List (Html Msg)
viewResultItem showDetailsFor item =
let
@ -142,14 +144,14 @@ viewResultItem showDetailsFor item =
else
[]
in
tr [ onClick (SearchMsg (ElasticSearch.ShowDetails item.id)) ]
[ td [] [ text item.source.option_name ]
tr [ onClick (SearchMsg (Search.ShowDetails item.id)) ]
[ td [] [ text item.source.name ]
]
:: packageDetails
viewResultItemDetails :
ElasticSearch.ResultItem ResultItemSource
Search.ResultItem ResultItemSource
-> Html Msg
viewResultItemDetails item =
let
@ -209,17 +211,86 @@ viewResultItemDetails item =
-- API
makeRequestBody :
    String
    -> Int
    -> Int
    -> Http.Body
makeRequestBody query from size =
    -- Build the Elasticsearch search body for NixOS options.
    --
    -- Example body for the query "python":
    -- {
    --   "from": 0,
    --   "size": 10,
    --   "query": {
    --     "bool": {
    --       "filter": {
    --         "match": {
    --           "type": "option"
    --         }
    --       },
    --       "must": {
    --         "bool": {
    --           "should": [
    --             {
    --               "term": {
    --                 "option_name": {
    --                   "value": "python",
    --                   "boost": 2
    --                 }
    --               }
    --             },
    --             {
    --               "term": {
    --                 "option_description": {
    --                   "value": "python",
    --                   "boost": 0.3
    --                 }
    --               }
    --             }
    --           ]
    --         }
    --       }
    --     }
    --   }
    -- }
    --
    -- The "should" clauses are nested inside "must" on purpose: when a bool
    -- query also has a "filter", Elasticsearch treats sibling "should"
    -- clauses as optional, so every document of type "option" would match
    -- regardless of the search term. Inside their own bool (which has no
    -- must/filter) at least one of them has to match.
    let
        listIn name type_ value =
            [ ( name, Json.Encode.list type_ value ) ]

        objectIn name value =
            [ ( name, Json.Encode.object value ) ]

        encodeTerm ( name, boost ) =
            [ ( "value", Json.Encode.string query )
            , ( "boost", Json.Encode.float boost )
            ]
                |> objectIn name
                |> objectIn "term"
    in
    [ ( "option_name", 2.0 )
    , ( "option_description", 0.3 )
    ]
        |> List.map encodeTerm
        |> listIn "should" Json.Encode.object
        |> objectIn "bool"
        |> objectIn "must"
        |> ([ ( "type", Json.Encode.string "option" ) ]
                |> objectIn "match"
                |> objectIn "filter"
                |> List.append
           )
        |> objectIn "bool"
        |> objectIn "query"
        |> List.append
            [ ( "from", Json.Encode.int from )
            , ( "size", Json.Encode.int size )
            ]
        |> Json.Encode.object
        |> Http.jsonBody
makeRequest :
ElasticSearch.Options
Search.Options
-> String
-> String
-> Int
-> Int
-> Cmd Msg
makeRequest options channel query from size =
ElasticSearch.makeRequest
"option_name"
("latest-nixos-" ++ channel ++ "-options")
Search.makeRequest
(makeRequestBody query from size)
("latest-nixos-" ++ channel)
decodeResultItemSource
options
query
@ -236,8 +307,8 @@ decodeResultItemSource : Json.Decode.Decoder ResultItemSource
decodeResultItemSource =
Json.Decode.map6 ResultItemSource
(Json.Decode.field "option_name" Json.Decode.string)
(Json.Decode.field "description" Json.Decode.string)
(Json.Decode.field "type" Json.Decode.string)
(Json.Decode.field "default" Json.Decode.string)
(Json.Decode.field "example" Json.Decode.string)
(Json.Decode.field "source" Json.Decode.string)
(Json.Decode.field "option_description" Json.Decode.string)
(Json.Decode.field "option_type" Json.Decode.string)
(Json.Decode.field "option_default" Json.Decode.string)
(Json.Decode.field "option_example" Json.Decode.string)
(Json.Decode.field "option_source" Json.Decode.string)

View file

@ -9,7 +9,6 @@ module Page.Packages exposing
)
import Browser.Navigation
import ElasticSearch
import Html
exposing
( Html
@ -39,8 +38,11 @@ import Html.Events
exposing
( onClick
)
import Http
import Json.Decode
import Json.Decode.Pipeline
import Json.Encode
import Search
@ -48,7 +50,7 @@ import Json.Decode.Pipeline
type alias Model =
ElasticSearch.Model ResultItemSource
Search.Model ResultItemSource
type alias ResultItemSource =
@ -86,7 +88,7 @@ init :
-> Maybe Int
-> ( Model, Cmd Msg )
init =
ElasticSearch.init
Search.init
@ -94,7 +96,7 @@ init =
type Msg
= SearchMsg (ElasticSearch.Msg ResultItemSource)
= SearchMsg (Search.Msg ResultItemSource)
update : Browser.Navigation.Key -> Msg -> Model -> ( Model, Cmd Msg )
@ -103,7 +105,7 @@ update navKey msg model =
SearchMsg subMsg ->
let
( newModel, newCmd ) =
ElasticSearch.update "packages" navKey subMsg model
Search.update "packages" navKey subMsg model
in
( newModel, Cmd.map SearchMsg newCmd )
@ -114,7 +116,7 @@ update navKey msg model =
view : Model -> Html Msg
view model =
ElasticSearch.view
Search.view
"packages"
"Search NixOS packages"
model
@ -124,7 +126,7 @@ view model =
viewSuccess :
Maybe String
-> ElasticSearch.Result ResultItemSource
-> Search.Result ResultItemSource
-> Html Msg
viewSuccess showDetailsFor result =
div [ class "search-result" ]
@ -149,7 +151,7 @@ viewSuccess showDetailsFor result =
viewResultItem :
Maybe String
-> ElasticSearch.ResultItem ResultItemSource
-> Search.ResultItem ResultItemSource
-> List (Html Msg)
viewResultItem showDetailsFor item =
let
@ -161,7 +163,7 @@ viewResultItem showDetailsFor item =
else
[]
in
tr [ onClick (SearchMsg (ElasticSearch.ShowDetails item.id)) ]
tr [ onClick (SearchMsg (Search.ShowDetails item.id)) ]
[ td [] [ text item.source.attr_name ]
, td [] [ text item.source.pname ]
, td [] [ text item.source.pversion ]
@ -171,7 +173,7 @@ viewResultItem showDetailsFor item =
viewResultItemDetails :
ElasticSearch.ResultItem ResultItemSource
Search.ResultItem ResultItemSource
-> Html Msg
viewResultItemDetails item =
let
@ -277,17 +279,144 @@ viewResultItemDetails item =
-- API
makeRequestBody :
    String
    -> Int
    -> Int
    -> Http.Body
makeRequestBody query from size =
    -- Build the Elasticsearch search body for NixOS packages.
    --
    -- Shape of the body for the query "python":
    -- {
    --   "from": 0,
    --   "size": 10,
    --   "query": {
    --     "bool": {
    --       "filter": { "match": { "type": "package" } },
    --       "must": {
    --         "bool": {
    --           "should": [
    --             { "multi_match": {
    --                 "query": "python",
    --                 "boost": 1,
    --                 "fields": [ "package_attr_name.raw", "package_attr_name" ]
    --             } },
    --             { "term": { "package_pname": { "value": "python", "boost": 2 } } },
    --             { "term": { "package_pversion": { "value": "python", "boost": 0.2 } } },
    --             { "term": { "package_description": { "value": "python", "boost": 0.3 } } },
    --             { "term": { "package_longDescription": { "value": "python", "boost": 0.1 } } }
    --           ]
    --         }
    --       }
    --     }
    --   }
    -- }
    let
        -- One boosted exact-term clause on a single field.
        termClause ( fieldName, boost ) =
            Json.Encode.object
                [ ( "term"
                  , Json.Encode.object
                        [ ( fieldName
                          , Json.Encode.object
                                [ ( "value", Json.Encode.string query )
                                , ( "boost", Json.Encode.float boost )
                                ]
                          )
                        ]
                  )
                ]

        -- Match on the attribute name, both analysed and raw.
        attrNameClause =
            Json.Encode.object
                [ ( "multi_match"
                  , Json.Encode.object
                        [ ( "query", Json.Encode.string query )
                        , ( "boost", Json.Encode.float 1.0 )
                        , ( "fields"
                          , Json.Encode.list Json.Encode.string
                                [ "package_attr_name.raw"
                                , "package_attr_name"
                                ]
                          )
                        ]
                  )
                ]

        shouldClauses =
            attrNameClause
                :: List.map termClause
                    [ ( "package_pname", 2.0 )
                    , ( "package_pversion", 0.2 )
                    , ( "package_description", 0.3 )
                    , ( "package_longDescription", 0.1 )
                    ]

        -- Restrict results to documents of type "package".
        packagesOnly =
            Json.Encode.object
                [ ( "match"
                  , Json.Encode.object
                        [ ( "type", Json.Encode.string "package" ) ]
                  )
                ]

        matchAnyClause =
            Json.Encode.object
                [ ( "bool"
                  , Json.Encode.object
                        [ ( "should", Json.Encode.list identity shouldClauses ) ]
                  )
                ]
    in
    Http.jsonBody
        (Json.Encode.object
            [ ( "from", Json.Encode.int from )
            , ( "size", Json.Encode.int size )
            , ( "query"
              , Json.Encode.object
                    [ ( "bool"
                      , Json.Encode.object
                            [ ( "filter", packagesOnly )
                            , ( "must", matchAnyClause )
                            ]
                      )
                    ]
              )
            ]
        )
makeRequest :
ElasticSearch.Options
Search.Options
-> String
-> String
-> Int
-> Int
-> Cmd Msg
makeRequest options channel query from size =
ElasticSearch.makeRequest
"attr_name"
("latest-nixos-" ++ channel ++ "-packages")
Search.makeRequest
(makeRequestBody query from size)
("latest-nixos-" ++ channel)
decodeResultItemSource
options
query
@ -303,16 +432,16 @@ makeRequest options channel query from size =
decodeResultItemSource : Json.Decode.Decoder ResultItemSource
decodeResultItemSource =
Json.Decode.succeed ResultItemSource
|> Json.Decode.Pipeline.required "attr_name" Json.Decode.string
|> Json.Decode.Pipeline.required "pname" Json.Decode.string
|> Json.Decode.Pipeline.required "pversion" Json.Decode.string
|> Json.Decode.Pipeline.required "description" (Json.Decode.nullable Json.Decode.string)
|> Json.Decode.Pipeline.required "longDescription" (Json.Decode.nullable Json.Decode.string)
|> Json.Decode.Pipeline.required "license" (Json.Decode.list decodeResultPackageLicense)
|> Json.Decode.Pipeline.required "maintainers" (Json.Decode.list decodeResultPackageMaintainer)
|> Json.Decode.Pipeline.required "platforms" (Json.Decode.list Json.Decode.string)
|> Json.Decode.Pipeline.required "position" (Json.Decode.nullable Json.Decode.string)
|> Json.Decode.Pipeline.required "homepage" (Json.Decode.nullable Json.Decode.string)
|> Json.Decode.Pipeline.required "package_attr_name" Json.Decode.string
|> Json.Decode.Pipeline.required "package_pname" Json.Decode.string
|> Json.Decode.Pipeline.required "package_pversion" Json.Decode.string
|> Json.Decode.Pipeline.required "package_description" (Json.Decode.nullable Json.Decode.string)
|> Json.Decode.Pipeline.required "package_longDescription" (Json.Decode.nullable Json.Decode.string)
|> Json.Decode.Pipeline.required "package_license" (Json.Decode.list decodeResultPackageLicense)
|> Json.Decode.Pipeline.required "package_maintainers" (Json.Decode.list decodeResultPackageMaintainer)
|> Json.Decode.Pipeline.required "package_platforms" (Json.Decode.list Json.Decode.string)
|> Json.Decode.Pipeline.required "package_position" (Json.Decode.nullable Json.Decode.string)
|> Json.Decode.Pipeline.required "package_homepage" (Json.Decode.nullable Json.Decode.string)
decodeResultPackageLicense : Json.Decode.Decoder ResultPackageLicense

View file

@ -1,4 +1,4 @@
module ElasticSearch exposing
module Search exposing
( Model
, Msg(..)
, Options
@ -435,118 +435,8 @@ type alias Options =
}
makeRequestBody :
    String
    -> String
    -> Int
    -> Int
    -> Http.Body
makeRequestBody field query from size =
    -- Build the Elasticsearch search body used by the shared search module.
    -- NOTE: `field` is accepted but not used by the body built here.
    --
    -- Shape of the body for the query "python":
    -- {
    --   "from": 0,
    --   "size": 10,
    --   "query": {
    --     "bool": {
    --       "should": [
    --         { "multi_match": {
    --             "query": "python",
    --             "boost": 1,
    --             "fields": [ "attr_name.raw", "attr_name" ]
    --         } },
    --         { "term": { "pname": { "value": "python", "boost": 2 } } },
    --         { "term": { "pversion": { "value": "python", "boost": 0.2 } } },
    --         { "term": { "description": { "value": "python", "boost": 0.3 } } },
    --         { "term": { "longDescription": { "value": "python", "boost": 0.1 } } }
    --       ]
    --     }
    --   }
    -- }
    let
        -- One boosted exact-term clause on a single field.
        termClause ( fieldName, boost ) =
            Json.Encode.object
                [ ( "term"
                  , Json.Encode.object
                        [ ( fieldName
                          , Json.Encode.object
                                [ ( "value", Json.Encode.string query )
                                , ( "boost", Json.Encode.float boost )
                                ]
                          )
                        ]
                  )
                ]

        -- Match on the attribute name, both analysed and raw.
        attrNameClause =
            Json.Encode.object
                [ ( "multi_match"
                  , Json.Encode.object
                        [ ( "query", Json.Encode.string query )
                        , ( "boost", Json.Encode.float 1.0 )
                        , ( "fields"
                          , Json.Encode.list Json.Encode.string
                                [ "attr_name.raw"
                                , "attr_name"
                                ]
                          )
                        ]
                  )
                ]

        shouldClauses =
            attrNameClause
                :: List.map termClause
                    [ ( "pname", 2.0 )
                    , ( "pversion", 0.2 )
                    , ( "description", 0.3 )
                    , ( "longDescription", 0.1 )
                    ]
    in
    Http.jsonBody
        (Json.Encode.object
            [ ( "from", Json.Encode.int from )
            , ( "size", Json.Encode.int size )
            , ( "query"
              , Json.Encode.object
                    [ ( "bool"
                      , Json.Encode.object
                            [ ( "should", Json.Encode.list identity shouldClauses ) ]
                      )
                    ]
              )
            ]
        )
makeRequest :
String
Http.Body
-> String
-> Json.Decode.Decoder a
-> Options
@ -554,14 +444,14 @@ makeRequest :
-> Int
-> Int
-> Cmd (Msg a)
makeRequest field index decodeResultItemSource options query from size =
makeRequest body index decodeResultItemSource options query from size =
Http.riskyRequest
{ method = "POST"
, headers =
[ Http.header "Authorization" ("Basic " ++ Base64.encode (options.username ++ ":" ++ options.password))
]
, url = options.url ++ "/" ++ index ++ "/_search"
, body = makeRequestBody field query from size
, body = body
, expect =
Http.expectJson
(RemoteData.fromResult >> QueryResponse)