improve packages search query (#102)
This commit is contained in:
parent
0fb5f699b9
commit
c420d05815
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -31,3 +31,4 @@ dist
|
||||||
package-lock.json
|
package-lock.json
|
||||||
result
|
result
|
||||||
scripts/eval-*
|
scripts/eval-*
|
||||||
|
eval-*
|
||||||
|
|
18
README.md
18
README.md
|
@ -18,6 +18,24 @@ For backend we are using Elasticsearch instance which is kindly sponsored by
|
||||||
[Elm](https://elm-lang.org).
|
[Elm](https://elm-lang.org).
|
||||||
|
|
||||||
|
|
||||||
|
## How does search work?
|
||||||
|
|
||||||
|
The use case we want to solve is that a visitor wants to see if a package
|
||||||
|
exists or to look up a certain package's details.
|
||||||
|
|
||||||
|
A user wants to converge to a single result if possible. The more characters
|
||||||
|
are added to a search query, the narrower the search is and we should show
|
||||||
|
fewer results.
|
||||||
|
|
||||||
|
Ranking of search results is also very important. It will bring more relevant
|
||||||
|
search results to the top, since a lot of times it is hard to produce a search
|
||||||
|
query that will output only one result item.
|
||||||
|
|
||||||
|
Less important, but providing a better user experience, are suggestions for
|
||||||
|
writing a better search query. The suggestion feature should guide the user to write
|
||||||
|
better queries which in turn will produce better results.
|
||||||
|
|
||||||
|
|
||||||
## Ideas we want to explore
|
## Ideas we want to explore
|
||||||
|
|
||||||
Apart from searching packages and options we would like to:
|
Apart from searching packages and options we would like to:
|
||||||
|
|
2
elm.json
2
elm.json
|
@ -12,6 +12,7 @@
|
||||||
"elm/html": "1.0.0",
|
"elm/html": "1.0.0",
|
||||||
"elm/http": "2.0.0",
|
"elm/http": "2.0.0",
|
||||||
"elm/json": "1.1.3",
|
"elm/json": "1.1.3",
|
||||||
|
"elm/regex": "1.0.0",
|
||||||
"elm/url": "1.0.0",
|
"elm/url": "1.0.0",
|
||||||
"hecrj/html-parser": "2.3.4",
|
"hecrj/html-parser": "2.3.4",
|
||||||
"krisajenkins/remotedata": "6.0.1",
|
"krisajenkins/remotedata": "6.0.1",
|
||||||
|
@ -21,7 +22,6 @@
|
||||||
"elm/bytes": "1.0.8",
|
"elm/bytes": "1.0.8",
|
||||||
"elm/file": "1.0.5",
|
"elm/file": "1.0.5",
|
||||||
"elm/parser": "1.1.0",
|
"elm/parser": "1.1.0",
|
||||||
"elm/regex": "1.0.0",
|
|
||||||
"elm/time": "1.0.0",
|
"elm/time": "1.0.0",
|
||||||
"elm/virtual-dom": "1.0.2",
|
"elm/virtual-dom": "1.0.2",
|
||||||
"rtfeldman/elm-hex": "1.0.0"
|
"rtfeldman/elm-hex": "1.0.0"
|
||||||
|
|
|
@ -13,7 +13,6 @@
|
||||||
import boto3
|
import boto3
|
||||||
import botocore
|
import botocore
|
||||||
import botocore.client
|
import botocore.client
|
||||||
import xml.etree.ElementTree
|
|
||||||
import click
|
import click
|
||||||
import click_log
|
import click_log
|
||||||
import elasticsearch
|
import elasticsearch
|
||||||
|
@ -22,10 +21,12 @@ import json
|
||||||
import logging
|
import logging
|
||||||
import os.path
|
import os.path
|
||||||
import pypandoc
|
import pypandoc
|
||||||
|
import re
|
||||||
import requests
|
import requests
|
||||||
import shlex
|
import shlex
|
||||||
import subprocess
|
import subprocess
|
||||||
import tqdm
|
import tqdm
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
logger = logging.getLogger("import-channel")
|
logger = logging.getLogger("import-channel")
|
||||||
click_log.basic_config(logger)
|
click_log.basic_config(logger)
|
||||||
|
@ -33,7 +34,7 @@ click_log.basic_config(logger)
|
||||||
|
|
||||||
S3_BUCKET = "nix-releases"
|
S3_BUCKET = "nix-releases"
|
||||||
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
|
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
INDEX_SCHEMA_VERSION = 5
|
INDEX_SCHEMA_VERSION = 6
|
||||||
CHANNELS = {
|
CHANNELS = {
|
||||||
"unstable": {
|
"unstable": {
|
||||||
"packages": "nixpkgs/nixpkgs-20.09pre",
|
"packages": "nixpkgs/nixpkgs-20.09pre",
|
||||||
|
@ -49,11 +50,18 @@ CHANNELS = {
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
ANALYSIS = {
|
ANALYSIS = {
|
||||||
"analyzer": {
|
"normalizer": {
|
||||||
"nixAttrName": {
|
"lowercase": {
|
||||||
"type": "custom",
|
"type": "custom",
|
||||||
"tokenizer": "nix_attrname",
|
"char_filter": [],
|
||||||
"filter": ["lowercase", "nix_stopwords"],
|
"filter": ["lowercase"],
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"analyzer": {
|
||||||
|
"lowercase": {
|
||||||
|
"type": "custom",
|
||||||
|
"tokenizer": "keyword",
|
||||||
|
"filter": ["lowercase"],
|
||||||
},
|
},
|
||||||
"nixOptionName": {
|
"nixOptionName": {
|
||||||
"type": "custom",
|
"type": "custom",
|
||||||
|
@ -67,13 +75,24 @@ ANALYSIS = {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
"tokenizer": {
|
"tokenizer": {
|
||||||
"nix_attrname": {
|
"nix_package_query": {
|
||||||
|
"type": "pattern",
|
||||||
|
"pattern": "|".join(
|
||||||
|
[
|
||||||
|
"[ ]",
|
||||||
|
]
|
||||||
|
),
|
||||||
|
},
|
||||||
|
"nix_package_attr_name": {
|
||||||
"type": "pattern",
|
"type": "pattern",
|
||||||
# Split on attrname separators like _, .
|
# Split on attrname separators like _, .
|
||||||
"pattern": "|".join(
|
"pattern": "|".join(
|
||||||
[
|
[
|
||||||
"[_.-]", # Common separators like underscores, dots and dashes
|
"[_.-]", # Common separators like underscores, dots and dashes
|
||||||
"\\d+?Packages", # python37Packages -> python
|
"\\d+?Packages", # python37Packages -> python
|
||||||
|
"\\d+?Plugins", # vimPlugins -> vim
|
||||||
|
"\\d+?Extensions", # php74Extensions -> php
|
||||||
|
"\\d+?Interpreters", # perlInterpreters -> perl
|
||||||
# Camelcase tokenizer adapted from
|
# Camelcase tokenizer adapted from
|
||||||
# https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pattern-analyzer.html
|
# https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pattern-analyzer.html
|
||||||
"".join(
|
"".join(
|
||||||
|
@ -118,7 +137,18 @@ ANALYSIS = {
|
||||||
"nix_stopwords": {
|
"nix_stopwords": {
|
||||||
"type": "stop",
|
"type": "stop",
|
||||||
"ignore_case": True,
|
"ignore_case": True,
|
||||||
"stopwords": ["packages", "package", "options", "option"],
|
"stopwords": [
|
||||||
|
"packages",
|
||||||
|
"package",
|
||||||
|
"options",
|
||||||
|
"option",
|
||||||
|
"plugins",
|
||||||
|
"plugin",
|
||||||
|
"extensions",
|
||||||
|
"extension",
|
||||||
|
"interpreters",
|
||||||
|
"interpreter",
|
||||||
|
],
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
@ -146,12 +176,21 @@ MAPPING = {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
"package_attr_name": {
|
"package_attr_name": {
|
||||||
"type": "text",
|
"type": "keyword",
|
||||||
"analyzer": "nixAttrName",
|
"normalizer": "lowercase",
|
||||||
"fields": {"raw": {"type": "keyword"}},
|
},
|
||||||
|
"package_attr_name_query": {
|
||||||
|
"type": "keyword",
|
||||||
|
"normalizer": "lowercase",
|
||||||
|
},
|
||||||
|
"package_attr_set": {
|
||||||
|
"type": "keyword",
|
||||||
|
"normalizer": "lowercase",
|
||||||
|
},
|
||||||
|
"package_pname": {
|
||||||
|
"type": "keyword",
|
||||||
|
"normalizer": "lowercase",
|
||||||
},
|
},
|
||||||
"package_attr_set": {"type": "keyword"},
|
|
||||||
"package_pname": {"type": "keyword"},
|
|
||||||
"package_pversion": {"type": "keyword"},
|
"package_pversion": {"type": "keyword"},
|
||||||
"package_description": {"type": "text"},
|
"package_description": {"type": "text"},
|
||||||
"package_longDescription": {"type": "text"},
|
"package_longDescription": {"type": "text"},
|
||||||
|
@ -195,6 +234,39 @@ MAPPING = {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Matches one "part" of an attribute name: the shortest run of characters that
# ends either
#   * between a lowercase letter and a following digit (1-9) or uppercase
#     letter (``python|37``, ``vim|Plugins``),
#   * between a digit (1-9) / uppercase letter and a following capitalised
#     word (``37|Packages``),
#   * right after a separator character (``.``, ``_`` or ``-``), which is kept
#     at the end of the part, or
#   * at the end of the string.
# Raw string: the pattern contains regex syntax that must not be interpreted
# as Python string escapes.
_ATTR_NAME_PART_RE = re.compile(
    r".+?(?:(?<=[a-z])(?=[1-9A-Z])|(?<=[1-9A-Z])(?=[A-Z][a-z])|[._-]|$)"
)


def split_query(text):
    """Tokenize a package attr_name into every contiguous run of its parts.

    The name is first cut into parts (see ``_ATTR_NAME_PART_RE``). Then, for
    every starting part, all concatenations of that part with the parts that
    follow it are emitted, with trailing separators (``_``, ``.``, ``-``)
    stripped from each token.

    Example for ``python37Packages.test1_name-test2``:

        parts: python | 37 | Packages. | test | 1_ | name- | test | 2

        = index: 0
          - python
          - python37
          - python37Packages
          - python37Packages.test
          - python37Packages.test1
          - python37Packages.test1_name
          - python37Packages.test1_name-test
          - python37Packages.test1_name-test2
        = index: 1
          - 37
          - 37Packages
          - ...
        = index: 7
          - 2

    Args:
        text: The attribute name to tokenize.

    Returns:
        A list of token strings, grouped by starting part and ordered from
        shortest to longest within each group. The list may contain
        duplicates and is empty when ``text`` is empty.
    """
    parts = [match.group(0) for match in _ATTR_NAME_PART_RE.finditer(text)]

    tokens = []
    for start in range(len(parts)):
        joined = ""
        for part in parts[start:]:
            joined += part
            # A part may end with its separator; never expose it in a token.
            tokens.append(joined.rstrip("_.-"))
    return tokens
|
||||||
|
|
||||||
|
|
||||||
def get_last_evaluation(prefix):
|
def get_last_evaluation(prefix):
|
||||||
logger.debug(f"Retriving last evaluation for {prefix} prefix.")
|
logger.debug(f"Retriving last evaluation for {prefix} prefix.")
|
||||||
|
|
||||||
|
@ -265,6 +337,63 @@ def get_evaluation_builds(evaluation_id):
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def get_maintainer(maintainer):
    """Normalize a maintainers value into a flat list of maintainer dicts.

    Args:
        maintainer: One of
            * a ``str`` - used as the maintainer's name,
            * a ``dict`` - its ``name``, ``email`` and ``github`` keys are
              copied (missing keys become ``None``),
            * a ``list`` - each item is normalized recursively and the
              results are concatenated, so nested lists are flattened.

    Returns:
        A list of dicts, each with exactly the keys ``name``, ``email`` and
        ``github``. An empty list yields an empty list.

    Raises:
        SystemExit: with exit code 1, after logging an error, when the value
            (or any nested item) is none of the supported types.
    """
    if isinstance(maintainer, str):
        return [dict(name=maintainer, email=None, github=None)]

    if isinstance(maintainer, dict):
        return [
            dict(
                name=maintainer.get("name"),
                email=maintainer.get("email"),
                github=maintainer.get("github"),
            )
        ]

    if isinstance(maintainer, list):
        maintainers = []
        for item in maintainer:
            maintainers.extend(get_maintainer(item))
        return maintainers

    # Imported here so the error path works even if the module does not
    # import ``sys`` at the top of the file.
    import sys

    logger.error(f"maintainer can not be recognized from: {maintainer}")
    sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
def remove_attr_set(name):
    """Strip a package-set prefix from a package ``pname``.

    For some package sets the set's prefix is included in the pname. When
    ``name`` starts with one of the known prefixes, everything up to and
    including the first ``-`` is removed (``python3.7-requests`` ->
    ``requests``). Names starting with ``node_`` only lose that literal
    prefix. Any other name is returned unchanged.

    A name that starts with a known prefix but contains no ``-`` (for
    example ``python`` or ``purescript``) has nothing to strip and is
    returned unchanged rather than being reduced to an empty string.

    NOTE(review): matching is a plain, case-sensitive ``startswith``, so an
    unrelated name that merely begins with a prefix and contains a dash is
    also stripped - confirm whether that is acceptable (see the original
    "is this correct" TODO).

    Args:
        name: The package pname.

    Returns:
        The pname with the package-set prefix removed, if one was found.
    """
    # Every entry needs its own trailing comma: adjacent string literals are
    # silently concatenated by Python, which would merge two prefixes into a
    # single one that never matches.
    prefixes = (
        # Packages
        "emscripten",
        "lua",
        "php",
        "pure",
        "python",
        "lisp",
        "perl",
        "ruby",
        # Plugins
        "elasticsearch",
        "graylog",
        "tmuxplugin",
        "vimplugin",
    )

    if name.startswith(prefixes):
        _, dash, remainder = name.partition("-")
        if dash:
            name = remainder

    # node does things a bit different
    elif name.startswith("node_"):
        name = name[len("node_"):]

    return name
|
||||||
|
|
||||||
|
|
||||||
def get_packages(evaluation, evaluation_builds):
|
def get_packages(evaluation, evaluation_builds):
|
||||||
logger.debug(
|
logger.debug(
|
||||||
f"get_packages: Retriving list of packages for '{evaluation['git_revision']}' revision"
|
f"get_packages: Retriving list of packages for '{evaluation['git_revision']}' revision"
|
||||||
|
@ -281,6 +410,7 @@ def get_packages(evaluation, evaluation_builds):
|
||||||
|
|
||||||
def gen():
|
def gen():
|
||||||
for attr_name, data in packages:
|
for attr_name, data in packages:
|
||||||
|
|
||||||
position = data["meta"].get("position")
|
position = data["meta"].get("position")
|
||||||
if position and position.startswith("/nix/store"):
|
if position and position.startswith("/nix/store"):
|
||||||
position = position[44:]
|
position = position[44:]
|
||||||
|
@ -300,16 +430,7 @@ def get_packages(evaluation, evaluation_builds):
|
||||||
else:
|
else:
|
||||||
licenses = []
|
licenses = []
|
||||||
|
|
||||||
maintainers = [
|
maintainers = get_maintainer(data["meta"].get("maintainers", []))
|
||||||
type(maintainer) == str
|
|
||||||
and dict(name=maintainer, email=None, github=None)
|
|
||||||
or dict(
|
|
||||||
name=maintainer.get("name"),
|
|
||||||
email=maintainer.get("email"),
|
|
||||||
github=maintainer.get("github"),
|
|
||||||
)
|
|
||||||
for maintainer in data["meta"].get("maintainers", [])
|
|
||||||
]
|
|
||||||
|
|
||||||
platforms = [
|
platforms = [
|
||||||
type(platform) == str and platform or None
|
type(platform) == str and platform or None
|
||||||
|
@ -319,9 +440,9 @@ def get_packages(evaluation, evaluation_builds):
|
||||||
attr_set = None
|
attr_set = None
|
||||||
if "." in attr_name:
|
if "." in attr_name:
|
||||||
attr_set = attr_name.split(".")[0]
|
attr_set = attr_name.split(".")[0]
|
||||||
if not attr_set.endswith("Packages") and not attr_set.endswith(
|
if not attr_set.endswith("Packages") and \
|
||||||
"Plugins"
|
not attr_set.endswith("Plugins") and \
|
||||||
):
|
not attr_set.endswith("Extensions"):
|
||||||
attr_set = None
|
attr_set = None
|
||||||
|
|
||||||
hydra = None
|
hydra = None
|
||||||
|
@ -349,8 +470,9 @@ def get_packages(evaluation, evaluation_builds):
|
||||||
type="package",
|
type="package",
|
||||||
package_hydra=hydra,
|
package_hydra=hydra,
|
||||||
package_attr_name=attr_name,
|
package_attr_name=attr_name,
|
||||||
|
package_attr_name_query=list(split_query(attr_name)),
|
||||||
package_attr_set=attr_set,
|
package_attr_set=attr_set,
|
||||||
package_pname=data["pname"],
|
package_pname=remove_attr_set(data["pname"]),
|
||||||
package_pversion=data["version"],
|
package_pversion=data["version"],
|
||||||
package_description=data["meta"].get("description"),
|
package_description=data["meta"].get("description"),
|
||||||
package_longDescription=data["meta"].get("longDescription", ""),
|
package_longDescription=data["meta"].get("longDescription", ""),
|
||||||
|
@ -405,7 +527,7 @@ def get_options(evaluation):
|
||||||
# we first check if there are some xml elements before using pypandoc
|
# we first check if there are some xml elements before using pypandoc
|
||||||
# since pypandoc calls are quite slow
|
# since pypandoc calls are quite slow
|
||||||
root = xml.etree.ElementTree.fromstring(xml_description)
|
root = xml.etree.ElementTree.fromstring(xml_description)
|
||||||
if len(root.find('para').getchildren()) > 0:
|
if len(list(root.find('para'))) > 0:
|
||||||
description = pypandoc.convert_text(
|
description = pypandoc.convert_text(
|
||||||
xml_description,
|
xml_description,
|
||||||
"html",
|
"html",
|
||||||
|
|
|
@ -5,8 +5,38 @@
|
||||||
# Enable recursion into attribute sets that nix-env normally doesn't look into
|
# Enable recursion into attribute sets that nix-env normally doesn't look into
|
||||||
# so that we can get a more complete picture of the available packages for the
|
# so that we can get a more complete picture of the available packages for the
|
||||||
# purposes of the index.
|
# purposes of the index.
|
||||||
packageOverrides = super: {
|
packageOverrides = super:
|
||||||
haskellPackages = super.recurseIntoAttrs super.haskellPackages;
|
let
|
||||||
rPackages = super.recurseIntoAttrs super.rPackages;
|
recurseIntoAttrs = sets:
|
||||||
};
|
super.lib.genAttrs
|
||||||
|
(builtins.filter (set: builtins.hasAttr set super) sets)
|
||||||
|
(set: super.recurseIntoAttrs (builtins.getAttr set super));
|
||||||
|
in recurseIntoAttrs [
|
||||||
|
"roundcubePlugins"
|
||||||
|
"emscriptenfastcompPackages"
|
||||||
|
"fdbPackages"
|
||||||
|
"nodePackages_latest"
|
||||||
|
"nodePackages"
|
||||||
|
"platformioPackages"
|
||||||
|
"haskellPackages"
|
||||||
|
"idrisPackages"
|
||||||
|
"sconsPackages"
|
||||||
|
"gns3Packages"
|
||||||
|
"quicklispPackagesClisp"
|
||||||
|
"quicklispPackagesSBCL"
|
||||||
|
"rPackages"
|
||||||
|
"apacheHttpdPackages_2_4"
|
||||||
|
"zabbix44"
|
||||||
|
"zabbix40"
|
||||||
|
"zabbix30"
|
||||||
|
"fusePackages"
|
||||||
|
"nvidiaPackages"
|
||||||
|
"sourceHanPackages"
|
||||||
|
"atomPackages"
|
||||||
|
"emacs25Packages"
|
||||||
|
"emacs26Packages"
|
||||||
|
"steamPackages"
|
||||||
|
"ut2004Packages"
|
||||||
|
"zeroadPackages"
|
||||||
|
];
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,6 +19,7 @@ import Html
|
||||||
, dl
|
, dl
|
||||||
, dt
|
, dt
|
||||||
, li
|
, li
|
||||||
|
, p
|
||||||
, table
|
, table
|
||||||
, tbody
|
, tbody
|
||||||
, td
|
, td
|
||||||
|
@ -42,6 +43,7 @@ import Http
|
||||||
import Json.Decode
|
import Json.Decode
|
||||||
import Json.Decode.Pipeline
|
import Json.Decode.Pipeline
|
||||||
import Json.Encode
|
import Json.Encode
|
||||||
|
import Regex
|
||||||
import Search
|
import Search
|
||||||
|
|
||||||
|
|
||||||
|
@ -186,13 +188,33 @@ viewResultItem channel show item =
|
||||||
else
|
else
|
||||||
[]
|
[]
|
||||||
in
|
in
|
||||||
tr [ onClick (SearchMsg (Search.ShowDetails item.source.attr_name)) ]
|
[]
|
||||||
[ td [] [ text item.source.attr_name ]
|
-- DEBUG: |> List.append
|
||||||
, td [] [ text item.source.pname ]
|
-- DEBUG: [ tr []
|
||||||
, td [] [ text item.source.pversion ]
|
-- DEBUG: [ td [ colspan 4 ]
|
||||||
, td [] [ text <| Maybe.withDefault "" item.source.description ]
|
-- DEBUG: [ p [] [ text <| "score: " ++ String.fromFloat item.score ]
|
||||||
]
|
-- DEBUG: , p []
|
||||||
:: packageDetails
|
-- DEBUG: [ text <|
|
||||||
|
-- DEBUG: "matched queries: "
|
||||||
|
-- DEBUG: , ul []
|
||||||
|
-- DEBUG: (item.matched_queries
|
||||||
|
-- DEBUG: |> Maybe.withDefault []
|
||||||
|
-- DEBUG: |> List.sort
|
||||||
|
-- DEBUG: |> List.map (\q -> li [] [ text q ])
|
||||||
|
-- DEBUG: )
|
||||||
|
-- DEBUG: ]
|
||||||
|
-- DEBUG: ]
|
||||||
|
-- DEBUG: ]
|
||||||
|
-- DEBUG: ]
|
||||||
|
|> List.append
|
||||||
|
(tr [ onClick (SearchMsg (Search.ShowDetails item.source.attr_name)) ]
|
||||||
|
[ td [] [ text <| item.source.attr_name ]
|
||||||
|
, td [] [ text item.source.pname ]
|
||||||
|
, td [] [ text item.source.pversion ]
|
||||||
|
, td [] [ text <| Maybe.withDefault "" item.source.description ]
|
||||||
|
]
|
||||||
|
:: packageDetails
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
viewResultItemDetails :
|
viewResultItemDetails :
|
||||||
|
@ -345,126 +367,232 @@ makeRequestBody :
|
||||||
-> Int
|
-> Int
|
||||||
-> Int
|
-> Int
|
||||||
-> Http.Body
|
-> Http.Body
|
||||||
makeRequestBody query from size =
|
makeRequestBody queryRaw from size =
|
||||||
-- Prefix Query
|
|
||||||
-- example query for "python"
|
|
||||||
-- {
|
|
||||||
-- "from": 0,
|
|
||||||
-- "size": 10,
|
|
||||||
-- "query": {
|
|
||||||
-- "bool": {
|
|
||||||
-- "filter": {
|
|
||||||
-- "match": {
|
|
||||||
-- "type": "package"
|
|
||||||
-- }
|
|
||||||
-- },
|
|
||||||
-- "must": {
|
|
||||||
-- "bool": {
|
|
||||||
-- "should": [
|
|
||||||
-- {
|
|
||||||
-- "multi_match": {
|
|
||||||
-- "query": "python",
|
|
||||||
-- "boost": 1,
|
|
||||||
-- "fields": [
|
|
||||||
-- "package_attr_name.raw",
|
|
||||||
-- "package_attr_name"
|
|
||||||
-- ],
|
|
||||||
-- "type": "most_fields"
|
|
||||||
-- }
|
|
||||||
-- },
|
|
||||||
-- {
|
|
||||||
-- "term": {
|
|
||||||
-- "type": {
|
|
||||||
-- "value": "package",
|
|
||||||
-- "boost": 0
|
|
||||||
-- }
|
|
||||||
-- }
|
|
||||||
-- },
|
|
||||||
-- {
|
|
||||||
-- "term": {
|
|
||||||
-- "package_pname": {
|
|
||||||
-- "value": "python",
|
|
||||||
-- "boost": 2
|
|
||||||
-- }
|
|
||||||
-- }
|
|
||||||
-- },
|
|
||||||
-- {
|
|
||||||
-- "term": {
|
|
||||||
-- "package_pversion": {
|
|
||||||
-- "value": "python",
|
|
||||||
-- "boost": 0.2
|
|
||||||
-- }
|
|
||||||
-- }
|
|
||||||
-- },
|
|
||||||
-- {
|
|
||||||
-- "term": {
|
|
||||||
-- "package_description": {
|
|
||||||
-- "value": "python",
|
|
||||||
-- "boost": 0.3
|
|
||||||
-- }
|
|
||||||
-- }
|
|
||||||
-- },
|
|
||||||
-- {
|
|
||||||
-- "term": {
|
|
||||||
-- "package_longDescription": {
|
|
||||||
-- "value": "python",
|
|
||||||
-- "boost": 0.1
|
|
||||||
-- }
|
|
||||||
-- }
|
|
||||||
-- }
|
|
||||||
-- ]
|
|
||||||
-- }
|
|
||||||
-- }
|
|
||||||
-- }
|
|
||||||
-- }
|
|
||||||
-- }
|
|
||||||
let
|
let
|
||||||
listIn name type_ value =
|
query =
|
||||||
[ ( name, Json.Encode.list type_ value ) ]
|
queryRaw
|
||||||
|
|> String.trim
|
||||||
|
|
||||||
objectIn name value =
|
delimiters =
|
||||||
[ ( name, Json.Encode.object value ) ]
|
Maybe.withDefault Regex.never (Regex.fromString "[. ]")
|
||||||
|
|
||||||
encodeTerm ( name, boost ) =
|
should_match boost_base =
|
||||||
[ ( "value", Json.Encode.string query )
|
List.indexedMap
|
||||||
, ( "boost", Json.Encode.float boost )
|
(\i ( field, boost ) ->
|
||||||
]
|
[ ( "match"
|
||||||
|> objectIn name
|
, Json.Encode.object
|
||||||
|> objectIn "term"
|
[ ( field
|
||||||
in
|
, Json.Encode.object
|
||||||
[ ( "package_pname", 2.0 )
|
[ ( "query", Json.Encode.string query )
|
||||||
, ( "package_pversion", 0.2 )
|
, ( "boost", Json.Encode.float boost )
|
||||||
, ( "package_description", 0.3 )
|
, ( "analyzer", Json.Encode.string "whitespace" )
|
||||||
, ( "package_longDescription", 0.1 )
|
, ( "fuzziness", Json.Encode.string "1" )
|
||||||
]
|
, ( "_name"
|
||||||
|> List.map encodeTerm
|
, Json.Encode.string <|
|
||||||
|> List.append
|
"should_match_"
|
||||||
[ [ "package_attr_name.raw"
|
++ String.fromInt (i + 1)
|
||||||
, "package_attr_name"
|
)
|
||||||
]
|
]
|
||||||
|> listIn "fields" Json.Encode.string
|
)
|
||||||
|> List.append
|
]
|
||||||
[ ( "query", Json.Encode.string query )
|
)
|
||||||
, ( "boost", Json.Encode.float 1.0 )
|
|
||||||
]
|
]
|
||||||
|> objectIn "multi_match"
|
)
|
||||||
|
[ ( "package_attr_name", 1 )
|
||||||
|
, ( "package_attr_name_query", 1 )
|
||||||
|
, ( "package_pname", 1 )
|
||||||
|
, ( "package_description", 1 )
|
||||||
|
, ( "package_longDescription", 1 )
|
||||||
|
]
|
||||||
|
|
||||||
|
should_match_bool_prefix boost_base =
|
||||||
|
List.indexedMap
|
||||||
|
(\i ( field, boost ) ->
|
||||||
|
[ ( "match_bool_prefix"
|
||||||
|
, Json.Encode.object
|
||||||
|
[ ( field
|
||||||
|
, Json.Encode.object
|
||||||
|
[ ( "query", Json.Encode.string query )
|
||||||
|
, ( "boost", Json.Encode.float boost )
|
||||||
|
, ( "analyzer", Json.Encode.string "whitespace" )
|
||||||
|
, ( "fuzziness", Json.Encode.string "1" )
|
||||||
|
, ( "_name"
|
||||||
|
, Json.Encode.string <|
|
||||||
|
"should_match_bool_prefix_"
|
||||||
|
++ String.fromInt (i + 1)
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
[ ( "package_attr_name", 1 )
|
||||||
|
, ( "package_attr_name_query", 1 )
|
||||||
|
, ( "package_pname", 1 )
|
||||||
|
]
|
||||||
|
|
||||||
|
should_terms boost_base =
|
||||||
|
List.indexedMap
|
||||||
|
(\i ( field, boost ) ->
|
||||||
|
[ ( "terms"
|
||||||
|
, Json.Encode.object
|
||||||
|
[ ( field
|
||||||
|
, Json.Encode.list Json.Encode.string (Regex.split delimiters query)
|
||||||
|
)
|
||||||
|
, ( "boost", Json.Encode.float <| boost_base * boost )
|
||||||
|
, ( "_name"
|
||||||
|
, Json.Encode.string <|
|
||||||
|
"should_terms_"
|
||||||
|
++ String.fromInt (i + 1)
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
[ ( "package_attr_name", 1 )
|
||||||
|
, ( "package_attr_name_query", 1 )
|
||||||
|
, ( "package_pname", 1 )
|
||||||
|
, ( "package_attr_set", 1 )
|
||||||
|
]
|
||||||
|
|
||||||
|
should_term boost_base =
|
||||||
|
List.indexedMap
|
||||||
|
(\i ( field, boost ) ->
|
||||||
|
[ ( "term"
|
||||||
|
, Json.Encode.object
|
||||||
|
[ ( field
|
||||||
|
, Json.Encode.object
|
||||||
|
[ ( "value", Json.Encode.string query )
|
||||||
|
, ( "boost", Json.Encode.float <| boost_base * boost )
|
||||||
|
, ( "_name"
|
||||||
|
, Json.Encode.string <|
|
||||||
|
"should_term_"
|
||||||
|
++ String.fromInt (i + 1)
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
[ ( "package_attr_name", 1 )
|
||||||
|
, ( "package_attr_name_query", 1 )
|
||||||
|
, ( "package_pname", 1 )
|
||||||
|
]
|
||||||
|
|
||||||
|
filter_packages =
|
||||||
|
( "term"
|
||||||
|
, Json.Encode.object
|
||||||
|
[ ( "type"
|
||||||
|
, Json.Encode.object
|
||||||
|
[ ( "value", Json.Encode.string "package" )
|
||||||
|
, ( "_name", Json.Encode.string "filter_packages" )
|
||||||
|
]
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
filter_queries =
|
||||||
|
let
|
||||||
|
filterQuery =
|
||||||
|
query
|
||||||
|
|> String.replace "." " "
|
||||||
|
in
|
||||||
|
filterQuery
|
||||||
|
|> String.words
|
||||||
|
|> List.indexedMap
|
||||||
|
(\i query_word ->
|
||||||
|
let
|
||||||
|
isLast =
|
||||||
|
List.length (String.words filterQuery) == i + 1
|
||||||
|
in
|
||||||
|
[ if isLast then
|
||||||
|
( "bool"
|
||||||
|
, Json.Encode.object
|
||||||
|
[ ( "should"
|
||||||
|
, Json.Encode.list Json.Encode.object
|
||||||
|
[ [ ( "match"
|
||||||
|
, Json.Encode.object
|
||||||
|
[ ( "package_attr_name_query"
|
||||||
|
, Json.Encode.object
|
||||||
|
[ ( "query", Json.Encode.string query_word )
|
||||||
|
, ( "fuzziness", Json.Encode.string "1" )
|
||||||
|
, ( "_name", Json.Encode.string <| "filter_queries_" ++ String.fromInt (i + 1) ++ "_should_match" )
|
||||||
|
]
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
]
|
||||||
|
, [ ( "match_bool_prefix"
|
||||||
|
, Json.Encode.object
|
||||||
|
[ ( "package_attr_name_query"
|
||||||
|
, Json.Encode.object
|
||||||
|
[ ( "query", Json.Encode.string query_word )
|
||||||
|
, ( "_name"
|
||||||
|
, Json.Encode.string <| "filter_queries_" ++ String.fromInt (i + 1) ++ "_should_prefix"
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
]
|
||||||
|
]
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
else
|
||||||
|
( "match_bool_prefix"
|
||||||
|
, Json.Encode.object
|
||||||
|
[ ( "package_attr_name_query"
|
||||||
|
, Json.Encode.object
|
||||||
|
[ ( "query", Json.Encode.string query_word )
|
||||||
|
, ( "_name"
|
||||||
|
, Json.Encode.string <| "filter_queries_" ++ String.fromInt (i + 1) ++ "_prefix"
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
in
|
||||||
|
Http.jsonBody
|
||||||
|
(Json.Encode.object
|
||||||
|
[ ( "from"
|
||||||
|
, Json.Encode.int from
|
||||||
|
)
|
||||||
|
, ( "size"
|
||||||
|
, Json.Encode.int size
|
||||||
|
)
|
||||||
|
, ( "query"
|
||||||
|
, Json.Encode.object
|
||||||
|
[ ( "bool"
|
||||||
|
, Json.Encode.object
|
||||||
|
[ ( "filter"
|
||||||
|
, Json.Encode.list Json.Encode.object
|
||||||
|
(List.append
|
||||||
|
[ [ filter_packages ] ]
|
||||||
|
filter_queries
|
||||||
|
)
|
||||||
|
)
|
||||||
|
, ( "should"
|
||||||
|
, Json.Encode.list
|
||||||
|
Json.Encode.object
|
||||||
|
([]
|
||||||
|
|> List.append (should_term 10000)
|
||||||
|
|> List.append (should_terms 1000)
|
||||||
|
|> List.append (should_match_bool_prefix 100)
|
||||||
|
|> List.append (should_match 10)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
]
|
]
|
||||||
|> listIn "should" Json.Encode.object
|
)
|
||||||
|> objectIn "bool"
|
|
||||||
|> objectIn "must"
|
|
||||||
|> ([ ( "type", Json.Encode.string "package" ) ]
|
|
||||||
|> objectIn "match"
|
|
||||||
|> objectIn "filter"
|
|
||||||
|> List.append
|
|
||||||
)
|
|
||||||
|> objectIn "bool"
|
|
||||||
|> objectIn "query"
|
|
||||||
|> List.append
|
|
||||||
[ ( "from", Json.Encode.int from )
|
|
||||||
, ( "size", Json.Encode.int size )
|
|
||||||
]
|
|
||||||
|> Json.Encode.object
|
|
||||||
|> Http.jsonBody
|
|
||||||
|
|
||||||
|
|
||||||
makeRequest :
|
makeRequest :
|
||||||
|
|
|
@ -91,6 +91,7 @@ type alias ResultItem a =
|
||||||
, id : String
|
, id : String
|
||||||
, score : Float
|
, score : Float
|
||||||
, source : a
|
, source : a
|
||||||
|
, matched_queries : Maybe (List String)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -622,8 +623,9 @@ decodeResultHitsTotal =
|
||||||
|
|
||||||
decodeResultItem : Json.Decode.Decoder a -> Json.Decode.Decoder (ResultItem a)
|
decodeResultItem : Json.Decode.Decoder a -> Json.Decode.Decoder (ResultItem a)
|
||||||
decodeResultItem decodeResultItemSource =
|
decodeResultItem decodeResultItemSource =
|
||||||
Json.Decode.map4 ResultItem
|
Json.Decode.map5 ResultItem
|
||||||
(Json.Decode.field "_index" Json.Decode.string)
|
(Json.Decode.field "_index" Json.Decode.string)
|
||||||
(Json.Decode.field "_id" Json.Decode.string)
|
(Json.Decode.field "_id" Json.Decode.string)
|
||||||
(Json.Decode.field "_score" Json.Decode.float)
|
(Json.Decode.field "_score" Json.Decode.float)
|
||||||
(Json.Decode.field "_source" decodeResultItemSource)
|
(Json.Decode.field "_source" decodeResultItemSource)
|
||||||
|
(Json.Decode.maybe (Json.Decode.field "matched_queries" (Json.Decode.list Json.Decode.string)))
|
||||||
|
|
|
@ -6,7 +6,7 @@ const {Elm} = require('./Main');
|
||||||
|
|
||||||
Elm.Main.init({
|
Elm.Main.init({
|
||||||
flags: {
|
flags: {
|
||||||
elasticsearchMappingSchemaVersion: process.env.ELASTICSEARCH_MAPPING_SCHEMA_VERSION || 5,
|
elasticsearchMappingSchemaVersion: process.env.ELASTICSEARCH_MAPPING_SCHEMA_VERSION || 6,
|
||||||
elasticsearchUrl: process.env.ELASTICSEARCH_URL || 'https://nixos-search-5886075189.us-east-1.bonsaisearch.net:443',
|
elasticsearchUrl: process.env.ELASTICSEARCH_URL || 'https://nixos-search-5886075189.us-east-1.bonsaisearch.net:443',
|
||||||
elasticsearchUsername : process.env.ELASTICSEARCH_USERNAME || 'z3ZFJ6y2mR',
|
elasticsearchUsername : process.env.ELASTICSEARCH_USERNAME || 'z3ZFJ6y2mR',
|
||||||
elasticsearchPassword : process.env.ELASTICSEARCH_PASSWORD || 'ds8CEvALPf9pui7XG'
|
elasticsearchPassword : process.env.ELASTICSEARCH_PASSWORD || 'ds8CEvALPf9pui7XG'
|
||||||
|
|
Loading…
Reference in a new issue