// aux-search/flake-info/src/elastic.rs
use std::collections::HashMap;
use clap::arg_enum;
pub use elasticsearch::http::transport::Transport;
use elasticsearch::{http::response, indices::*, BulkOperation, Elasticsearch as Client};
use lazy_static::lazy_static;
use log::{info, warn};
use serde_json::{json, Value};
use thiserror::Error;
use crate::data::Export;
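
// MAPPING holds the index mapping and analysis settings sent to Elasticsearch
// when an index is created (see `ensure_index`). The field names mirror the
// serialized `Export` structure; the custom analyzers provide edge-ngram and
// attribute-path tokenization for search-as-you-type queries.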
lazy_static! {
static ref MAPPING: Value = json!({
"mappings": {
"properties": {
"type": {"type": "keyword"},
"flake_name": {
"type": "text",
"analyzer": "english",
},
"flake_description": {
"type": "text",
"analyzer": "english",
},
"flake_resolved": {
"type": "nested",
"properties": {
"type": {
"type": "keyword"
},
"owner": {
"type": "keyword"
},
"repo": {
"type": "keyword"
},
"url" : {
"type": "keyword"
}
}
},
"flake_source": {
"type": "nested",
"properties": {
"type": {
"type": "keyword"
},
"owner": {
"type": "keyword"
},
"repo": {
"type": "keyword"
},
"desciption": {
"type": "text",
"analyzer": "english",
},
"git_ref": {
"type": "keyword"
},
"url": {
"type": "keyword"
},
}
},
"package_attr_name": {
"type": "keyword",
"fields": {
"edge": {"type": "text", "analyzer": "edge"},
"attr_path": {"type": "text", "analyzer": "attr_path"},
"attr_path_reverse": {"type": "text", "analyzer": "attr_path_reverse"}
},
},
"package_attr_set": {
"type": "keyword",
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
},
"package_pname": {
"type": "keyword",
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
},
"package_pversion": {
"type": "keyword"
},
"package_platforms": {
"type": "keyword"
},
"package_system": {
"type": "keyword"
},
"package_position": {
"type": "text"
},
"package_outputs": {
"type": "keyword"
},
"package_default_output": {
"type": "keyword"
},
"package_programs": {
"type": "keyword"
},
"package_description": {
"type": "text",
"analyzer": "english",
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
},
"package_longDescription": {
"type": "text",
"analyzer": "english",
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
},
"package_license": {
"type": "nested",
"properties": {
"fullName": {"type": "text"},
"url": {"type": "text"}},
},
"package_license_set": {"type": "keyword"},
"package_maintainers": {
"type": "nested",
"properties": {
"name": {"type": "text"},
"email": {"type": "text"},
"github": {"type": "text"},
},
},
"package_maintainers_set": {"type": "keyword"},
"package_homepage": {
"type": "keyword"
},
// Options fields
"option_name": {
"type": "keyword",
"fields": {
"edge": {"type": "text", "analyzer": "edge"},
"attr_path": {"type": "text", "analyzer": "attr_path"},
"attr_path_reverse": {"type": "text", "analyzer": "attr_path_reverse"}
},
},
"option_description": {
"type": "text",
"analyzer": "english",
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
},
"option_type": {"type": "keyword"},
"option_default": {"type": "text"},
"option_example": {"type": "text"},
"option_source": {"type": "keyword"},
}
},
"settings": {
"analysis": {
"normalizer": {
"lowercase": {"type": "custom", "char_filter": [], "filter": ["lowercase"]}
},
"tokenizer": {
"edge": {
"type": "edge_ngram",
"min_gram": 2,
"max_gram": 50,
"token_chars": [
"letter",
"digit",
"punctuation",
"custom",
],
// Accept a few extra symbols ("+=~") via the "custom" class instead of
// "symbol", which would also pull in the XML characters < and >
"custom_token_chars": "+=~",
},
"attr_path": {
"type": "path_hierarchy",
"delimiter": ".",
},
"attr_path_reverse": {
"type": "path_hierarchy",
"delimiter": ".",
"reverse": true,
},
},
"analyzer": {
"edge": {"tokenizer": "edge", "filter": ["lowercase"]},
"attr_path": {"tokenizer": "attr_path", "filter": ["lowercase"]},
"attr_path_reverse": {"tokenizer": "attr_path_reverse", "filter": ["lowercase"]},
"lowercase": {
"type": "custom",
"tokenizer": "keyword",
"filter": ["lowercase"],
},
},
}
}
});
}
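
/// Thin wrapper around the official `elasticsearch` [`Client`], used to
/// create, clear and alias indices and to bulk-push [`Export`] documents.
///
/// A minimal usage sketch (assuming the crate is importable as `flake_info`
/// and an Elasticsearch instance is reachable on localhost; `exports` is a
/// `Vec<Export>` produced elsewhere, e.g. by `process_flake`):
///
/// ```ignore
/// use flake_info::elastic::{Config, Elasticsearch, ExistsStrategy};
///
/// let es = Elasticsearch::new("http://localhost:9200")?;
/// let config = Config {
///     index: "flakes_index",
///     exists_strategy: ExistsStrategy::Recreate,
/// };
/// es.ensure_index(&config).await?;
/// es.push_exports(&config, &exports).await?;
/// ```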
#[derive(Default)]
pub struct Elasticsearch {
client: Client,
}
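
/// Errors that can occur while talking to Elasticsearch, wrapping the
/// underlying client errors with context about the failed operation.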
#[derive(Error, Debug)]
pub enum ElasticsearchError {
#[error("Transport failed to initialize: {0}")]
TransportInitError(elasticsearch::Error),
#[error("Failed to send push exports: {0}")]
PushError(elasticsearch::Error),
#[error("Push exports returned bad result: {0:?}")]
PushResponseError(response::Exception),
#[error("Failed to iitialize index: {0}")]
InitIndexError(elasticsearch::Error),
#[error("Push exports returned bad result: {0:?}")]
InitResponseError(response::Exception),
#[error("An unexpected error occured in the elastic search client: {0}")]
ClientError(elasticsearch::Error),
#[error("Failed to serialize exported data: {0}")]
SerializationError(#[from] serde_json::Error),
#[error("An index with the name \"{0}\" already exists and the (default) stategy is abort")]
IndexExistsError(String),
}
impl Elasticsearch {
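    /// Create a client connected to a single node, e.g. `http://localhost:9200`.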
pub fn new(url: &str) -> Result<Self, ElasticsearchError> {
let transport =
Transport::single_node(url).map_err(ElasticsearchError::TransportInitError)?;
let client = Client::new(transport);
Ok(Elasticsearch { client })
}
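
    /// Create a client from a pre-configured [`Transport`], e.g. when more
    /// control over the connection is needed.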
pub fn with_transport(transport: Transport) -> Self {
let client = Client::new(transport);
Elasticsearch { client }
}
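
    /// Bulk-index the given exports into `config.index`, sending them in
    /// chunks of 10,000 documents per bulk request.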
pub async fn push_exports(
&self,
config: &Config<'_>,
exports: &[Export],
) -> Result<(), ElasticsearchError> {
// let exports: Result<Vec<Value>, serde_json::Error> = exports.iter().map(serde_json::to_value).collect();
// let exports = exports?;
let bodies = exports.chunks(10_000).map(|chunk| {
chunk
.iter()
.map(|e| BulkOperation::from(BulkOperation::index(e)))
});
for body in bodies {
let response = self
.client
.bulk(elasticsearch::BulkParts::Index(config.index))
.body(body.collect())
.send()
.await
.map_err(ElasticsearchError::PushError)?;
dbg!(response)
.exception()
.await
.map_err(ElasticsearchError::ClientError)?
.map(ElasticsearchError::PushResponseError)
.map_or(Ok(()), Err)?;
}
Ok(())
}
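
    /// Make sure `config.index` exists, creating it with [`MAPPING`] if it
    /// does not. If it already exists, the behaviour is determined by
    /// `config.exists_strategy` (abort, ignore, or recreate).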
pub async fn ensure_index(&self, config: &Config<'_>) -> Result<(), ElasticsearchError> {
let exists = self.check_index(config).await?;
if exists {
match config.exists_strategy {
ExistsStrategy::Abort => {
warn!(
"Index \"{}\" already exists, strategy is: Abort push",
config.index
);
return Err(ElasticsearchError::IndexExistsError(
config.index.to_owned(),
));
}
ExistsStrategy::Ignore => {
warn!(
"Index \"{}\" already exists, strategy is: Ignore, proceed push",
config.index
);
return Ok(());
}
ExistsStrategy::Recreate => {
warn!(
"Index \"{}\" already exists, strategy is: Recreate index",
config.index
);
self.clear_index(config).await?;
}
}
}
let response = self
.client
.indices()
.create(IndicesCreateParts::Index(config.index))
.body(MAPPING.as_object())
.send()
.await
.map_err(ElasticsearchError::InitIndexError)?;
dbg!(response)
.exception()
.await
.map_err(ElasticsearchError::ClientError)?
.map(ElasticsearchError::PushResponseError)
.map_or(Ok(()), Err)?;
Ok(())
}
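
    /// Return whether `config.index` currently exists.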
pub async fn check_index(&self, config: &Config<'_>) -> Result<bool, ElasticsearchError> {
let response = self
.client
.indices()
.exists(IndicesExistsParts::Index(&[config.index]))
.send()
.await
.map_err(ElasticsearchError::InitIndexError)?;
Ok(response.status_code() == 200)
}
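
    /// Delete `config.index` and all documents in it.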
pub async fn clear_index(&self, config: &Config<'_>) -> Result<(), ElasticsearchError> {
let response = self
.client
.indices()
.delete(IndicesDeleteParts::Index(&[config.index]))
.send()
.await
.map_err(ElasticsearchError::InitIndexError)?;
dbg!(response)
.exception()
.await
.map_err(ElasticsearchError::ClientError)?
.map(ElasticsearchError::PushResponseError)
.map_or(Ok(()), Err)
}
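
    /// Point `alias` at `index`: remove the alias from all indices that
    /// currently carry it, then add it to `index`.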
pub async fn write_alias(
&self,
_config: &Config<'_>,
index: &str,
alias: &str,
) -> Result<(), ElasticsearchError> {
// delete old alias
info!("Try deleting old alias");
let response = self
.client
.indices()
.get_alias(IndicesGetAliasParts::Name(&[alias]))
.send()
.await
.map_err(ElasticsearchError::InitIndexError)?;
let indices = response
.json::<HashMap<String, Value>>()
.await
.map_err(ElasticsearchError::InitIndexError)?
.keys()
.cloned()
.collect::<Vec<String>>();
self.client
.indices()
.delete_alias(IndicesDeleteAliasParts::IndexName(
&indices.iter().map(AsRef::as_ref).collect::<Vec<_>>(),
&[alias],
))
.send()
.await
.map_err(ElasticsearchError::InitIndexError)?;
// put new alias
info!("Putting new alias");
let response = self
.client
.indices()
.put_alias(IndicesPutAliasParts::IndexName(&[index], alias))
.send()
.await
.map_err(ElasticsearchError::InitIndexError)?;
dbg!(response)
.exception()
.await
.map_err(ElasticsearchError::ClientError)?
.map(ElasticsearchError::PushResponseError)
.map_or(Ok(()), Err)
}
}
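
/// Push configuration: the target index name and how to handle an index that
/// already exists.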
#[derive(Debug)]
pub struct Config<'a> {
pub index: &'a str,
pub exists_strategy: ExistsStrategy,
}
arg_enum! {
/// Different strategies to deal with existing indices
///
/// - Abort: cancel the push and return an error
/// - Ignore: reuse the existing index, appending new data
/// - Recreate: drop the existing index and start with a new one
#[derive(Debug, Clone, Copy)]
pub enum ExistsStrategy {
Abort,
Ignore,
Recreate,
}
}
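
// The integration tests below require a running Elasticsearch instance
// reachable at http://localhost:9200.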
#[cfg(test)]
mod tests {
use std::path::Path;
use super::*;
use crate::{
data::{self, import::Kind},
process_flake,
};
#[tokio::test]
async fn test_delete() -> Result<(), Box<dyn std::error::Error>> {
let es = Elasticsearch::new("http://localhost:9200").unwrap();
let config = &Config {
index: "flakes_index",
exists_strategy: ExistsStrategy::Ignore,
};
es.ensure_index(config).await?;
es.clear_index(config).await?;
let exists = es.check_index(config).await?;
assert!(!exists);
Ok(())
}
#[tokio::test]
async fn test_init() -> Result<(), Box<dyn std::error::Error>> {
let es = Elasticsearch::new("http://localhost:9200").unwrap();
let config = &Config {
index: "flakes_index",
exists_strategy: ExistsStrategy::Recreate,
};
es.ensure_index(config).await?;
let exists = es.check_index(config).await?;
assert!(exists, "Index should exist");
Ok(())
}
#[tokio::test]
async fn test_push() -> Result<(), Box<dyn std::error::Error>> {
let sources: Vec<data::Source> =
data::Source::read_sources_file(Path::new("./examples/examples.in.json"))?;
let exports = sources
.iter()
.flat_map(|s| process_flake(s, &Kind::All, false, &[]))
.map(|(_info, exports)| exports)
.flatten()
.collect::<Vec<Export>>();
println!("{}", serde_json::to_string(&exports[1]).unwrap());
let es = Elasticsearch::new("http://localhost:9200").unwrap();
let config = &Config {
index: "flakes_index",
exists_strategy: ExistsStrategy::Recreate,
};
es.ensure_index(config).await?;
es.push_exports(config, &exports).await?;
Ok(())
}
#[tokio::test]
async fn test_abort_if_index_exists() -> Result<(), Box<dyn std::error::Error>> {
let es = Elasticsearch::new("http://localhost:9200").unwrap();
let config = &Config {
index: "flakes_index",
exists_strategy: ExistsStrategy::Abort,
};
es.ensure_index(&Config {
exists_strategy: ExistsStrategy::Ignore,
..*config
})
.await?;
assert!(matches!(
es.ensure_index(config).await,
Err(ElasticsearchError::IndexExistsError(_)),
));
es.clear_index(config).await?;
Ok(())
}
}