From 12351b8958747cf77045fa1a3ddaa12782525861 Mon Sep 17 00:00:00 2001 From: Jakub Labor Date: Tue, 22 Oct 2024 09:15:22 -0400 Subject: [PATCH] Use RegistryApi in releases --- src/registry_api.rs | 89 +++++++++++++++++++++++++++++- src/utils/mod.rs | 6 -- src/web/mod.rs | 1 + src/web/releases.rs | 130 ++++++++++---------------------------------- 4 files changed, 119 insertions(+), 107 deletions(-) diff --git a/src/registry_api.rs b/src/registry_api.rs index 5d5ada084..74ee753e9 100644 --- a/src/registry_api.rs +++ b/src/registry_api.rs @@ -1,5 +1,5 @@ use crate::{error::Result, utils::retry_async}; -use anyhow::{anyhow, Context}; +use anyhow::{anyhow, bail, Context}; use chrono::{DateTime, Utc}; use reqwest::header::{HeaderValue, ACCEPT, USER_AGENT}; use semver::Version; @@ -69,6 +69,26 @@ impl fmt::Display for OwnerKind { } } +#[derive(Deserialize, Debug)] + +pub(crate) struct SearchCrate { + pub(crate) name: String, +} + +#[derive(Deserialize, Debug)] + +pub(crate) struct SearchMeta { + pub(crate) next_page: Option<String>, + pub(crate) prev_page: Option<String>, +} + +#[derive(Deserialize, Debug)] +pub(crate) struct Search { + pub(crate) crates: Vec<SearchCrate>, + pub(crate) meta: SearchMeta, + pub(crate) executed_query: Option<String>, +} + impl RegistryApi { pub fn new(api_base: Url, max_retries: u32) -> Result<Self> { let headers = vec![ @@ -227,4 +247,71 @@ impl RegistryApi { Ok(result) } + + /// Fetch crates from the registry's API + pub(crate) async fn get_crates(&self, query: Option<&str>) -> Result<Search> { + #[derive(Deserialize, Debug)] + struct SearchError { + detail: String, + } + + #[derive(Deserialize, Debug)] + struct SearchResponse { + crates: Option<Vec<SearchCrate>>, + meta: Option<SearchMeta>, + errors: Option<Vec<SearchError>>, + } + + let url = { + let mut url = self.api_base.clone(); + url.path_segments_mut() + .map_err(|()| anyhow!("Invalid API url"))? 
+ .extend(&["api", "v1", "crates"]); + url.set_query(query); + url + }; + + // Extract the query from the query args + let executed_query = url.query_pairs().find_map(|(key, value)| { + if key == "q" { + Some(value.to_string()) + } else { + None + } + }); + + let response: SearchResponse = retry_async( + || async { + Ok(self + .client + .get(url.clone()) + .send() + .await? + .error_for_status()?) + }, + self.max_retries, + ) + .await? + .json() + .await?; + + if let Some(errors) = response.errors { + let messages: Vec<_> = errors.into_iter().map(|e| e.detail).collect(); + bail!("got error from crates.io: {}", messages.join("\n")); + } + + let Some(crates) = response.crates else { + bail!("missing releases in crates.io response"); + }; + + let Some(meta) = response.meta else { + bail!("missing metadata in crates.io response"); + }; + + Ok(Search { + crates, + meta, + executed_query, + }) + } } diff --git a/src/utils/mod.rs b/src/utils/mod.rs index d591b9404..b8f01c5bf 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -31,12 +31,6 @@ pub(crate) mod sized_buffer; use std::{future::Future, thread, time::Duration}; -pub(crate) const APP_USER_AGENT: &str = concat!( - env!("CARGO_PKG_NAME"), - " ", - include_str!(concat!(env!("OUT_DIR"), "/git_version")) -); - pub(crate) fn report_error(err: &anyhow::Error) { // Debug-format for anyhow errors includes context & backtrace if std::env::var("SENTRY_DSN").is_ok() { diff --git a/src/web/mod.rs b/src/web/mod.rs index 3c59f9853..bd4010021 100644 --- a/src/web/mod.rs +++ b/src/web/mod.rs @@ -424,6 +424,7 @@ async fn apply_middleware( .layer(Extension(context.service_metrics()?)) .layer(Extension(context.instance_metrics()?)) .layer(Extension(context.config()?)) + .layer(Extension(context.registry_api()?)) .layer(Extension(async_storage)) .layer(option_layer(template_data.map(Extension))) .layer(middleware::from_fn(csp::csp_middleware)) diff --git a/src/web/releases.rs b/src/web/releases.rs index c06e5e633..63d9d9d8e 100644 
--- a/src/web/releases.rs +++ b/src/web/releases.rs @@ -3,7 +3,7 @@ use crate::{ build_queue::QueuedCrate, cdn, impl_axum_webpage, - utils::{report_error, retry_async}, + utils::report_error, web::{ axum_parse_uri_with_params, axum_redirect, encode_url_path, error::{AxumNope, AxumResult}, @@ -12,9 +12,9 @@ use crate::{ page::templates::{filters, RenderRegular, RenderSolid}, ReqVersion, }, - AsyncBuildQueue, Config, InstanceMetrics, + AsyncBuildQueue, Config, InstanceMetrics, RegistryApi, }; -use anyhow::{anyhow, bail, Context as _, Result}; +use anyhow::{anyhow, Context as _, Result}; use axum::{ extract::{Extension, Query}, response::{IntoResponse, Response as AxumResponse}, @@ -22,14 +22,13 @@ use axum::{ use base64::{engine::general_purpose::STANDARD as b64, Engine}; use chrono::{DateTime, Utc}; use futures_util::stream::TryStreamExt; -use once_cell::sync::Lazy; use rinja::Template; -use serde::{Deserialize, Serialize}; +use serde::Serialize; use sqlx::Row; use std::collections::{BTreeMap, HashMap, HashSet}; use std::str; use std::sync::Arc; -use tracing::{debug, warn}; +use tracing::warn; use url::form_urlencoded; use super::cache::CachePolicy; @@ -142,85 +141,14 @@ struct SearchResult { /// This delegates to the crates.io search API. 
async fn get_search_results( conn: &mut sqlx::PgConnection, - config: &Config, - query_params: &str, + registry: &RegistryApi, + query_params: Option<&str>, ) -> Result<SearchResult> { - #[derive(Deserialize)] - struct CratesIoError { - detail: String, - } - #[derive(Deserialize)] - struct CratesIoSearchResult { - crates: Option<Vec<CratesIoCrate>>, - meta: Option<CratesIoMeta>, - errors: Option<Vec<CratesIoError>>, - } - #[derive(Deserialize, Debug)] - struct CratesIoCrate { - name: String, - } - #[derive(Deserialize, Debug)] - struct CratesIoMeta { - next_page: Option<String>, - prev_page: Option<String>, - } - - use crate::utils::APP_USER_AGENT; - use reqwest::header::{HeaderMap, HeaderValue, ACCEPT, USER_AGENT}; - use reqwest::Client as HttpClient; - - static HTTP_CLIENT: Lazy<HttpClient> = Lazy::new(|| { - let mut headers = HeaderMap::new(); - headers.insert(USER_AGENT, HeaderValue::from_static(APP_USER_AGENT)); - headers.insert(ACCEPT, HeaderValue::from_static("application/json")); - HttpClient::builder() - .default_headers(headers) - .build() - .unwrap() - }); - - let url = config - .registry_api_host - .join(&format!("api/v1/crates{query_params}"))?; - debug!("fetching search results from {}", url); - - // extract the query from the query args. - // This is easier because the query might have been encoded in the bash64-encoded - // paginate parameter. - let executed_query = url.query_pairs().find_map(|(key, value)| { - if key == "q" { - Some(value.to_string()) - } else { - None - } - }); - - let response: CratesIoSearchResult = retry_async( - || async { - Ok(HTTP_CLIENT - .get(url.clone()) - .send() - .await? - .error_for_status()?) - }, - config.crates_io_api_call_retries, - ) - .await? 
- .json() - .await?; - - if let Some(errors) = response.errors { - let messages: Vec<_> = errors.into_iter().map(|e| e.detail).collect(); - bail!("got error from crates.io: {}", messages.join("\n")); - } - - let Some(crates) = response.crates else { - bail!("missing releases in crates.io response"); - }; - - let Some(meta) = response.meta else { - bail!("missing metadata in crates.io response"); - }; + let crate::registry_api::Search { + crates, + meta, + executed_query, + } = registry.get_crates(query_params).await?; let names = Arc::new( crates @@ -573,6 +501,7 @@ impl_axum_webpage! { pub(crate) async fn search_handler( mut conn: DbConnection, Extension(config): Extension<Arc<Config>>, + Extension(registry): Extension<Arc<RegistryApi>>, Extension(metrics): Extension<Arc<InstanceMetrics>>, Query(mut params): Query<HashMap<String, String>>, ) -> AxumResult<AxumResponse> { @@ -645,20 +574,21 @@ pub(crate) async fn search_handler( AxumNope::NoResults })?; let query_params = String::from_utf8_lossy(&decoded); - - if !query_params.starts_with('?') { - // sometimes we see plain bytes being passed to `paginate`. - // In these cases we just return `NoResults` and don't call - // the crates.io API. - // The whole point of the `paginate` design is that we don't - // know anything about the pagination args and crates.io can - // change them as they wish, so we cannot do any more checks here. - warn!( - "didn't get query args in `paginate` arguments for search: \"{}\"", - query_params - ); - return Err(AxumNope::NoResults); - } + let query_params = match query_params.strip_prefix('?') { + Some(query_params) => query_params, + None => { + // sometimes we see plain bytes being passed to `paginate`. + // In these cases we just return `NoResults` and don't call + // the crates.io API. + // The whole point of the `paginate` design is that we don't + // know anything about the pagination args and crates.io can + // change them as they wish, so we cannot do any more checks here. 
+ warn!( + "didn't get query args in `paginate` arguments for search: \"{query_params}\"" + ); + return Err(AxumNope::NoResults); + } + }; let mut p = form_urlencoded::parse(query_params.as_bytes()); if let Some(v) = p.find_map(|(k, v)| { @@ -671,7 +601,7 @@ pub(crate) async fn search_handler( sort_by = v; }; - get_search_results(&mut conn, &config, &query_params).await? + get_search_results(&mut conn, &registry, Some(query_params)).await? } else if !query.is_empty() { let query_params: String = form_urlencoded::Serializer::new(String::new()) .append_pair("q", &query) @@ -679,7 +609,7 @@ pub(crate) async fn search_handler( .append_pair("per_page", &RELEASES_IN_RELEASES.to_string()) .finish(); - get_search_results(&mut conn, &config, &format!("?{}", &query_params)).await? + get_search_results(&mut conn, &registry, Some(&query_params)).await? } else { return Err(AxumNope::NoResults); };