Skip to content

Commit

Permalink
Use RegistryApi in releases
Browse files Browse the repository at this point in the history
  • Loading branch information
SuficioAC committed Oct 22, 2024
1 parent 4e3e136 commit 12351b8
Show file tree
Hide file tree
Showing 4 changed files with 119 additions and 107 deletions.
89 changes: 88 additions & 1 deletion src/registry_api.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::{error::Result, utils::retry_async};
use anyhow::{anyhow, Context};
use anyhow::{anyhow, bail, Context};
use chrono::{DateTime, Utc};
use reqwest::header::{HeaderValue, ACCEPT, USER_AGENT};
use semver::Version;
Expand Down Expand Up @@ -69,6 +69,26 @@ impl fmt::Display for OwnerKind {
}
}

/// One element of the `crates` array in a registry search response.
///
/// Only the `name` field is declared here; no other fields of the
/// entry are read into this type.
#[derive(Deserialize, Debug)]

pub(crate) struct SearchCrate {
/// Value of the entry's `name` field.
pub(crate) name: String,
}

/// The `meta` object of a registry search response.
#[derive(Deserialize, Debug)]

pub(crate) struct SearchMeta {
/// Value of the `next_page` field, if present.
// NOTE(review): presumably the query string for the following result page — confirm against the registry API.
pub(crate) next_page: Option<String>,
/// Value of the `prev_page` field, if present.
// NOTE(review): presumably the query string for the preceding result page — confirm against the registry API.
pub(crate) prev_page: Option<String>,
}

/// A successful registry search: the returned crates, the response
/// metadata, and the search term that was sent.
#[derive(Deserialize, Debug)]
pub(crate) struct Search {
/// Entries of the response's `crates` array.
pub(crate) crates: Vec<SearchCrate>,
/// The response's `meta` object.
pub(crate) meta: SearchMeta,
/// Value of the `q` query parameter of the request, or `None` when
/// the request carried no `q` parameter.
pub(crate) executed_query: Option<String>,
}

impl RegistryApi {
pub fn new(api_base: Url, max_retries: u32) -> Result<Self> {
let headers = vec![
Expand Down Expand Up @@ -227,4 +247,71 @@ impl RegistryApi {

Ok(result)
}

/// Fetch crates from the registry's API
pub(crate) async fn get_crates(&self, query: Option<&str>) -> Result<Search> {
#[derive(Deserialize, Debug)]
struct SearchError {
detail: String,
}

#[derive(Deserialize, Debug)]
struct SearchResponse {
crates: Option<Vec<SearchCrate>>,
meta: Option<SearchMeta>,
errors: Option<Vec<SearchError>>,
}

let url = {
let mut url = self.api_base.clone();
url.path_segments_mut()
.map_err(|()| anyhow!("Invalid API url"))?
.extend(&["api", "v1", "crates"]);
url.set_query(query);
url
};

// Extract the query from the query args
let executed_query = url.query_pairs().find_map(|(key, value)| {
if key == "q" {
Some(value.to_string())
} else {
None
}
});

let response: SearchResponse = retry_async(
|| async {
Ok(self
.client
.get(url.clone())
.send()
.await?
.error_for_status()?)
},
self.max_retries,
)
.await?
.json()
.await?;

if let Some(errors) = response.errors {
let messages: Vec<_> = errors.into_iter().map(|e| e.detail).collect();
bail!("got error from crates.io: {}", messages.join("\n"));
}

let Some(crates) = response.crates else {
bail!("missing releases in crates.io response");
};

let Some(meta) = response.meta else {
bail!("missing metadata in crates.io response");
};

Ok(Search {
crates,
meta,
executed_query,
})
}
}
6 changes: 0 additions & 6 deletions src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,6 @@ pub(crate) mod sized_buffer;

use std::{future::Future, thread, time::Duration};

/// User-agent string assembled at compile time: the Cargo package name,
/// a single space, and the contents of the `git_version` file found in
/// the build's `OUT_DIR`.
// NOTE(review): `git_version` is presumably written by the build script — confirm.
pub(crate) const APP_USER_AGENT: &str = concat!(
env!("CARGO_PKG_NAME"),
" ",
include_str!(concat!(env!("OUT_DIR"), "/git_version"))
);

pub(crate) fn report_error(err: &anyhow::Error) {
// Debug-format for anyhow errors includes context & backtrace
if std::env::var("SENTRY_DSN").is_ok() {
Expand Down
1 change: 1 addition & 0 deletions src/web/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,7 @@ async fn apply_middleware(
.layer(Extension(context.service_metrics()?))
.layer(Extension(context.instance_metrics()?))
.layer(Extension(context.config()?))
.layer(Extension(context.registry_api()?))
.layer(Extension(async_storage))
.layer(option_layer(template_data.map(Extension)))
.layer(middleware::from_fn(csp::csp_middleware))
Expand Down
130 changes: 30 additions & 100 deletions src/web/releases.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
use crate::{
build_queue::QueuedCrate,
cdn, impl_axum_webpage,
utils::{report_error, retry_async},
utils::report_error,
web::{
axum_parse_uri_with_params, axum_redirect, encode_url_path,
error::{AxumNope, AxumResult},
Expand All @@ -12,24 +12,23 @@ use crate::{
page::templates::{filters, RenderRegular, RenderSolid},
ReqVersion,
},
AsyncBuildQueue, Config, InstanceMetrics,
AsyncBuildQueue, Config, InstanceMetrics, RegistryApi,
};
use anyhow::{anyhow, bail, Context as _, Result};
use anyhow::{anyhow, Context as _, Result};
use axum::{
extract::{Extension, Query},
response::{IntoResponse, Response as AxumResponse},
};
use base64::{engine::general_purpose::STANDARD as b64, Engine};
use chrono::{DateTime, Utc};
use futures_util::stream::TryStreamExt;
use once_cell::sync::Lazy;
use rinja::Template;
use serde::{Deserialize, Serialize};
use serde::Serialize;
use sqlx::Row;
use std::collections::{BTreeMap, HashMap, HashSet};
use std::str;
use std::sync::Arc;
use tracing::{debug, warn};
use tracing::warn;
use url::form_urlencoded;

use super::cache::CachePolicy;
Expand Down Expand Up @@ -142,85 +141,14 @@ struct SearchResult {
/// This delegates to the crates.io search API.
async fn get_search_results(
conn: &mut sqlx::PgConnection,
config: &Config,
query_params: &str,
registry: &RegistryApi,
query_params: Option<&str>,
) -> Result<SearchResult, anyhow::Error> {
#[derive(Deserialize)]
struct CratesIoError {
detail: String,
}
#[derive(Deserialize)]
struct CratesIoSearchResult {
crates: Option<Vec<CratesIoCrate>>,
meta: Option<CratesIoMeta>,
errors: Option<Vec<CratesIoError>>,
}
#[derive(Deserialize, Debug)]
struct CratesIoCrate {
name: String,
}
#[derive(Deserialize, Debug)]
struct CratesIoMeta {
next_page: Option<String>,
prev_page: Option<String>,
}

use crate::utils::APP_USER_AGENT;
use reqwest::header::{HeaderMap, HeaderValue, ACCEPT, USER_AGENT};
use reqwest::Client as HttpClient;

static HTTP_CLIENT: Lazy<HttpClient> = Lazy::new(|| {
let mut headers = HeaderMap::new();
headers.insert(USER_AGENT, HeaderValue::from_static(APP_USER_AGENT));
headers.insert(ACCEPT, HeaderValue::from_static("application/json"));
HttpClient::builder()
.default_headers(headers)
.build()
.unwrap()
});

let url = config
.registry_api_host
.join(&format!("api/v1/crates{query_params}"))?;
debug!("fetching search results from {}", url);

// extract the query from the query args.
// This is easier because the query might have been encoded in the base64-encoded
// paginate parameter.
let executed_query = url.query_pairs().find_map(|(key, value)| {
if key == "q" {
Some(value.to_string())
} else {
None
}
});

let response: CratesIoSearchResult = retry_async(
|| async {
Ok(HTTP_CLIENT
.get(url.clone())
.send()
.await?
.error_for_status()?)
},
config.crates_io_api_call_retries,
)
.await?
.json()
.await?;

if let Some(errors) = response.errors {
let messages: Vec<_> = errors.into_iter().map(|e| e.detail).collect();
bail!("got error from crates.io: {}", messages.join("\n"));
}

let Some(crates) = response.crates else {
bail!("missing releases in crates.io response");
};

let Some(meta) = response.meta else {
bail!("missing metadata in crates.io response");
};
let crate::registry_api::Search {
crates,
meta,
executed_query,
} = registry.get_crates(query_params).await?;

let names = Arc::new(
crates
Expand Down Expand Up @@ -573,6 +501,7 @@ impl_axum_webpage! {
pub(crate) async fn search_handler(
mut conn: DbConnection,
Extension(config): Extension<Arc<Config>>,
Extension(registry): Extension<Arc<RegistryApi>>,
Extension(metrics): Extension<Arc<InstanceMetrics>>,
Query(mut params): Query<HashMap<String, String>>,
) -> AxumResult<AxumResponse> {
Expand Down Expand Up @@ -645,20 +574,21 @@ pub(crate) async fn search_handler(
AxumNope::NoResults
})?;
let query_params = String::from_utf8_lossy(&decoded);

if !query_params.starts_with('?') {
// sometimes we see plain bytes being passed to `paginate`.
// In these cases we just return `NoResults` and don't call
// the crates.io API.
// The whole point of the `paginate` design is that we don't
// know anything about the pagination args and crates.io can
// change them as they wish, so we cannot do any more checks here.
warn!(
"didn't get query args in `paginate` arguments for search: \"{}\"",
query_params
);
return Err(AxumNope::NoResults);
}
let query_params = match query_params.strip_prefix('?') {
Some(query_params) => query_params,
None => {
// sometimes we see plain bytes being passed to `paginate`.
// In these cases we just return `NoResults` and don't call
// the crates.io API.
// The whole point of the `paginate` design is that we don't
// know anything about the pagination args and crates.io can
// change them as they wish, so we cannot do any more checks here.
warn!(
"didn't get query args in `paginate` arguments for search: \"{query_params}\""
);
return Err(AxumNope::NoResults);
}
};

let mut p = form_urlencoded::parse(query_params.as_bytes());
if let Some(v) = p.find_map(|(k, v)| {
Expand All @@ -671,15 +601,15 @@ pub(crate) async fn search_handler(
sort_by = v;
};

get_search_results(&mut conn, &config, &query_params).await?
get_search_results(&mut conn, &registry, Some(query_params)).await?
} else if !query.is_empty() {
let query_params: String = form_urlencoded::Serializer::new(String::new())
.append_pair("q", &query)
.append_pair("sort", &sort_by)
.append_pair("per_page", &RELEASES_IN_RELEASES.to_string())
.finish();

get_search_results(&mut conn, &config, &format!("?{}", &query_params)).await?
get_search_results(&mut conn, &registry, Some(&query_params)).await?
} else {
return Err(AxumNope::NoResults);
};
Expand Down

0 comments on commit 12351b8

Please sign in to comment.