Skip to content

Commit

Permalink
Remove storage of self profile data into the database
Browse files Browse the repository at this point in the history
It has been uploaded to S3 for a long time, and there were no queries reading the data anyway.
  • Loading branch information
Kobzol committed Nov 16, 2023
1 parent 45e0afe commit 0b3051b
Show file tree
Hide file tree
Showing 8 changed files with 10 additions and 354 deletions.
23 changes: 0 additions & 23 deletions collector/src/compile/execute/bencher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,29 +134,6 @@ impl<'a> BenchProcessor<'a> {
));
}

if let Some(sp) = &stats.1 {
let conn = &*self.conn;
let artifact_row_id = self.artifact_row_id;
let benchmark = self.benchmark.0.as_str();
for qd in &sp.query_data {
buf.push(conn.record_self_profile_query(
collection,
artifact_row_id,
benchmark,
profile,
scenario,
qd.label.as_str(),
database::QueryDatum {
self_time: qd.self_time,
blocked_time: qd.blocked_time,
incremental_load_time: qd.incremental_load_time,
number_of_cache_hits: qd.number_of_cache_hits,
invocation_count: qd.invocation_count,
},
));
}
}

while let Some(()) = buf.next().await {}
}

Expand Down
27 changes: 1 addition & 26 deletions database/schema.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ Below is an explanation of the current database schema. This schema is duplicate

## Overview

In general, the database is used to track three groups of things:
In general, the database is used to track four groups of things:
* Performance run statistics (e.g., instruction count) for compile time benchmarks on a per benchmark, profile, and scenario basis.
* Performance run statistics (e.g., instruction count) for runtime benchmarks on a per benchmark basis.
* Self profile data gathered with `-Zself-profile`.
Expand Down Expand Up @@ -210,31 +210,6 @@ series aid cid value
1 1 1 24.93
```

### self_profile_query_series

Describes a parametrization of a self-profile query. Contains a unique combination
of a benchmark, profile, scenario and a `rustc` self-profile query.

This table exists to avoid duplicating benchmarks, profiles, scenarios etc. many times in the `self_profile_query` table.

```
sqlite> select * from self_profile_query_series limit 1;
id crate profile cache query
-- ----- ------- ---------- -----
1 hello-world debug full hir_crate
```

### self_profile_query

A measured value of a single `rustc` self-profile query that is unique to a `self_profile_query_series`, `artifact` and a `collection`.

```
sqlite> select * from self_profile_query limit 1;
series aid cid self_time blocked_time incremental_load_time number_of_cache_hits invocation_count
-- ----- --- --------- ------------ --------------------- -------------------- ----------------
1 42 58 11.8 10.2 8.4 224 408
```

### rustc_compilation

Records the duration of compiling a `rustc` crate for a given artifact and collection.
Expand Down
70 changes: 1 addition & 69 deletions database/src/bin/postgres-to-sqlite.rs
Original file line number Diff line number Diff line change
Expand Up @@ -357,66 +357,6 @@ impl Table for RustcCompilation {
}
}

/// Table descriptor for `self_profile_query`: provides the Postgres `select`
/// and the matching SQLite `insert` used to transfer its rows.
struct SelfProfileQuery;

impl Table for SelfProfileQuery {
    /// Name of the table in both databases.
    fn name(&self) -> &'static str {
        "self_profile_query"
    }

    /// Builds the Postgres query that reads all columns of the table.
    ///
    /// The statement is passed through `with_filter_clause_maybe` together with
    /// `since_weeks_ago` (presumably to restrict rows to recent artifacts --
    /// confirm against that helper).
    fn postgres_select_statement(&self, since_weeks_ago: Option<u32>) -> String {
        let select = format!(
            "select series, aid, cid, self_time, blocked_time, incremental_load_time, number_of_cache_hits, invocation_count from {}",
            self.name()
        );
        with_filter_clause_maybe(select, ARTIFACT_JOIN_AND_WHERE, since_weeks_ago)
    }

    /// SQLite statement with one placeholder per selected column.
    fn sqlite_insert_statement(&self) -> &'static str {
        "insert into self_profile_query (series, aid, cid, self_time, blocked_time, incremental_load_time, number_of_cache_hits, invocation_count) VALUES (?, ?, ?, ?, ?, ?, ?, ?)"
    }

    /// Reads the eight columns of a Postgres row (in select order) and binds
    /// them to the prepared SQLite insert. Panics if the insert fails.
    fn sqlite_execute_insert(&self, statement: &mut rusqlite::Statement, row: tokio_postgres::Row) {
        // Identifier columns are always present.
        let series: i32 = row.get(0);
        let aid: i32 = row.get(1);
        let cid: i32 = row.get(2);
        // Measurement columns are read as nullable values.
        let self_time: Option<i64> = row.get(3);
        let blocked_time: Option<i64> = row.get(4);
        let incremental_load_time: Option<i64> = row.get(5);
        let number_of_cache_hits: Option<i32> = row.get(6);
        let invocation_count: Option<i32> = row.get(7);

        statement
            .execute(params![
                series,
                aid,
                cid,
                self_time,
                blocked_time,
                incremental_load_time,
                number_of_cache_hits,
                invocation_count,
            ])
            .unwrap();
    }
}

/// Table descriptor for `self_profile_query_series`: provides the Postgres
/// `select` and the matching SQLite `insert` used to transfer its rows.
struct SelfProfileQuerySeries;

impl Table for SelfProfileQuerySeries {
    /// Name of the table in both databases.
    fn name(&self) -> &'static str {
        "self_profile_query_series"
    }

    /// Builds the Postgres query that reads every row of the table.
    /// The `since-weeks-ago` value is ignored for this table.
    fn postgres_select_statement(&self, _since_weeks_ago: Option<u32>) -> String {
        format!(
            "select id, crate, profile, cache, query from {}",
            self.name()
        )
    }

    /// SQLite statement with one placeholder per selected column.
    fn sqlite_insert_statement(&self) -> &'static str {
        "insert into self_profile_query_series (id, crate, profile, cache, query) VALUES (?, ?, ?, ?, ?)"
    }

    /// Reads the five columns of a Postgres row (in select order) and binds
    /// them to the prepared SQLite insert. Panics if the insert fails.
    fn sqlite_execute_insert(&self, statement: &mut rusqlite::Statement, row: tokio_postgres::Row) {
        let id: i32 = row.get(0);
        // Text columns are borrowed from `row`, which outlives the insert.
        let krate: &str = row.get(1);
        let profile: &str = row.get(2);
        let cache: &str = row.get(3);
        let query: &str = row.get(4);

        statement
            .execute(params![id, krate, profile, cache, query])
            .unwrap();
    }
}

#[tokio::main]
async fn main() -> anyhow::Result<()> {
env_logger::init();
Expand All @@ -434,8 +374,6 @@ async fn main() -> anyhow::Result<()> {
&PullRequestBuild,
&RawSelfProfile,
&RustcCompilation,
&SelfProfileQuerySeries,
&SelfProfileQuery,
];

let table_names: Vec<_> = tables.iter().map(|table| table.name()).collect();
Expand Down Expand Up @@ -494,18 +432,12 @@ async fn main() -> anyhow::Result<()> {
let postgres = matches.get_one::<String>("postgres-db").unwrap();
let sqlite = matches.get_one::<String>("sqlite-db").unwrap();

let mut exclude_tables: std::collections::HashSet<_> = matches
let exclude_tables: std::collections::HashSet<_> = matches
.get_many::<String>("exclude-tables")
.unwrap_or_default()
.cloned()
.collect();

if matches.get_flag("no-self-profile") {
exclude_tables.insert(SelfProfileQuerySeries.name().to_owned());
exclude_tables.insert(SelfProfileQuery.name().to_owned());
// `RawSelfProfile` is intentionally kept.
}

let since_weeks_ago = matches.get_one::<u32>("since-weeks-ago").copied();

let mut postgres: tokio_postgres::Client =
Expand Down
82 changes: 0 additions & 82 deletions database/src/bin/sqlite-to-postgres.rs
Original file line number Diff line number Diff line change
Expand Up @@ -438,86 +438,6 @@ impl Table for RustcCompilation {
}
}

/// Table descriptor for `self_profile_query` in the SQLite-to-Postgres copy.
struct SelfProfileQuery;

/// One CSV record of `self_profile_query`; field order matches
/// `SelfProfileQuery::sqlite_attributes`.
#[derive(Serialize)]
struct SelfProfileQueryRow {
    series: i32,
    aid: i32,
    cid: i32,
    self_time: Nullable<i64>,
    blocked_time: Nullable<i64>,
    incremental_load_time: Nullable<i64>,
    number_of_cache_hits: Nullable<i32>,
    invocation_count: Nullable<i32>,
}

impl Table for SelfProfileQuery {
    /// Name of the table.
    fn name() -> &'static str {
        "self_profile_query"
    }

    /// Comma-separated list of the columns read from SQLite.
    fn sqlite_attributes() -> &'static str {
        "series, aid, cid, self_time, blocked_time, incremental_load_time, number_of_cache_hits, invocation_count"
    }

    /// This table reports no Postgres-generated id column.
    fn postgres_generated_id_attribute() -> Option<&'static str> {
        None
    }

    /// Converts one SQLite row into a `SelfProfileQueryRow` and serializes it
    /// to `writer`. Panics if a column cannot be read or the write fails.
    fn write_postgres_csv_row<W: Write>(writer: &mut csv::Writer<W>, row: &rusqlite::Row) {
        // Columns are read positionally, in `sqlite_attributes` order.
        let record = SelfProfileQueryRow {
            series: row.get(0).unwrap(),
            aid: row.get(1).unwrap(),
            cid: row.get(2).unwrap(),
            self_time: row.get(3).unwrap(),
            blocked_time: row.get(4).unwrap(),
            incremental_load_time: row.get(5).unwrap(),
            number_of_cache_hits: row.get(6).unwrap(),
            invocation_count: row.get(7).unwrap(),
        };
        writer.serialize(record).unwrap();
    }
}

/// Table descriptor for `self_profile_query_series` in the SQLite-to-Postgres copy.
struct SelfProfileQuerySeries;

/// One CSV record of `self_profile_query_series`; field order matches
/// `SelfProfileQuerySeries::sqlite_attributes` (`krate` holds the `crate` column).
#[derive(Serialize)]
struct SelfProfileQuerySeriesRow<'a> {
    id: i32,
    krate: &'a str,
    profile: &'a str,
    cache: &'a str,
    query: &'a str,
}

impl Table for SelfProfileQuerySeries {
    /// Name of the table.
    fn name() -> &'static str {
        "self_profile_query_series"
    }

    /// Comma-separated list of the columns read from SQLite.
    fn sqlite_attributes() -> &'static str {
        "id, crate, profile, cache, query"
    }

    /// The `id` column is reported as the Postgres-generated id.
    fn postgres_generated_id_attribute() -> Option<&'static str> {
        Some("id")
    }

    /// Converts one SQLite row into a `SelfProfileQuerySeriesRow` and
    /// serializes it to `writer`. Panics if a column cannot be read, is not
    /// text where text is expected, or the write fails.
    fn write_postgres_csv_row<W: Write>(writer: &mut csv::Writer<W>, row: &rusqlite::Row) {
        // Borrows a text column straight out of the row, without copying it.
        let text = |idx: usize| row.get_ref(idx).unwrap().as_str().unwrap();

        let record = SelfProfileQuerySeriesRow {
            id: row.get(0).unwrap(),
            krate: text(1),
            profile: text(2),
            cache: text(3),
            query: text(4),
        };
        writer.serialize(record).unwrap();
    }
}

// `Nullable<T>` helps to work around the fact that the `csv` crate (and the CSV
// format in general) doesn't distinguish between nulls and empty strings, while
// the Postgres CSV format does.
Expand Down Expand Up @@ -639,8 +559,6 @@ async fn main() -> anyhow::Result<()> {
copy::<PullRequestBuild>(&sqlite_tx, &postgres_tx).await;
copy::<RawSelfProfile>(&sqlite_tx, &postgres_tx).await;
copy::<RustcCompilation>(&sqlite_tx, &postgres_tx).await;
copy::<SelfProfileQuerySeries>(&sqlite_tx, &postgres_tx).await;
copy::<SelfProfileQuery>(&sqlite_tx, &postgres_tx).await;

// This is overly paranoid, but don't commit the Postgres transaction until
// the rollback of the SQLite transaction succeeds.
Expand Down
9 changes: 0 additions & 9 deletions database/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -410,15 +410,6 @@ impl ArtifactId {

intern!(pub struct QueryLabel);

/// Measurements for a single self-profile query, as passed to
/// `Connection::record_self_profile_query`.
#[derive(PartialEq, Eq, Clone, Debug)]
pub struct QueryDatum {
    /// Value stored in the `self_time` column.
    pub self_time: Duration,
    /// Value stored in the `blocked_time` column.
    pub blocked_time: Duration,
    /// Value stored in the `incremental_load_time` column.
    pub incremental_load_time: Duration,
    /// Value stored in the `number_of_cache_hits` column.
    pub number_of_cache_hits: u32,
    /// Value stored in the `invocation_count` column.
    pub invocation_count: u32,
}

/// A database row ID for an artifact in the artifact table
#[derive(Serialize, Deserialize, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
pub struct ArtifactIdNumber(pub u32);
Expand Down
11 changes: 0 additions & 11 deletions database/src/pool.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,17 +67,6 @@ pub trait Connection: Send + Sync {
profile: Profile,
scenario: Scenario,
);
#[allow(clippy::too_many_arguments)]
async fn record_self_profile_query(
&self,
collection: CollectionId,
artifact: ArtifactIdNumber,
benchmark: &str,
profile: Profile,
scenario: Scenario,
query: &str,
qd: crate::QueryDatum,
);
async fn record_error(&self, artifact: ArtifactIdNumber, krate: &str, error: &str);
async fn record_rustc_crate(
&self,
Expand Down
Loading

0 comments on commit 0b3051b

Please sign in to comment.