Skip to content

Commit

Permalink
feat(scraper): add scraper CLI
Browse files Browse the repository at this point in the history
  • Loading branch information
sigaloid committed Oct 22, 2024
1 parent 49ef59e commit f3d2f0c
Show file tree
Hide file tree
Showing 4 changed files with 114 additions and 3 deletions.
19 changes: 19 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,15 @@ authors = [
"spikecodes <[email protected]>",
]
edition = "2021"
default-run = "redlib"

[dependencies]
rinja = { version = "0.3.4", default-features = false }
cached = { version = "0.51.3", features = ["async"] }
clap = { version = "4.4.11", default-features = false, features = [
"std",
"env",
"derive",
] }
regex = "1.10.2"
serde = { version = "1.0.193", features = ["derive"] }
Expand Down Expand Up @@ -56,3 +58,11 @@ sealed_test = "1.0.0"
codegen-units = 1
lto = true
strip = "symbols"

[[bin]]
name = "redlib"
path = "src/main.rs"

[[bin]]
name = "scraper"
path = "src/scraper/main.rs"
73 changes: 73 additions & 0 deletions src/scraper/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
use std::{fmt::Display, io::Write};

use clap::{Parser, ValueEnum};
use redlib::utils::Post;

// Command-line arguments for the `scraper` binary.
//
// The command name and description are spelled out explicitly so that the
// generated `--help` output identifies this tool instead of showing template
// placeholders. The `///` comments on the fields below are picked up by clap
// and shown as the per-argument help text.
#[derive(Parser)]
#[command(name = "scraper")]
#[command(about = "Fetch posts from a subreddit and save them to a file", long_about = None)]
struct Cli {
    /// Name of the subreddit to fetch posts from (without the `/r/` prefix)
    #[arg(short = 's', long = "sub")]
    sub: String,

    /// Number of posts to collect before stopping
    #[arg(short = 'c', long = "count")]
    count: usize,

    /// Sort order of the subreddit listing to read from
    #[arg(long = "sort")]
    sort: SortOrder,

    /// Format of the output file
    #[arg(short = 'f', long = "format", value_enum)]
    format: Format,
}

// Sort orders accepted by the `--sort` command-line argument.
//
// `ValueEnum` lets clap parse the argument directly into one of these
// variants. The `Display` impl for this type renders each variant as a
// lowercase word, which `main` interpolates into the listing path.
#[derive(Debug, Clone, ValueEnum)]
enum SortOrder {
Hot,
Rising,
New,
Top,
Controversial,
}

impl Display for SortOrder {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
SortOrder::Hot => write!(f, "hot"),
SortOrder::Rising => write!(f, "rising"),
SortOrder::New => write!(f, "new"),
SortOrder::Top => write!(f, "top"),
SortOrder::Controversial => write!(f, "controversial"),
}
}
}

// Output formats accepted by the `--format` command-line argument.
//
// JSON is the only variant; `main` matches on this enum to decide how the
// fetched posts are written out.
#[derive(Debug, Clone, ValueEnum)]
enum Format {
Json,
}

#[tokio::main]
async fn main() {
let cli = Cli::parse();
let (sub, final_count, sort, format) = (cli.sub, cli.count, cli.sort, cli.format);
let initial = format!("/r/{sub}/{sort}.json?&raw_json=1");
let (mut posts, mut after) = Post::fetch(&initial, false).await.unwrap();
while posts.len() < final_count {
print!("\r");
let path = format!("/r/{sub}/{sort}.json?sort={sort}&t=&after={after}&raw_json=1");
let (new_posts, new_after) = Post::fetch(&path, false).await.unwrap();
posts.extend(new_posts);
after = new_after;
// Print number of posts fetched
print!("Fetched {} posts", posts.len());
std::io::stdout().flush().unwrap();
}

match format {
Format::Json => {
let filename: String = format!("{sub}.json");
let json = serde_json::to_string(&posts).unwrap();
std::fs::write(filename, json).unwrap();
}
}
}
15 changes: 12 additions & 3 deletions src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use once_cell::sync::Lazy;
use regex::Regex;
use rinja::Template;
use rust_embed::RustEmbed;
use serde::Serialize;
use serde_json::Value;
use serde_json_path::{JsonPath, JsonPathExt};
use std::collections::{HashMap, HashSet};
Expand Down Expand Up @@ -46,6 +47,7 @@ pub enum ResourceType {
}

// Post flair with content, background color and foreground color
#[derive(Serialize)]
pub struct Flair {
pub flair_parts: Vec<FlairPart>,
pub text: String,
Expand All @@ -54,7 +56,7 @@ pub struct Flair {
}

// Part of flair, either emoji or text
#[derive(Clone)]
#[derive(Clone, Serialize)]
pub struct FlairPart {
pub flair_part_type: String,
pub value: String,
Expand Down Expand Up @@ -96,12 +98,14 @@ impl FlairPart {
}
}

/// An author record: a name, a [`Flair`] and a `distinguished` string.
///
/// Derives `Serialize` so it can be emitted by serde serializers.
// NOTE(review): `distinguished` presumably carries Reddit's "distinguished"
// marker (e.g. moderator/admin) — confirm against the code that fills it in.
#[derive(Serialize)]
pub struct Author {
pub name: String,
pub flair: Flair,
pub distinguished: String,
}

#[derive(Serialize)]
pub struct Poll {
pub poll_options: Vec<PollOption>,
pub voting_end_timestamp: (String, String),
Expand Down Expand Up @@ -129,6 +133,7 @@ impl Poll {
}
}

#[derive(Serialize)]
pub struct PollOption {
pub id: u64,
pub text: String,
Expand Down Expand Up @@ -158,13 +163,14 @@ impl PollOption {
}

// Post flags: spoiler, nsfw and stickied.
// Derives `Serialize` so the flags can be emitted by serde serializers.
#[derive(Serialize)]
pub struct Flags {
pub spoiler: bool,
pub nsfw: bool,
pub stickied: bool,
}

#[derive(Debug)]
#[derive(Debug, Serialize)]
pub struct Media {
pub url: String,
pub alt_url: String,
Expand Down Expand Up @@ -264,6 +270,7 @@ impl Media {
}
}

#[derive(Serialize)]
pub struct GalleryMedia {
pub url: String,
pub width: i64,
Expand Down Expand Up @@ -304,6 +311,7 @@ impl GalleryMedia {
}

// Post containing content, metadata and media
#[derive(Serialize)]
pub struct Post {
pub id: String,
pub title: String,
Expand Down Expand Up @@ -470,7 +478,7 @@ pub struct Comment {
pub prefs: Preferences,
}

#[derive(Default, Clone)]
#[derive(Default, Clone, Serialize)]
pub struct Award {
pub name: String,
pub icon_url: String,
Expand All @@ -484,6 +492,7 @@ impl std::fmt::Display for Award {
}
}

/// Newtype wrapper around a list of [`Award`]s.
///
/// Derives `Serialize` so the collection can be emitted by serde serializers.
#[derive(Serialize)]
pub struct Awards(pub Vec<Award>);

impl std::ops::Deref for Awards {
Expand Down

0 comments on commit f3d2f0c

Please sign in to comment.