diff --git a/.gitignore b/.gitignore
index 5c005f9..6905897 100644
--- a/.gitignore
+++ b/.gitignore
@@ -44,3 +44,4 @@ package-lock.json
 
 server
 db.sqlite
+*.db
diff --git a/package.json b/package.json
index 42facb8..20020e0 100644
--- a/package.json
+++ b/package.json
@@ -9,7 +9,7 @@
     "dev": "bun install && concurrently --restart-tries=3 \"bun css\" \"nodemon --watch src --ext ts,tsx --exec 'bun run --hot src/server.tsx'\"",
     "prettier": "bunx prettier --write src/ test/ --plugin prettier-plugin-tailwindcss",
     "server": "bun run --hot src/server.tsx",
-    "test": "bun run test"
+    "test": "NODE_ENV=test bun test"
   },
   "dependencies": {
     "@unocss/preset-web-fonts": "^0.61.0",
diff --git a/src/types.ts b/src/types.ts
new file mode 100644
index 0000000..2d9c945
--- /dev/null
+++ b/src/types.ts
@@ -0,0 +1,13 @@
+/** A news article scraped from a source listing and cached in SQLite. */
+export interface Article {
+  /** Primary key of the cached row; currently the article title. */
+  id: string;
+  title: string;
+  link: string;
+  /** Name of the `NewsSource` the article was scraped from. */
+  source: string;
+  /** Listing page the article was found on. */
+  page: number;
+  /** ISO-8601 timestamp recorded when the article was scraped. */
+  created_at: string;
+}
diff --git a/src/util/api.ts b/src/util/api.ts
index bf2ada1..1818cbd 100644
--- a/src/util/api.ts
+++ b/src/util/api.ts
@@ -1,5 +1,7 @@
 import { load } from "cheerio";
 import { z } from "zod";
+import db from "./db";
+import type { Article } from "../types";
 
 const articleSchema = z.object({
   title: z.string().min(5),
@@ -38,15 +40,71 @@ const isValidArticle = (article: { title: string; link: string }) => {
   }
 };
 
+/** Age, in hours, of the oldest cached article at which the cache is purged. */
+const CACHE_TTL_HOURS = 8;
+const MS_PER_HOUR = 1000 * 60 * 60;
+
+/**
+ * Empties the `articles` table once its oldest row is at least
+ * `CACHE_TTL_HOURS` old. All rows are deleted together rather than one by
+ * one, so a cached page is never left partially populated.
+ */
+const clearCacheIfNeeded = (): void => {
+  const oldestArticle = db
+    .prepare("SELECT created_at FROM articles ORDER BY created_at ASC LIMIT 1")
+    .get() as { created_at: string } | undefined;
+
+  // An empty table has nothing to expire.
+  if (!oldestArticle) {
+    return;
+  }
+
+  const ageInHours =
+    (Date.now() - new Date(oldestArticle.created_at).getTime()) / MS_PER_HOUR;
+
+  if (ageInHours >= CACHE_TTL_HOURS) {
+    db.prepare("DELETE FROM articles").run();
+  }
+};
+
+/**
+ * Returns the articles for one page of a news source.
+ *
+ * Cached rows for `(source, page)` are returned when present; otherwise the
+ * page is fetched, scraped, validated and written to the cache.
+ *
+ * @param source - News source to read.
+ * @param page - Listing page to read.
+ * @param clearCache - Cache-expiry hook run before the lookup; injectable so
+ *   tests can observe or stub it.
+ * @returns Cached rows on a hit, or the newly inserted articles on a miss.
+ */
 const fetchArticlesFromSource = async (
   source: NewsSource,
   page: number = 1,
+  clearCache: () => void = clearCacheIfNeeded,
 ) => {
+  clearCache();
+
+  const cachedArticles = db
+    .prepare("SELECT * FROM articles WHERE source = ? AND page = ?")
+    .all(source.name, page) as Article[];
+
+  if (cachedArticles.length > 0) {
+    return cachedArticles;
+  }
+
   const response = await fetch(source.url(page));
   const text = await response.text();
   const $ = load(text);
 
-  const articles: { title: string; link: string; source: string }[] = [];
+  // Prepared once so the scraping loop below does not recompile SQL per item.
+  const findArticleById = db.prepare("SELECT 1 FROM articles WHERE id = ?");
+  const insertArticle = db.prepare(
+    "INSERT INTO articles (id, title, link, source, page, created_at) VALUES (?, ?, ?, ?, ?, ?)",
+  );
+
+  const articles: Article[] = [];
 
   $(source.listSelector).each((_, element) => {
     const title = $(element).text().trim();
@@ -55,15 +113,37 @@ const fetchArticlesFromSource = async (
       : $(element).attr("href");
 
     if (title && link) {
-      articles.push({
+      const article: Article = {
+        // The title doubles as the primary key, so an article already cached
+        // under any source or page is skipped below.
+        id: title,
         title,
         link,
         source: source.name,
-      });
+        page,
+        created_at: new Date().toISOString(),
+      };
+
+      if (isValidArticle(article) && !findArticleById.get(article.id)) {
+        articles.push(article);
+        insertArticle.run(
+          article.id,
+          article.title,
+          article.link,
+          article.source,
+          article.page,
+          article.created_at,
+        );
+      }
     }
   });
 
-  return articles.filter(isValidArticle);
+  return articles;
 };
 
-export { fetchArticlesFromSource, isValidArticle, newsSources };
+export {
+  fetchArticlesFromSource,
+  isValidArticle,
+  newsSources,
+  clearCacheIfNeeded,
+};
diff --git a/src/util/db.ts b/src/util/db.ts
new file mode 100644
index 0000000..2571489
--- /dev/null
+++ b/src/util/db.ts
@@ -0,0 +1,19 @@
+import { Database } from "bun:sqlite";
+
+// Test runs use a separate database file from the regular one.
+const isTest = process.env.NODE_ENV === "test";
+const db = new Database(isTest ? "test_articles.db" : "articles.db");
+
+// Idempotent: does nothing when the table already exists.
+db.run(`
+  CREATE TABLE IF NOT EXISTS articles (
+    id TEXT PRIMARY KEY,
+    title TEXT,
+    link TEXT,
+    source TEXT,
+    page INTEGER,
+    created_at DATETIME DEFAULT CURRENT_TIMESTAMP
+  )
+`);
+
+export default db;
diff --git a/test/api.test.ts b/test/api.test.ts
index af0af0d..2169ac8 100644
--- a/test/api.test.ts
+++ b/test/api.test.ts
@@ -1,5 +1,17 @@
-import { describe, expect, it } from "bun:test";
+import { describe, expect, it, beforeAll, afterAll } from "bun:test";
 import { fetchArticlesFromSource, newsSources } from "../src/util/api";
+import db from "../src/util/db";
+
+// Clear rows left behind by earlier runs before the suite starts.
+beforeAll(() => {
+  db.run("DELETE FROM articles");
+});
+
+// Empty the table but keep the schema: `db` is a shared module-level
+// connection and the table is only created when the module is first loaded.
+afterAll(() => {
+  db.run("DELETE FROM articles");
+});
 
 describe("Article Fetching Functions", () => {
   it("Should fetch and parse NPR articles", async () => {
@@ -25,4 +37,47 @@ describe("Article Fetching Functions", () => {
       });
     });
   });
+
+  it("Should cache fetched articles", async () => {
+    const source = newsSources[0];
+
+    const initialFetch = await fetchArticlesFromSource(source, 1);
+    expect(initialFetch.length).toBeGreaterThanOrEqual(10);
+
+    const cachedArticles = db
+      .prepare("SELECT * FROM articles WHERE source = ? AND page = ?")
+      .all(source.name, 1);
+    expect(cachedArticles.length).toBeGreaterThanOrEqual(10);
+
+    const secondFetch = await fetchArticlesFromSource(source, 1);
+    expect(secondFetch.length).toBeGreaterThanOrEqual(10);
+    expect(secondFetch).toEqual(initialFetch);
+  });
+
+  it("Should call clearCacheIfNeeded when fetching articles", async () => {
+    const source = newsSources[0];
+
+    let clearCacheCalled = false;
+    const clearCacheSpy = () => {
+      clearCacheCalled = true;
+    };
+
+    await fetchArticlesFromSource(source, 1, clearCacheSpy);
+    expect(clearCacheCalled).toBe(true);
+  });
+
+  it("Should miss cache and fetch new articles", async () => {
+    const source = newsSources[0];
+
+    db.run("DELETE FROM articles");
+
+    const initialFetch = await fetchArticlesFromSource(source, 1);
+    expect(initialFetch.length).toBeGreaterThanOrEqual(10);
+
+    const cachedArticles = db
+      .prepare("SELECT * FROM articles WHERE source = ? AND page = ?")
+      .all(source.name, 1);
+    expect(cachedArticles.length).toBeGreaterThanOrEqual(10);
+    expect(cachedArticles).toEqual(initialFetch);
+  });
 });