Skip to content

Commit

Permalink
Add filter config file
Browse files Browse the repository at this point in the history
  • Loading branch information
richardr1126 committed Dec 15, 2024
1 parent 8a865c6 commit d0c7d5c
Show file tree
Hide file tree
Showing 5 changed files with 176 additions and 169 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
*.db-wal
*.db-shm
output.txt
*.dbcnb

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
13 changes: 6 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,12 @@ The feed generator uses the following filters to curate content:

1. **Update files:**
- Update `publish_feed.py` with your feed details. **(REQUIRED)**
- Modify filters in `firehose/data_filter.py`. **(OPTIONAL)**
- Update environment variables in `.env`. **(REQUIRED)**
- Modify feed post inclusion filters in `firehose/filter_config.json`. **(OPTIONAL)**
- Update environment variables. **(REQUIRED)**

```shell
cp example.env .env
```

2. **Publish Your Feed:** Follow the [Publishing Your Feed](#publishing-your-feed) instructions below.

Expand All @@ -53,11 +57,6 @@ After successful publication, your feed will appear in the Bluesky app. Obtain t

## Installation with Docker Compose

1. Configure environment variables:
```shell
cp example.env .env
```

2. Edit .env with your settings:
```env
HOSTNAME=feed.yourdomain.com
Expand Down
6 changes: 6 additions & 0 deletions example.env
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
# Hostname: The hostname of the Flask app `web` container
HOSTNAME=feed.yourdomain.com
# Handle: The handle of the user that the app will use to publish the feed and hydrate posts with likes
HANDLE=your-handle.bsky.social
# Password: The password from bsky app passwords
PASSWORD=your-password
# uri: The uri of the newly published feed (obtained after running `python publish_feed.py`)
CHRONOLOGICAL_TRENDING_URI=at://did:plc:abcde...

# Postgres credentials used to initialize the database and connect to it
POSTGRES_USER=postgres
POSTGRES_PASSWORD=your-db-password
POSTGRES_DB=feed
179 changes: 17 additions & 162 deletions firehose/data_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,159 +4,30 @@
from atproto import models, Client, IdResolver
from utils.logger import logger
from database import db, Post, init_client
import json
from pathlib import Path

handle_resolver = IdResolver().handle
did_resolver = IdResolver().did
handles_to_include = [
'stormlightmemes.bsky.social',
'brotherwisegames.bsky.social',
]
hanles_to_exclude = [
'flintds.bsky.social'
]
dids_to_include = [handle_resolver.resolve(handle) for handle in handles_to_include]
dids_to_exclude = [handle_resolver.resolve(handle) for handle in hanles_to_exclude]

PHRASES = [
'17th shard',
'bands of mourning',
'brandon sanderson',
'cognitive realm',
'rhythm of war',
'shadows of self',
'sixth of the dusk',
'shadows for silence',
'shadows of silence',
'ember dark',
"emperor's soul",
'isles of the ember dark',
'stormlight archive',
'sunlit man',
'alloy of law',
'hero of ages',
'lost metal',
'way of kings',
'well of ascension',
'tress of the emerald sea',
'wind and truth',
'words of radiance',
'yumi and the nightmare painter',
'shattered planes',
'knight radiant',
'knights radiant',
'journey before destination',
'life before death, strength before weakness',
'dragon steel nexus',
]
FILTER_FILE = Path('filter_config.json')

INCLUSIVE_MULTI_TOKENS = [
'brandon sanderson',
'yumi sanderson',
'vin elend',
'yumi painter',
'shallan adolin',
'kaladin syl',
'kaladin adolin',
'kaladin shallan',
'navani kholin',
'shallan pattern',
'shallan veil',
'shallan radiant',
'vin kelsier',
'kelsier survivor',
'wax wayne marasi',
'steris marasi',
'cryptic spren',
'steris wax',
'szeth nightblood',
'shades threnody',
'threnody hell'
]
def load_filters():
    """Load the feed filter configuration from the JSON file at FILTER_FILE.

    Returns:
        The parsed JSON document. The module-level code that calls this
        function subscripts the result with the keys ``HANDLES``,
        ``EXCLUDE_HANDLES``, ``PHRASES``, ``INCLUSIVE_MULTI_TOKENS``,
        ``TOKENS`` and ``EXCLUDE_TOKENS``.

    Raises:
        FileNotFoundError: If FILTER_FILE does not exist. There are no
            built-in defaults to fall back on, so a missing file is reported
            explicitly instead of returning None and failing later with an
            opaque ``TypeError`` when the result is subscripted.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(FILTER_FILE, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError as err:
        # Include the resolved path: FILTER_FILE is relative, so the lookup
        # depends on the process's current working directory.
        raise FileNotFoundError(
            f'Filter config file not found: {Path(FILTER_FILE).resolve()}'
        ) from err

TOKENS = [
'allomancy',
'bondsmith',
'cosmere',
'dalinar',
'dawnshard',
'dragonsteel',
'dustbringer',
'edgedancer',
'elantris',
'elsecaller',
'stormblessed',
'thaidakar',
'kholin',
'lightweaver',
'mistborn',
'oathbringer',
'sanderlanche',
'sazed',
'shadesmar',
'skybreaker',
'spren',
'stoneward',
'stormlight',
'surgebinding',
'truthwatcher',
'warbreaker',
'willshaper',
'windrunner',
'roshar',
'scadrial',
'taldain',
'voidbringer',
'shardblade',
'shardplate',
'shardbearer',
'feruchemy',
'hemalurgy',
'lerasium',
'atium',
'mistcloak',
'kandra',
'koloss',
'skaa',
'highstorm',
'parshendi',
'urithiru',
'honorblade',
'surgebinder',
'dawnshard',
'worldhopper',
'perpendicularity',
'adonalsium',
'chasmfiend',
'worldbringer',
'allomancer',
'highspren',
'elantrian',
'inkspren',
'honorspren',
'cultivationspren',
'peakspren',
'ashspren',
'luckspren',
'windspren',
'lifespren',
'towerlight',
'voidlight',
'brandosando',
'numuhukumakiaki\'ialunamor',
'dsnx24',
'dsnx2024',
'dragonsteelnexus',
'dragonsteelnexus2024'
]
filters = load_filters()
HANDLES = filters['HANDLES']
EXCLUDE_HANDLES = filters['EXCLUDE_HANDLES']
PHRASES = filters['PHRASES']
INCLUSIVE_MULTI_TOKENS = filters['INCLUSIVE_MULTI_TOKENS']
TOKENS = filters['TOKENS']
EXCLUDE_TOKENS = filters['EXCLUDE_TOKENS']

EXCLUDE_TOKENS = [
'trump',
'sylvana',
'sylvanna',
'alleria',
'uriele',
'mormon',
]
dids_to_include = [handle_resolver.resolve(handle) for handle in HANDLES]
dids_to_exclude = [handle_resolver.resolve(handle) for handle in EXCLUDE_HANDLES]

def compile_pattern(items, word_boundary=True, optional_prefix=None, plural=False):
escaped = [re.escape(item) for item in items]
Expand Down Expand Up @@ -215,22 +86,6 @@ def matches_filters(text):

return False

# def get_full_post_info(post_uri, record):
# logger.info(f'Processing matched post: {record.text}')

# # Log-in to the client
# client = Client()
# client.login(HANDLE, PASSWORD)

# # Retrieve the post thread
# response = client.app.bsky.feed.get_post_thread({'uri': post_uri})

# # Access the post details
# post_details = response['thread']
# logger.info(f'Post details: {post_details}')
# # post.author.handle

# return post_details

def operations_callback(ops: defaultdict) -> None:
created_posts = ops[models.ids.AppBskyFeedPost]['created']
Expand Down
146 changes: 146 additions & 0 deletions firehose/filter_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
{
"HANDLES": [
"stormlightmemes.bsky.social",
"brotherwisegames.bsky.social"
],
"EXCLUDE_HANDLES": [
"flintds.bsky.social"
],
"PHRASES": [
"17th shard",
"bands of mourning",
"brandon sanderson",
"cognitive realm",
"rhythm of war",
"shadows of self",
"sixth of the dusk",
"shadows for silence",
"shadows of silence",
"ember dark",
"emperor's soul",
"isles of the ember dark",
"stormlight archive",
"sunlit man",
"alloy of law",
"hero of ages",
"lost metal",
"way of kings",
"well of ascension",
"tress of the emerald sea",
"wind and truth",
"words of radiance",
"yumi and the nightmare painter",
"shattered planes",
"knight radiant",
"knights radiant",
"journey before destination",
"life before death, strength before weakness",
"dragon steel nexus"
],
"INCLUSIVE_MULTI_TOKENS": [
"brandon sanderson",
"yumi sanderson",
"vin elend",
"yumi painter",
"shallan adolin",
"kaladin syl",
"kaladin adolin",
"kaladin shallan",
"navani kholin",
"shallan pattern",
"shallan veil",
"shallan radiant",
"vin kelsier",
"kelsier survivor",
"wax wayne marasi",
"steris marasi",
"cryptic spren",
"steris wax",
"szeth nightblood",
"shades threnody",
"threnody hell"
],
"TOKENS": [
"allomancy",
"bondsmith",
"cosmere",
"dalinar",
"dawnshard",
"dragonsteel",
"dustbringer",
"edgedancer",
"elantris",
"elsecaller",
"stormblessed",
"thaidakar",
"kholin",
"lightweaver",
"mistborn",
"oathbringer",
"sanderlanche",
"sazed",
"shadesmar",
"skybreaker",
"spren",
"stoneward",
"stormlight",
"surgebinding",
"truthwatcher",
"warbreaker",
"willshaper",
"windrunner",
"roshar",
"scadrial",
"taldain",
"voidbringer",
"shardblade",
"shardplate",
"shardbearer",
"feruchemy",
"hemalurgy",
"lerasium",
"atium",
"mistcloak",
"kandra",
"koloss",
"skaa",
"highstorm",
"parshendi",
"urithiru",
"honorblade",
"surgebinder",
"dawnshard",
"worldhopper",
"perpendicularity",
"adonalsium",
"chasmfiend",
"worldbringer",
"allomancer",
"highspren",
"elantrian",
"inkspren",
"honorspren",
"cultivationspren",
"peakspren",
"ashspren",
"luckspren",
"windspren",
"lifespren",
"towerlight",
"voidlight",
"brandosando",
"numuhukumakiaki'ialunamor",
"dsnx24",
"dsnx2024",
"dragonsteelnexus",
"dragonsteelnexus2024"
],
"EXCLUDE_TOKENS": [
"trump",
"sylvana",
"sylvanna",
"alleria",
"uriele",
"mormon"
]
}

0 comments on commit d0c7d5c

Please sign in to comment.