From 7e91bff956f95539419dbd3b525ce247c12e6a33 Mon Sep 17 00:00:00 2001 From: Claromes Date: Tue, 16 Jul 2024 18:54:19 -0300 Subject: [PATCH 1/5] add Pandas to dep --- poetry.lock | 2 +- pyproject.toml | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/poetry.lock b/poetry.lock index 05d1e4f..8b553bc 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1753,4 +1753,4 @@ watchmedo = ["PyYAML (>=3.10)"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "e41f880cd350ecafc461396adeec717dd632a56071c030fab761265acc0773f6" +content-hash = "6ec2a7e5f2ef14da749323fd7fd41018619880340cb46cdcacab7a5d4ec9f852" diff --git a/pyproject.toml b/pyproject.toml index 7296f3a..9c9c686 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "waybacktweets" -version = "1.0a7" +version = "1.0rc" description = "Retrieves archived tweets CDX data from the Wayback Machine, performs necessary parsing, and saves the data." authors = ["Claromes "] license = "GPLv3" @@ -38,6 +38,7 @@ python = "^3.10" requests = "^2.30.0" rich = "^13.6.0" click = "^8.1.7" +pandas = "^2.2.2" [tool.poetry.group.docs.dependencies] sphinx = "^7.3.7" From e05c7fefabb983224c9a3fde63ca6246332214e9 Mon Sep 17 00:00:00 2001 From: Claromes Date: Tue, 16 Jul 2024 19:08:45 -0300 Subject: [PATCH 2/5] update module viz - add get method --- waybacktweets/api/visualize.py | 40 ++++++++++++++++------------------ 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/waybacktweets/api/visualize.py b/waybacktweets/api/visualize.py index 93ad80a..bb4def0 100644 --- a/waybacktweets/api/visualize.py +++ b/waybacktweets/api/visualize.py @@ -114,12 +114,12 @@ def generate(self) -> str: tweet = self.json_path[index] html += '
\n' - if not tweet["available_tweet_text"]: + if not tweet.get("available_tweet_text"): iframe_src = { - "Archived Tweet": tweet["archived_tweet_url"], - "Parsed Archived Tweet": tweet["parsed_archived_tweet_url"], - "Original Tweet": tweet["original_tweet_url"], - "Parsed Tweet": tweet["parsed_tweet_url"], + "Archived Tweet": tweet.get("archived_tweet_url"), + "Parsed Archived Tweet": tweet.get("parsed_archived_tweet_url"), + "Original Tweet": tweet.get("original_tweet_url"), + "Parsed Tweet": tweet.get("parsed_tweet_url"), } for key, value in iframe_src.items(): @@ -149,25 +149,23 @@ def generate(self) -> str: index=index, url=value, key_cleaned=key_cleaned ) - if tweet["available_tweet_text"]: + if tweet.get("available_tweet_text"): html += "
\n" - html += f'

Available Tweet Content: {tweet["available_tweet_text"]}

\n' - html += f'

Available Tweet Is Retweet: {tweet["available_tweet_is_RT"]}

\n' - html += f'

Available Tweet Username: {tweet["available_tweet_info"]}

\n' + html += f'

Available Tweet Content: {tweet.get("available_tweet_text")}

\n' + html += f'

Available Tweet Is Retweet: {tweet.get("available_tweet_is_RT")}

\n' + html += f'

Available Tweet Username: {tweet.get("available_tweet_info")}

\n' html += "
\n" - html += f'

Archived Tweet: {tweet["archived_tweet_url"]}

\n' - html += f'

Parsed Archived Tweet: {tweet["parsed_archived_tweet_url"]}

\n' - html += f'

Original Tweet: {tweet["original_tweet_url"]}

\n' - html += f'

Parsed Tweet: {tweet["parsed_tweet_url"]}

\n' - html += f'

Archived URL Key: {tweet["archived_urlkey"]}

\n' - html += f'

Archived Timestamp: {timestamp_parser(tweet["archived_timestamp"])} ({tweet["archived_timestamp"]})

\n' - html += f'

Archived mimetype: {tweet["archived_mimetype"]}

\n' - html += f'

Archived Statuscode: {tweet["archived_statuscode"]}

\n' - html += ( - f'

Archived Digest: {tweet["archived_digest"]}\n' - ) - html += f'

Archived Length: {tweet["archived_length"]}

\n' + html += f'

Archived Tweet: {tweet.get("archived_tweet_url")}

\n' + html += f'

Parsed Archived Tweet: {tweet.get("parsed_archived_tweet_url")}

\n' + html += f'

Original Tweet: {tweet.get("original_tweet_url")}

\n' + html += f'

Parsed Tweet: {tweet.get("parsed_tweet_url")}

\n' + html += f'

Archived URL Key: {tweet.get("archived_urlkey")}

\n' + html += f'

Archived Timestamp: {timestamp_parser(tweet.get("archived_timestamp"))} ({tweet.get("archived_timestamp")})

\n' + html += f'

Archived mimetype: {tweet.get("archived_mimetype")}

\n' + html += f'

Archived Statuscode: {tweet.get("archived_statuscode")}

\n' + html += f'

Archived Digest: {tweet.get("archived_digest")}\n' + html += f'

Archived Length: {tweet.get("archived_length")}

\n' html += "
\n" html += "\n\n" # Closes the page div and the container From 595a0f853cc86081d00feb75a8ff6a8e31eb1610 Mon Sep 17 00:00:00 2001 From: Claromes Date: Tue, 16 Jul 2024 21:38:37 -0300 Subject: [PATCH 3/5] fix accordions on firefox --- waybacktweets/api/visualize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/waybacktweets/api/visualize.py b/waybacktweets/api/visualize.py index bb4def0..f679ca5 100644 --- a/waybacktweets/api/visualize.py +++ b/waybacktweets/api/visualize.py @@ -131,7 +131,7 @@ def generate(self) -> str: html += '
\n' html += f'
Loading...
\n' - html += f'\n' + html += f'\n' html += "
\n" html += "\n" From ff434a6065d61016e38d2a7239c0a5ee2763d967 Mon Sep 17 00:00:00 2001 From: Claromes Date: Tue, 16 Jul 2024 21:51:47 -0300 Subject: [PATCH 4/5] update app - allow empty date filter --- app/app.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/app/app.py b/app/app.py index a608dad..6e68929 100644 --- a/app/app.py +++ b/app/app.py @@ -251,6 +251,13 @@ def scroll_page(): collapse = "urlkey" matchtype = "prefix" + start_timestamp = None + end_timestamp = None + + if st.session_state.archived_timestamp_filter: + start_timestamp = st.session_state.archived_timestamp_filter[0] + end_timestamp = st.session_state.archived_timestamp_filter[1] + try: with st.spinner( f"Waybacking @{st.session_state.current_username}'s archived tweets" @@ -258,8 +265,8 @@ def scroll_page(): wayback_tweets = wayback_tweets( st.session_state.current_username, collapse, - st.session_state.archived_timestamp_filter[0], - st.session_state.archived_timestamp_filter[1], + start_timestamp, + end_timestamp, limit, offset, matchtype, From f3b58a0d261e33b851bb5399c5e0aa03cb919ce2 Mon Sep 17 00:00:00 2001 From: Claromes Date: Thu, 18 Jul 2024 20:16:03 -0300 Subject: [PATCH 5/5] update dev status --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9c9c686..5b98d01 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ keywords = [ "command-line", ] classifiers = [ - "Development Status :: 3 - Alpha", + "Development Status :: 4 - Beta", "Intended Audience :: Developers", "Intended Audience :: Science/Research", "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",