From 95850a22dc0684c94494e2df5d953f05018f70b3 Mon Sep 17 00:00:00 2001 From: Claromes Date: Thu, 4 Jul 2024 16:04:08 -0300 Subject: [PATCH 1/3] app - add tabs, legacy app - update descriptions, module - update CLI help text, add Donate button, add hands-on docs page --- README.md | 7 ++++--- app/app.py | 28 +++++++++++----------------- docs/conf.py | 1 + docs/handson.rst | 22 ++++++++++++++++++++++ docs/index.rst | 8 +++++--- legacy_app/legacy_app.py | 8 ++------ poetry.lock | 22 +++++++++++++++++++++- pyproject.toml | 3 ++- waybacktweets/_cli.py | 2 +- 9 files changed, 69 insertions(+), 32 deletions(-) create mode 100644 docs/handson.rst diff --git a/README.md b/README.md index 2bd1b23..8668792 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,9 @@ # Wayback Tweets -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.12528448.svg)](https://doi.org/10.5281/zenodo.12528448) [![PyPI](https://img.shields.io/pypi/v/waybacktweets)](https://pypi.org/project/waybacktweets) [![docs](https://github.com/claromes/waybacktweets/actions/workflows/docs.yml/badge.svg)](https://github.com/claromes/waybacktweets/actions/workflows/docs.yml) [![Streamlit App](https://static.streamlit.io/badges/streamlit_badge_black_white.svg)](https://waybacktweets.streamlit.app) +[![PyPI](https://img.shields.io/pypi/v/waybacktweets)](https://pypi.org/project/waybacktweets) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.12528448.svg)](https://doi.org/10.5281/zenodo.12528448) [![Streamlit App](https://static.streamlit.io/badges/streamlit_badge_black_white.svg)](https://waybacktweets.streamlit.app) [![Open In Collab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1zRqi6uTMiGi5z8GQ-PC0tbpCJWULCqMO?usp=sharing) -Retrieves archived tweets CDX data from the Wayback Machine, performs necessary parsing (see [Field Options](https://claromes.github.io/waybacktweets/field_options.html)), and saves the data in HTML (for easy viewing of the tweets using the 
`iframe` tag), CSV, and JSON formats. + +Retrieves archived tweets CDX data from the Wayback Machine, performs necessary parsing (see [Field Options](https://claromes.github.io/waybacktweets/field_options.html)), and saves the data in HTML, for easy viewing of the tweets using the iframe tags, CSV, and JSON formats. ## Installation @@ -57,7 +58,7 @@ if archived_tweets: ## Acknowledgements - Tristan Lee (Bellingcat's Data Scientist) for the idea of the application. -- Jessica Smith (Snowflake's Marketing Specialist) and Streamlit/Snowflake teams for the additional server resources on Streamlit Cloud. +- Jessica Smith (Snowflake's Community Growth Specialist) and Streamlit/Snowflake team for the additional server resources on Streamlit Cloud. - OSINT Community for recommending the application. > [!NOTE] diff --git a/app/app.py b/app/app.py index df41e67..a608dad 100644 --- a/app/app.py +++ b/app/app.py @@ -34,7 +34,7 @@ layout="centered", menu_items={ "About": f""" - [![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/claromes/waybacktweets?include_prereleases)](https://github.com/claromes/waybacktweets/releases) [![License](https://img.shields.io/github/license/claromes/waybacktweets)](https://github.com/claromes/waybacktweets/blob/main/LICENSE.md) [![Star](https://img.shields.io/github/stars/claromes/waybacktweets?style=social)](https://github.com/claromes/waybacktweets) + [![License](https://img.shields.io/github/license/claromes/waybacktweets)](https://github.com/claromes/waybacktweets/blob/main/LICENSE.md) The application is a prototype hosted on Streamlit Cloud, serving as an alternative to the command line tool. 
@@ -168,16 +168,12 @@ def scroll_page(): # ------ User Interface Settings ------ # -st.info( - "🥳 [**Pre-release 1.0x: Python module, CLI, and new Streamlit app**](https://github.com/claromes/waybacktweets/releases)" # noqa: E501 -) - st.image(TITLE, use_column_width="never") st.caption( - "[![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/claromes/waybacktweets?include_prereleases)](https://github.com/claromes/waybacktweets/releases) [![Star](https://img.shields.io/github/stars/claromes/waybacktweets?style=social)](https://github.com/claromes/waybacktweets)" # noqa: E501 + "[![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/claromes/waybacktweets?include_prereleases)](https://github.com/claromes/waybacktweets/releases) [![sponsor](https://img.shields.io/badge/Donate-via%20Sponsors-ff69b4.svg?logo=github)](https://github.com/sponsors/claromes)" # noqa: E501 ) st.write( - "Retrieves archived tweets CDX data in HTML (for easy viewing of the tweets using the `iframe` tag), CSV, and JSON formats." # noqa: E501 + "Retrieves archived tweets CDX data in HTML (for easy viewing of the tweets using the iframe tag), CSV, and JSON formats." # noqa: E501 ) st.write( @@ -291,15 +287,15 @@ def scroll_page(): # -- Rendering -- # - if csv_data and json_data and html_content: - st.session_state.count = len(df) - st.write(f"**{st.session_state.count} URLs have been captured**") + st.session_state.count = len(df) + st.write(f"**{st.session_state.count} URLs have been captured**") - # -- HTML -- # + tab1, tab2, tab3 = st.tabs(["HTML", "CSV", "JSON"]) - st.header("HTML", divider="gray", anchor=False) + # -- HTML -- # + with tab1: st.write( - f"Visualize tweets more efficiently through `iframes`. Download the @{st.session_state.current_username}'s archived tweets in HTML." # noqa: E501 + f"Visualize tweets more efficiently through iframe tags. 
Download the @{st.session_state.current_username}'s archived tweets in HTML." # noqa: E501 ) col5, col6 = st.columns([1, 18]) @@ -317,8 +313,7 @@ def scroll_page(): ) # -- CSV -- # - - st.header("CSV", divider="gray", anchor=False) + with tab2: st.write( "Check the data returned in the dataframe below and download the file." ) @@ -340,8 +335,7 @@ def scroll_page(): st.dataframe(df, use_container_width=True) # -- JSON -- # - - st.header("JSON", divider="gray", anchor=False) + with tab3: st.write( "Check the data returned in JSON format below and download the file." ) diff --git a/docs/conf.py b/docs/conf.py index 4a4419e..b6304e9 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -20,6 +20,7 @@ "sphinx_new_tab_link", "sphinx_click.ext", "sphinx_autodoc_typehints", + "sphinxcontrib.youtube", ] templates_path = ["_templates"] diff --git a/docs/handson.rst b/docs/handson.rst new file mode 100644 index 0000000..79ec10f --- /dev/null +++ b/docs/handson.rst @@ -0,0 +1,22 @@ +Hands-On Examples +==================== + +- **Notebook** + + This notebook demonstrates how to fetch, parse, and export archived tweets for a specific user using the ``waybacktweets`` library. + + .. image:: https://colab.research.google.com/assets/colab-badge.svg + :target: https://colab.research.google.com/drive/1zRqi6uTMiGi5z8GQ-PC0tbpCJWULCqMO?usp=sharing + :alt: Open In Colab + +.. raw:: html + +
+
+ +- **Video** + + Demonstration of how to use Wayback Tweets and other tools to retrieve tweets (in Spanish) + + .. youtube:: qy3wOnUxe6A + :width: 100% diff --git a/docs/index.rst b/docs/index.rst index f6a5578..300dec9 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -9,10 +9,11 @@ Wayback Tweets Pre-release: |release| -Retrieves archived tweets CDX data from the Wayback Machine, performs necessary parsing (see :ref:`field_options`), and saves the data in HTML (for easy viewing of the tweets using the ``iframe`` tag), CSV, and JSON formats. +Retrieves archived tweets CDX data from the Wayback Machine, performs necessary parsing (see :ref:`field_options`), and saves the data in HTML, for easy viewing of the tweets using the iframe tags, CSV, and JSON formats. -.. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.12528448.svg - :target: https://doi.org/10.5281/zenodo.12528448 +.. image:: https://img.shields.io/badge/Donate-via%20Sponsors-ff69b4.svg?logo=github + :target: https://github.com/sponsors/claromes + :alt: GitHub Sponsors .. note:: Intensive queries can lead to rate limiting, resulting in a temporary ban of a few minutes from web.archive.org. @@ -30,6 +31,7 @@ User Guide field_options outputs exceptions + handson contribute todo diff --git a/legacy_app/legacy_app.py b/legacy_app/legacy_app.py index 82059a2..ba2df0e 100644 --- a/legacy_app/legacy_app.py +++ b/legacy_app/legacy_app.py @@ -14,11 +14,7 @@ layout="centered", menu_items={ "About": """ - ## 🏛️ Wayback Tweets - - Tool that displays, via Wayback CDX Server API, multiple archived tweets on Wayback Machine to avoid opening each link manually. Users can apply filters based on specific years and view tweets that do not have the original URL available. - - This tool is a prototype, please feel free to send your [feedbacks](https://github.com/claromes/waybacktweets/issues). Created by [@claromes](https://claromes.com). 
+ This is the legacy application of [Wayback Tweets](https://waybacktweets.streamlit.app/). ------- """, # noqa: E501 @@ -386,7 +382,7 @@ def next_page(): # UI st.title( - "Wayback Tweets [![Star](https://img.shields.io/github/stars/claromes/waybacktweets?style=social)](https://github.com/claromes/waybacktweets)", # noqa: E501 + "Wayback Tweets", # noqa: E501 anchor=False, help="v0.4.3", ) diff --git a/poetry.lock b/poetry.lock index 825cb2a..05d1e4f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1524,6 +1524,26 @@ lint = ["docutils-stubs", "flake8", "mypy"] standalone = ["Sphinx (>=5)"] test = ["pytest"] +[[package]] +name = "sphinxcontrib-youtube" +version = "1.4.1" +description = "Sphinx \"youtube\" extension." +optional = false +python-versions = "*" +files = [ + {file = "sphinxcontrib_youtube-1.4.1-py2.py3-none-any.whl", hash = "sha256:de9cb454f066d580a1e7ad64efae7dd9e12c1b1567a31faa330b1aeaeed40460"}, + {file = "sphinxcontrib_youtube-1.4.1.tar.gz", hash = "sha256:eb7871c8af47fd2b5c9727615354b7f95bce554be8be45b9fa8e5bc022f88059"}, +] + +[package.dependencies] +requests = "*" +Sphinx = ">=6.1" + +[package.extras] +dev = ["nox"] +doc = ["pydata-sphinx-theme", "sphinx-copybutton", "sphinx-design"] +test = ["beautifulsoup4", "pytest", "pytest-cov", "pytest-regressions"] + [[package]] name = "streamlit" version = "1.36.0" @@ -1733,4 +1753,4 @@ watchmedo = ["PyYAML (>=3.10)"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "4017fc7af7b13a774406ad205ef03952ef96dc5c3e0413c624c8a459e0619a4c" +content-hash = "e41f880cd350ecafc461396adeec717dd632a56071c030fab761265acc0773f6" diff --git a/pyproject.toml b/pyproject.toml index 9dbd986..7296f3a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "waybacktweets" -version = "1.0a6" +version = "1.0a7" description = "Retrieves archived tweets CDX data from the Wayback Machine, performs necessary parsing, and saves the data." 
authors = ["Claromes "] license = "GPLv3" @@ -46,6 +46,7 @@ sphinxcontrib-mermaid = "^0.9.2" sphinx-new-tab-link = "^0.4.0" sphinx-click = "^6.0.0" sphinx-autodoc-typehints = "^2.1.1" +sphinxcontrib-youtube = "^1.4.1" [tool.poetry.group.dev.dependencies] streamlit = "1.36.0" diff --git a/waybacktweets/_cli.py b/waybacktweets/_cli.py index 4048fc7..f003efc 100644 --- a/waybacktweets/_cli.py +++ b/waybacktweets/_cli.py @@ -97,7 +97,7 @@ def _parse_date( "verbose", is_flag=True, default=False, - help="Shows the error log.", + help="Shows the log.", ) def main( username: str, From 2b2169bb7da7b0613d54782ccc97cb2c7bfe4ed8 Mon Sep 17 00:00:00 2001 From: Claromes Date: Thu, 4 Jul 2024 16:34:46 -0300 Subject: [PATCH 2/3] update docs installation --- docs/contribute.rst | 2 +- docs/installation.rst | 38 +++++++++++++++++++++++++++++--------- 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/docs/contribute.rst b/docs/contribute.rst index 54376ac..edaab0c 100644 --- a/docs/contribute.rst +++ b/docs/contribute.rst @@ -19,7 +19,7 @@ These are the prerequisites: - Python 3.10+ - Poetry -Install from the source, following the :ref:`installation` instructions. +Install from the source, following the :ref:`installation_from_source` instructions. Brief explanation about the code under the Wayback Tweets directory: diff --git a/docs/installation.rst b/docs/installation.rst index 52e614d..dd35b7a 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -1,8 +1,7 @@ -.. _installation: - Installation ================ +**It is compatible with Python versions 3.10 and above.** Using pip ------------ @@ -11,47 +10,68 @@ Using pip pip install waybacktweets +Using Poetry +------------ + + .. code-block:: shell + + poetry add waybacktweets + +.. _installation_from_source: + From source ------------- - Clone the repository: + **Clone the repository:** .. code-block:: shell git clone git@github.com:claromes/waybacktweets.git - Change directory: + **Change directory:** .. 
code-block:: shell cd waybacktweets - Install poetry, if you haven't already: + **Install Poetry, if you haven't already:** .. code-block:: shell pip install poetry - Install the dependencies: + **Install the dependencies:** .. code-block:: shell poetry install - Run the CLI: + **Install the pre-commit hooks:** + + .. code-block:: shell + + poetry run pre-commit install + + **Run the CLI:** .. code-block:: shell poetry run waybacktweets [SUBCOMMANDS] - Run the Streamlit App: + **Start a new shell and activate the virtual environment:** + + .. code-block:: shell + + poetry shell + + **Run the Streamlit App:** .. code-block:: shell streamlit run app/app.py - Build the docs: + **Build the docs:** .. code-block:: shell From 7aa3972590852596b1e2023f7dd0dd13f4b1364e Mon Sep 17 00:00:00 2001 From: Claromes Date: Thu, 4 Jul 2024 16:44:15 -0300 Subject: [PATCH 3/3] update doi --- CITATION.cff | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 1052734..f730ed1 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -13,7 +13,7 @@ authors: email: support@claromes.com identifiers: - type: doi - value: 10.5281/zenodo.12528448 + value: 10.5281/zenodo.12528447 description: The concept DOI of the work. 
- type: url value: "https://pypi.org/project/waybacktweets/" diff --git a/README.md b/README.md index 8668792..561d369 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Wayback Tweets -[![PyPI](https://img.shields.io/pypi/v/waybacktweets)](https://pypi.org/project/waybacktweets) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.12528448.svg)](https://doi.org/10.5281/zenodo.12528448) [![Streamlit App](https://static.streamlit.io/badges/streamlit_badge_black_white.svg)](https://waybacktweets.streamlit.app) [![Open In Collab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1zRqi6uTMiGi5z8GQ-PC0tbpCJWULCqMO?usp=sharing) +[![PyPI](https://img.shields.io/pypi/v/waybacktweets)](https://pypi.org/project/waybacktweets) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.12528447.svg)](https://doi.org/10.5281/zenodo.12528447) [![Streamlit App](https://static.streamlit.io/badges/streamlit_badge_black_white.svg)](https://waybacktweets.streamlit.app) [![Open In Collab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1zRqi6uTMiGi5z8GQ-PC0tbpCJWULCqMO?usp=sharing) Retrieves archived tweets CDX data from the Wayback Machine, performs necessary parsing (see [Field Options](https://claromes.github.io/waybacktweets/field_options.html)), and saves the data in HTML, for easy viewing of the tweets using the iframe tags, CSV, and JSON formats.