diff --git a/CITATION.cff b/CITATION.cff
index 1052734..f730ed1 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -13,7 +13,7 @@ authors:
email: support@claromes.com
identifiers:
- type: doi
- value: 10.5281/zenodo.12528448
+ value: 10.5281/zenodo.12528447
description: The concept DOI of the work.
- type: url
value: "https://pypi.org/project/waybacktweets/"
diff --git a/README.md b/README.md
index 2bd1b23..561d369 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,9 @@
# Wayback Tweets
-[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.12528448.svg)](https://doi.org/10.5281/zenodo.12528448) [![PyPI](https://img.shields.io/pypi/v/waybacktweets)](https://pypi.org/project/waybacktweets) [![docs](https://github.com/claromes/waybacktweets/actions/workflows/docs.yml/badge.svg)](https://github.com/claromes/waybacktweets/actions/workflows/docs.yml) [![Streamlit App](https://static.streamlit.io/badges/streamlit_badge_black_white.svg)](https://waybacktweets.streamlit.app)
+[![PyPI](https://img.shields.io/pypi/v/waybacktweets)](https://pypi.org/project/waybacktweets) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.12528447.svg)](https://doi.org/10.5281/zenodo.12528447) [![Streamlit App](https://static.streamlit.io/badges/streamlit_badge_black_white.svg)](https://waybacktweets.streamlit.app) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1zRqi6uTMiGi5z8GQ-PC0tbpCJWULCqMO?usp=sharing)
-Retrieves archived tweets CDX data from the Wayback Machine, performs necessary parsing (see [Field Options](https://claromes.github.io/waybacktweets/field_options.html)), and saves the data in HTML (for easy viewing of the tweets using the `iframe` tag), CSV, and JSON formats.
+
+Retrieves archived tweets CDX data from the Wayback Machine, performs necessary parsing (see [Field Options](https://claromes.github.io/waybacktweets/field_options.html)), and saves the data in HTML (for easy viewing of the tweets using `iframe` tags), CSV, and JSON formats.
## Installation
@@ -57,7 +58,7 @@ if archived_tweets:
## Acknowledgements
- Tristan Lee (Bellingcat's Data Scientist) for the idea of the application.
-- Jessica Smith (Snowflake's Marketing Specialist) and Streamlit/Snowflake teams for the additional server resources on Streamlit Cloud.
+- Jessica Smith (Snowflake's Community Growth Specialist) and Streamlit/Snowflake team for the additional server resources on Streamlit Cloud.
- OSINT Community for recommending the application.
> [!NOTE]
diff --git a/app/app.py b/app/app.py
index df41e67..a608dad 100644
--- a/app/app.py
+++ b/app/app.py
@@ -34,7 +34,7 @@
layout="centered",
menu_items={
"About": f"""
- [![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/claromes/waybacktweets?include_prereleases)](https://github.com/claromes/waybacktweets/releases) [![License](https://img.shields.io/github/license/claromes/waybacktweets)](https://github.com/claromes/waybacktweets/blob/main/LICENSE.md) [![Star](https://img.shields.io/github/stars/claromes/waybacktweets?style=social)](https://github.com/claromes/waybacktweets)
+ [![License](https://img.shields.io/github/license/claromes/waybacktweets)](https://github.com/claromes/waybacktweets/blob/main/LICENSE.md)
The application is a prototype hosted on Streamlit Cloud, serving as an alternative to the command line tool.
@@ -168,16 +168,12 @@ def scroll_page():
# ------ User Interface Settings ------ #
-st.info(
- "🥳 [**Pre-release 1.0x: Python module, CLI, and new Streamlit app**](https://github.com/claromes/waybacktweets/releases)" # noqa: E501
-)
-
st.image(TITLE, use_column_width="never")
st.caption(
- "[![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/claromes/waybacktweets?include_prereleases)](https://github.com/claromes/waybacktweets/releases) [![Star](https://img.shields.io/github/stars/claromes/waybacktweets?style=social)](https://github.com/claromes/waybacktweets)" # noqa: E501
+ "[![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/claromes/waybacktweets?include_prereleases)](https://github.com/claromes/waybacktweets/releases) [![sponsor](https://img.shields.io/badge/Donate-via%20Sponsors-ff69b4.svg?logo=github)](https://github.com/sponsors/claromes)" # noqa: E501
)
st.write(
- "Retrieves archived tweets CDX data in HTML (for easy viewing of the tweets using the `iframe` tag), CSV, and JSON formats." # noqa: E501
+ "Retrieves archived tweets CDX data in HTML (for easy viewing of the tweets using the iframe tag), CSV, and JSON formats." # noqa: E501
)
st.write(
@@ -291,15 +287,15 @@ def scroll_page():
# -- Rendering -- #
- if csv_data and json_data and html_content:
- st.session_state.count = len(df)
- st.write(f"**{st.session_state.count} URLs have been captured**")
+ st.session_state.count = len(df)
+ st.write(f"**{st.session_state.count} URLs have been captured**")
- # -- HTML -- #
+ tab1, tab2, tab3 = st.tabs(["HTML", "CSV", "JSON"])
- st.header("HTML", divider="gray", anchor=False)
+ # -- HTML -- #
+ with tab1:
st.write(
- f"Visualize tweets more efficiently through `iframes`. Download the @{st.session_state.current_username}'s archived tweets in HTML." # noqa: E501
+ f"Visualize tweets more efficiently through iframe tags. Download the @{st.session_state.current_username}'s archived tweets in HTML." # noqa: E501
)
col5, col6 = st.columns([1, 18])
@@ -317,8 +313,7 @@ def scroll_page():
)
# -- CSV -- #
-
- st.header("CSV", divider="gray", anchor=False)
+ with tab2:
st.write(
"Check the data returned in the dataframe below and download the file."
)
@@ -340,8 +335,7 @@ def scroll_page():
st.dataframe(df, use_container_width=True)
# -- JSON -- #
-
- st.header("JSON", divider="gray", anchor=False)
+ with tab3:
st.write(
"Check the data returned in JSON format below and download the file."
)
diff --git a/docs/conf.py b/docs/conf.py
index 4a4419e..b6304e9 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -20,6 +20,7 @@
"sphinx_new_tab_link",
"sphinx_click.ext",
"sphinx_autodoc_typehints",
+ "sphinxcontrib.youtube",
]
templates_path = ["_templates"]
diff --git a/docs/contribute.rst b/docs/contribute.rst
index 54376ac..edaab0c 100644
--- a/docs/contribute.rst
+++ b/docs/contribute.rst
@@ -19,7 +19,7 @@ These are the prerequisites:
- Python 3.10+
- Poetry
-Install from the source, following the :ref:`installation` instructions.
+Install from the source, following the :ref:`installation_from_source` instructions.
Brief explanation about the code under the Wayback Tweets directory:
diff --git a/docs/handson.rst b/docs/handson.rst
new file mode 100644
index 0000000..79ec10f
--- /dev/null
+++ b/docs/handson.rst
@@ -0,0 +1,22 @@
+Hands-On Examples
+====================
+
+- **Notebook**
+
+ This notebook demonstrates how to fetch, parse, and export archived tweets for a specific user using the ``waybacktweets`` library.
+
+ .. image:: https://colab.research.google.com/assets/colab-badge.svg
+ :target: https://colab.research.google.com/drive/1zRqi6uTMiGi5z8GQ-PC0tbpCJWULCqMO?usp=sharing
+ :alt: Open In Colab
+
+.. raw:: html
+
+   <br>
+
+
+- **Video**
+
+ Demonstration of how to use Wayback Tweets and other tools to retrieve tweets (in Spanish)
+
+ .. youtube:: qy3wOnUxe6A
+ :width: 100%
diff --git a/docs/index.rst b/docs/index.rst
index f6a5578..300dec9 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -9,10 +9,11 @@ Wayback Tweets
Pre-release: |release|
-Retrieves archived tweets CDX data from the Wayback Machine, performs necessary parsing (see :ref:`field_options`), and saves the data in HTML (for easy viewing of the tweets using the ``iframe`` tag), CSV, and JSON formats.
+Retrieves archived tweets CDX data from the Wayback Machine, performs necessary parsing (see :ref:`field_options`), and saves the data in HTML (for easy viewing of the tweets using ``iframe`` tags), CSV, and JSON formats.
-.. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.12528448.svg
- :target: https://doi.org/10.5281/zenodo.12528448
+.. image:: https://img.shields.io/badge/Donate-via%20Sponsors-ff69b4.svg?logo=github
+ :target: https://github.com/sponsors/claromes
+ :alt: GitHub Sponsors
.. note::
Intensive queries can lead to rate limiting, resulting in a temporary ban of a few minutes from web.archive.org.
@@ -30,6 +31,7 @@ User Guide
field_options
outputs
exceptions
+ handson
contribute
todo
diff --git a/docs/installation.rst b/docs/installation.rst
index 52e614d..dd35b7a 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -1,8 +1,7 @@
-.. _installation:
-
Installation
================
+**Wayback Tweets is compatible with Python 3.10 and above.**
Using pip
------------
@@ -11,47 +10,68 @@ Using pip
pip install waybacktweets
+Using Poetry
+------------
+
+ .. code-block:: shell
+
+ poetry add waybacktweets
+
+.. _installation_from_source:
+
From source
-------------
- Clone the repository:
+ **Clone the repository:**
.. code-block:: shell
git clone git@github.com:claromes/waybacktweets.git
- Change directory:
+ **Change directory:**
.. code-block:: shell
cd waybacktweets
- Install poetry, if you haven't already:
+ **Install Poetry, if you haven't already:**
.. code-block:: shell
pip install poetry
- Install the dependencies:
+ **Install the dependencies:**
.. code-block:: shell
poetry install
- Run the CLI:
+ **Install the pre-commit hooks:**
+
+ .. code-block:: shell
+
+ poetry run pre-commit install
+
+ **Run the CLI:**
.. code-block:: shell
poetry run waybacktweets [SUBCOMMANDS]
- Run the Streamlit App:
+ **Start a new shell and activate the virtual environment:**
+
+ .. code-block:: shell
+
+ poetry shell
+
+ **Run the Streamlit App:**
.. code-block:: shell
streamlit run app/app.py
- Build the docs:
+ **Build the docs:**
.. code-block:: shell
diff --git a/legacy_app/legacy_app.py b/legacy_app/legacy_app.py
index 82059a2..ba2df0e 100644
--- a/legacy_app/legacy_app.py
+++ b/legacy_app/legacy_app.py
@@ -14,11 +14,7 @@
layout="centered",
menu_items={
"About": """
- ## 🏛️ Wayback Tweets
-
- Tool that displays, via Wayback CDX Server API, multiple archived tweets on Wayback Machine to avoid opening each link manually. Users can apply filters based on specific years and view tweets that do not have the original URL available.
-
- This tool is a prototype, please feel free to send your [feedbacks](https://github.com/claromes/waybacktweets/issues). Created by [@claromes](https://claromes.com).
+ This is the legacy application of [Wayback Tweets](https://waybacktweets.streamlit.app/).
-------
""", # noqa: E501
@@ -386,7 +382,7 @@ def next_page():
# UI
st.title(
- "Wayback Tweets [![Star](https://img.shields.io/github/stars/claromes/waybacktweets?style=social)](https://github.com/claromes/waybacktweets)", # noqa: E501
+ "Wayback Tweets", # noqa: E501
anchor=False,
help="v0.4.3",
)
diff --git a/poetry.lock b/poetry.lock
index 825cb2a..05d1e4f 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1524,6 +1524,26 @@ lint = ["docutils-stubs", "flake8", "mypy"]
standalone = ["Sphinx (>=5)"]
test = ["pytest"]
+[[package]]
+name = "sphinxcontrib-youtube"
+version = "1.4.1"
+description = "Sphinx \"youtube\" extension."
+optional = false
+python-versions = "*"
+files = [
+ {file = "sphinxcontrib_youtube-1.4.1-py2.py3-none-any.whl", hash = "sha256:de9cb454f066d580a1e7ad64efae7dd9e12c1b1567a31faa330b1aeaeed40460"},
+ {file = "sphinxcontrib_youtube-1.4.1.tar.gz", hash = "sha256:eb7871c8af47fd2b5c9727615354b7f95bce554be8be45b9fa8e5bc022f88059"},
+]
+
+[package.dependencies]
+requests = "*"
+Sphinx = ">=6.1"
+
+[package.extras]
+dev = ["nox"]
+doc = ["pydata-sphinx-theme", "sphinx-copybutton", "sphinx-design"]
+test = ["beautifulsoup4", "pytest", "pytest-cov", "pytest-regressions"]
+
[[package]]
name = "streamlit"
version = "1.36.0"
@@ -1733,4 +1753,4 @@ watchmedo = ["PyYAML (>=3.10)"]
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
-content-hash = "4017fc7af7b13a774406ad205ef03952ef96dc5c3e0413c624c8a459e0619a4c"
+content-hash = "e41f880cd350ecafc461396adeec717dd632a56071c030fab761265acc0773f6"
diff --git a/pyproject.toml b/pyproject.toml
index 9dbd986..7296f3a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "waybacktweets"
-version = "1.0a6"
+version = "1.0a7"
description = "Retrieves archived tweets CDX data from the Wayback Machine, performs necessary parsing, and saves the data."
authors = ["Claromes "]
license = "GPLv3"
@@ -46,6 +46,7 @@ sphinxcontrib-mermaid = "^0.9.2"
sphinx-new-tab-link = "^0.4.0"
sphinx-click = "^6.0.0"
sphinx-autodoc-typehints = "^2.1.1"
+sphinxcontrib-youtube = "^1.4.1"
[tool.poetry.group.dev.dependencies]
streamlit = "1.36.0"
diff --git a/waybacktweets/_cli.py b/waybacktweets/_cli.py
index 4048fc7..f003efc 100644
--- a/waybacktweets/_cli.py
+++ b/waybacktweets/_cli.py
@@ -97,7 +97,7 @@ def _parse_date(
"verbose",
is_flag=True,
default=False,
- help="Shows the error log.",
+ help="Shows the log.",
)
def main(
username: str,