From 0f4b45623187e8b649bb9f8d945b92a763ad60a9 Mon Sep 17 00:00:00 2001 From: Isabel Brador <59972166+izzybrador@users.noreply.github.com> Date: Fri, 3 Jan 2025 15:11:23 -0500 Subject: [PATCH] Delete Data Sets/Web Archives/LOC_Election_Dataset_Walkthrough.ipynb Deleting original us elections data walkthrough notebook, new one has been created in it's place. --- .../LOC_Election_Dataset_Walkthrough.ipynb | 809 ------------------ 1 file changed, 809 deletions(-) delete mode 100644 Data Sets/Web Archives/LOC_Election_Dataset_Walkthrough.ipynb diff --git a/Data Sets/Web Archives/LOC_Election_Dataset_Walkthrough.ipynb b/Data Sets/Web Archives/LOC_Election_Dataset_Walkthrough.ipynb deleted file mode 100644 index 8081dce..0000000 --- a/Data Sets/Web Archives/LOC_Election_Dataset_Walkthrough.ipynb +++ /dev/null @@ -1,809 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Exploring the United States Election Datasets\n", - "\n", - "This notebook demonstrates an intermediate approach to exploring and accessing the United States Election Datasets produced by the Library of Congress.\n", - "\n", - "As an intermediate approach, this notebook uses functions as reusable blocks of code. These functions could potentially be used outside of this notebook to assist in further exploration." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Imports\n", - "Below are the Python packages used in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import requests # Used to make HTTP requests; this is how we will scrape the Wayback\n", - "from bs4 import BeautifulSoup # Used to process the scraped content \n", - "import gzip # Used to decompress the gzipped CDX files\n", - "import pandas as pd # Used to transform the data into dataframes for computation\n", - "from sklearn.feature_extraction.text import CountVectorizer # Used to create a matrix out of a bag of words\n", - "from time import sleep # Used to provide a slight pause between Wayback requests" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Defining Global Variables\n", - "Here we’re defining two global variables that will be used in the functions to gather and scrape the content.\n", - "The represent the base of the URL structure for two of the important web applications we will use: Cloudfront, which is the front end to where the dataset files are location and Wayback, where the actual web resources are accessed." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "CLOUDFRONT_BASE_URL = 'https://d2rxokvmqqcpq7.cloudfront.net/'\n", - "WAYBACK_BASE_URL = 'https://webarchive.loc.gov/all/'" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Functions\n", - "Here we will define the functions in the order that they are used in this notebook. " - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "def gather_files_from_manifest(year: str):\n", - " \"\"\"\n", - " Function that takes a year (YYYY) as an argument.\n", - " The function collects the locations of the CDX files \n", - " listed by the provided year's manifest.\n", - " \n", - " Args:\n", - " year (str): String of a year YYYY.\n", - "\n", - " Returns:\n", - " :obj:`list` of :obj:`str` of individual CDX file URLs.\n", - " \"\"\"\n", - " \n", - " election_years = [\n", - " \"2000\",\n", - " \"2002\",\n", - " \"2004\",\n", - " \"2006\",\n", - " \"2008\",\n", - " \"2010\",\n", - " \"2012\",\n", - " \"2014\",\n", - " \"2016\",\n", - " \"2018\"\n", - " ]\n", - " \n", - " if year not in election_years:\n", - " return []\n", - " else:\n", - " manifest_url = f\"{CLOUDFRONT_BASE_URL}cdx11-indexes/{year}/el{year[-2:]}-manifest.html\"\n", - " response = requests.get(manifest_url)\n", - " soup = BeautifulSoup(response.content, 'html.parser')\n", - " cdx_files = [link.get('href') for link in soup.find_all('a')]\n", - "\n", - " return cdx_files\n", - "\n", - "def fetch_file(cdx_url: str):\n", - " \"\"\"\n", - " Function that takes a `String` as an argument.\n", - " The `cdx_url` is a singular item from the result\n", - " of the `gather_files_from_manifest` function.\n", - " The function fetches the gzipped CDX file, decompresses it,\n", - " splits it on the newlines, and removes the header. \n", - " Args:\n", - " cdx_url (str): Individual item from the result of\n", - " the `gather_files_from_manifest` function.\n", - "\n", - " Returns:\n", - " :obj:`list` of :obj:`str` of individual CDX lines, each representing\n", - " a web object.\n", - " \"\"\"\n", - " response = requests.get(cdx_url)\n", - " \n", - " # Here we decompress the gzipped CDX, decode it, split it on the newline, and remove the header\n", - " cdx_content = gzip.decompress(response.content).decode('utf-8').split('\\n')[1:]\n", - " \n", - " return cdx_content\n", - "\n", - "def create_dataframe(data: list):\n", - " \"\"\"\n", - " Function that takes a :obj:`list` of :obj:`str` as an argument.\n", - " `data` is the contents of the CDX file split on newlines. \n", - " This function takes `data`, applies a schema to it, and transforms it\n", - " into a `pandas.DataFrame`.\n", - " Args:\n", - " data (list): :obj:`list` of :obj:`str`. Each item is a line from\n", - " a CDX file or group of files.\n", - "\n", - " Returns:\n", - " A `pandas.DataFrame` of a CDX file or group of files.\n", - " \"\"\"\n", - " schema = [\n", - " 'urlkey',\n", - " 'timestamp',\n", - " 'original',\n", - " 'mimetype',\n", - " 'statuscode',\n", - " 'digest',\n", - " 'redirect',\n", - " 'metatags',\n", - " 'file_size',\n", - " 'offset',\n", - " 'warc_filename'\n", - " ]\n", - " _data = [row.split() for row in data]\n", - " \n", - " df = pd.DataFrame(_data, columns=schema)\n", - " \n", - " return df\n", - "\n", - "def create_dataframe_from_manifest(manifest: list):\n", - " \"\"\"\n", - " Function that takes a :obj:`list` of :obj:`str` as an argument.\n", - " The `manifest` is a list of all the individual CDX files found\n", - " from an Election year's or group of Election years' HTML manifest.\n", - " This function loops through each file, transforms it into a `pandas.DataFrame`\n", - " by calling the `create_dataframe` function, concats the DataFrames together,\n", - " and then returns the Dataframe representing the entire manifest.\n", - " Args:\n", - " manifest (list): :obj:`list` of :obj:`str` of all the individual CDX files found\n", - " from an Election year's or group of Election years' HTML manifest.\n", - "\n", - " Returns:\n", - " `pandas.DataFrame` representing every file present in the `manifest`.\n", - " \"\"\"\n", - " df = pd.DataFrame() \n", - " for index, line in enumerate(manifest[0:2]):\n", - " cdx = fetch_file(line)\n", - " if index == 0:\n", - " df = create_dataframe(cdx)\n", - " else:\n", - " df = pd.concat([df, create_dataframe(cdx)])\n", - " return df\n", - "\n", - "def fetch_text(row: pd.Series):\n", - " \"\"\"\n", - " Function that takes a `pandas.Series`, which is a single row \n", - " from a `pandas.DataFrame`, as an argument.\n", - " The functions uses the timestamp and original fields from the `row`\n", - " to request the specific resource. Once the resource is fetched,\n", - " the Wayback banner div elements are removed so as to not detract from \n", - " the words in the resource itself. \n", - " Args:\n", - " row (pandas.Series): `pandas.Series`, which is a single row \n", - " from a `pandas.DataFrame`.\n", - "\n", - " Returns:\n", - " `String` of the resource's text.\n", - " \"\"\"\n", - " response = requests.get(f\"{WAYBACK_BASE_URL}{row['timestamp']}/{row['original']}\")\n", - " soup = BeautifulSoup(response.content, 'html.parser')\n", - " [el.extract() for el in soup.find_all('div', {'id': 'wm-maximized'})]\n", - " [el.extract() for el in soup.find_all('div', {'id': 'wm-minimized'})]\n", - " return soup.text\n", - "\n", - "def fetch_all_text(df: pd.DataFrame):\n", - " \"\"\"\n", - " Function that takes a `pandas.Dataframe` as an argument.\n", - " This is the most complicated function here. The function first cleans the\n", - " `df` that was passed in by dropping all the rows that do not have a value in the\n", - " mimetype field. Then, it drops all the duplicate digests, which removes resources\n", - " that are exactly the same. Finally, it only returns rows that have 'text' in the mimetype\n", - " and have a '200' HTTP status response, meaning the resource was sucessfully capture.\n", - " Once the `df` is cleaned, each resource's text is fetched from the Wayback,\n", - " transformed into a matrix using `sklearn.CountVectorizer`, and then returns a `pandas.DataFrame`\n", - " with words and their occurance per resource. A politeness of 15 seconds is added between Wayback requests.\n", - " Args:\n", - " row (pandas.DataFrame): `pandas.Dataframe` representing web resources as CDX lines.\n", - "\n", - " Returns:\n", - " `pandas.Dataframe` of the resource's words tabulated per web resource.\n", - " \"\"\"\n", - " countvec = CountVectorizer(ngram_range=(1,1), stop_words='english')\n", - " unprocessed_bag_of_words = []\n", - " text_df = df\\\n", - " .dropna(subset=['mimetype'])\\\n", - " .drop_duplicates(subset=['digest'])\\\n", - " .query('mimetype.str.contains(\"text\") and statuscode.str.match(\"200\")', engine='python')\n", - " \n", - " for i, row in text_df.iterrows():\n", - " unprocessed_bag_of_words.append(fetch_text(row))\n", - " sleep(15)\n", - " \n", - " processed_bag_of_words = countvec.fit_transform(unprocessed_bag_of_words)\n", - " \n", - " return pd.DataFrame(processed_bag_of_words.toarray(),columns=countvec.get_feature_names())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Gathering the list of CDX Files\n", - "\n", - "The first step is gathering the list of CDX files. To do that, simply call the `gather_files_from_manifest` function, providing the Election year as an argument." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "el00_files = gather_files_from_manifest('2000')" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['https://d2rxokvmqqcpq7.cloudfront.net/cdx11-indexes/2000/unique.20010415093936.surt.cdx.gz',\n", - " 'https://d2rxokvmqqcpq7.cloudfront.net/cdx11-indexes/2000/unique.20010415094743.surt.cdx.gz',\n", - " 'https://d2rxokvmqqcpq7.cloudfront.net/cdx11-indexes/2000/unique.20010415095044.surt.cdx.gz',\n", - " 'https://d2rxokvmqqcpq7.cloudfront.net/cdx11-indexes/2000/unique.20010415095244.surt.cdx.gz',\n", - " 'https://d2rxokvmqqcpq7.cloudfront.net/cdx11-indexes/2000/unique.20010415095459.surt.cdx.gz']" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "el00_files[:5]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### CDX File\n", - "Next, we'll demonstrate what a particular CDX File looks like" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "cdx = fetch_file(el00_files[0])" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['com,voter)/home/candidates/info/0,1214,2-11880-,00.html 20001002182124 http://www.voter.com:80/home/candidates/info/0,1214,2-11880-,00.html text/html 200 FYXP43MQC5GVBQMVK3ETWSPXUBR5ICKP - - 5051 149 unique.20010415093936.arc.gz',\n", - " 'com,voter)/home/candidates/info/0,1214,2-18885-,00.html 20001002185814 http://www.voter.com:80/home/candidates/info/0,1214,2-18885-,00.html text/html 200 H6QN5ZULJ6YZP756QNVM3YXKXC7HZUIL - - 4829 5200 unique.20010415093936.arc.gz',\n", - " 'com,voter)/home/candidates/info/0,1214,2-18880-,00.html 20001002185815 http://www.voter.com:80/home/candidates/info/0,1214,2-18880-,00.html text/html 200 HFG67JI4KBPHFXMQE5DJRHF3OEKKBOO6 - - 4794 10029 unique.20010415093936.arc.gz',\n", - " 'com,voter)/home/officials/general/1,1195,2-2467-,00.html 20001002185815 http://voter.com:80/home/officials/general/1,1195,2-2467-,00.html text/html 200 HZJFLTHZD5MGEPJS2WVGBHQRQUPFBE3O - - 5282 14823 unique.20010415093936.arc.gz',\n", - " 'com,voter)/home/candidates/info/0,1214,2-18886-,00.html 20001002185816 http://www.voter.com:80/home/candidates/info/0,1214,2-18886-,00.html text/html 200 QAM7JW7S4CNYMP6HLA6DASOXTO2SIGWO - - 4823 20105 unique.20010415093936.arc.gz']" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cdx[:5]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### CDX as a DataFrame\n", - "Now, here is the same CDX transformed into a DataFrame" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "cdx_df = create_dataframe(cdx)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
urlkeytimestamporiginalmimetypestatuscodedigestredirectmetatagsfile_sizeoffsetwarc_filename
0com,voter)/home/candidates/info/0,1214,2-11880...20001002182124http://www.voter.com:80/home/candidates/info/0...text/html200FYXP43MQC5GVBQMVK3ETWSPXUBR5ICKP--5051149unique.20010415093936.arc.gz
1com,voter)/home/candidates/info/0,1214,2-18885...20001002185814http://www.voter.com:80/home/candidates/info/0...text/html200H6QN5ZULJ6YZP756QNVM3YXKXC7HZUIL--48295200unique.20010415093936.arc.gz
2com,voter)/home/candidates/info/0,1214,2-18880...20001002185815http://www.voter.com:80/home/candidates/info/0...text/html200HFG67JI4KBPHFXMQE5DJRHF3OEKKBOO6--479410029unique.20010415093936.arc.gz
3com,voter)/home/officials/general/1,1195,2-246...20001002185815http://voter.com:80/home/officials/general/1,1...text/html200HZJFLTHZD5MGEPJS2WVGBHQRQUPFBE3O--528214823unique.20010415093936.arc.gz
4com,voter)/home/candidates/info/0,1214,2-18886...20001002185816http://www.voter.com:80/home/candidates/info/0...text/html200QAM7JW7S4CNYMP6HLA6DASOXTO2SIGWO--482320105unique.20010415093936.arc.gz
....................................
1096875com,voter)/home/candidates/info/0,1214,2-9118-...20001002183052http://www.voter.com:80/home/candidates/info/0...--3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ--118145323588unique.20010415093936.arc.gz
1096876com,voter)/home/candidates/info/0,1214,2-9115-...20001002183052http://www.voter.com:80/home/candidates/info/0...--3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ--118145323706unique.20010415093936.arc.gz
1096877com,voter)/home/candidates/info/0,1214,2-15361...20001002182249http://www.voter.com:80/home/candidates/info/0...--3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ--119145323824unique.20010415093936.arc.gz
1096878com,voter)/home/candidates/info/0,1214,2-12994...20001002181842http://www.voter.com:80/home/candidates/info/0...text/html404UDSH36NBYWO2X73LNMX2LEHLNQ7FYXHZ--351145323943unique.20010415093936.arc.gz
1096879NoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNone
\n", - "

1096880 rows × 11 columns

\n", - "
" - ], - "text/plain": [ - " urlkey timestamp \\\n", - "0 com,voter)/home/candidates/info/0,1214,2-11880... 20001002182124 \n", - "1 com,voter)/home/candidates/info/0,1214,2-18885... 20001002185814 \n", - "2 com,voter)/home/candidates/info/0,1214,2-18880... 20001002185815 \n", - "3 com,voter)/home/officials/general/1,1195,2-246... 20001002185815 \n", - "4 com,voter)/home/candidates/info/0,1214,2-18886... 20001002185816 \n", - "... ... ... \n", - "1096875 com,voter)/home/candidates/info/0,1214,2-9118-... 20001002183052 \n", - "1096876 com,voter)/home/candidates/info/0,1214,2-9115-... 20001002183052 \n", - "1096877 com,voter)/home/candidates/info/0,1214,2-15361... 20001002182249 \n", - "1096878 com,voter)/home/candidates/info/0,1214,2-12994... 20001002181842 \n", - "1096879 None None \n", - "\n", - " original mimetype \\\n", - "0 http://www.voter.com:80/home/candidates/info/0... text/html \n", - "1 http://www.voter.com:80/home/candidates/info/0... text/html \n", - "2 http://www.voter.com:80/home/candidates/info/0... text/html \n", - "3 http://voter.com:80/home/officials/general/1,1... text/html \n", - "4 http://www.voter.com:80/home/candidates/info/0... text/html \n", - "... ... ... \n", - "1096875 http://www.voter.com:80/home/candidates/info/0... - \n", - "1096876 http://www.voter.com:80/home/candidates/info/0... - \n", - "1096877 http://www.voter.com:80/home/candidates/info/0... - \n", - "1096878 http://www.voter.com:80/home/candidates/info/0... text/html \n", - "1096879 None None \n", - "\n", - " statuscode digest redirect metatags \\\n", - "0 200 FYXP43MQC5GVBQMVK3ETWSPXUBR5ICKP - - \n", - "1 200 H6QN5ZULJ6YZP756QNVM3YXKXC7HZUIL - - \n", - "2 200 HFG67JI4KBPHFXMQE5DJRHF3OEKKBOO6 - - \n", - "3 200 HZJFLTHZD5MGEPJS2WVGBHQRQUPFBE3O - - \n", - "4 200 QAM7JW7S4CNYMP6HLA6DASOXTO2SIGWO - - \n", - "... ... ... ... ... \n", - "1096875 - 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - \n", - "1096876 - 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - \n", - "1096877 - 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - \n", - "1096878 404 UDSH36NBYWO2X73LNMX2LEHLNQ7FYXHZ - - \n", - "1096879 None None None None \n", - "\n", - " file_size offset warc_filename \n", - "0 5051 149 unique.20010415093936.arc.gz \n", - "1 4829 5200 unique.20010415093936.arc.gz \n", - "2 4794 10029 unique.20010415093936.arc.gz \n", - "3 5282 14823 unique.20010415093936.arc.gz \n", - "4 4823 20105 unique.20010415093936.arc.gz \n", - "... ... ... ... \n", - "1096875 118 145323588 unique.20010415093936.arc.gz \n", - "1096876 118 145323706 unique.20010415093936.arc.gz \n", - "1096877 119 145323824 unique.20010415093936.arc.gz \n", - "1096878 351 145323943 unique.20010415093936.arc.gz \n", - "1096879 None None None \n", - "\n", - "[1096880 rows x 11 columns]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cdx_df" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Election 2000 DataFrame\n", - "Now we'll just create a DataFrame from the whole 2000 Election dataset. To do that, we'll use the `create_dataframe_from_manifest` which loops over the files and calls `create_dataframe` programmatically instead of manually and individually as we did above. " - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "el00_df = create_dataframe_from_manifest(el00_files)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Mimetypes\n", - "For this exercise, we're going to take a brief look at the mimetypes. First, we'll select all the mimetypes in the Dataframe and get their sums by calling `value_counts` which is a method from Pandas. " - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "el00_mimetypes = el00_df['mimetype'].value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "- 1094810\n", - "text/html 6209\n", - "application/pdf 13\n", - "image/gif 11\n", - "image/jpeg 4\n", - "text/plain 3\n", - "application/mac-binhex40 1\n", - "Name: mimetype, dtype: int64" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "el00_mimetypes" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Dropping and Graphing\n", - "You can see from the output above, that the majority of the mimetypes are not listed and show up as `-`. Further research into this could prove interesting. For now, however, we're just going to drop them and graph the remainder." - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUUAAAGnCAYAAAAg+EXUAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAkyElEQVR4nO3de7hdVX3u8e8rICAS5RIoTdCgJ2KBCkjEqDz1gpVYLdAKGqslrbRpkVY96qnQnh6PF061p7UVK1jqhVCpNN5TFZST4gVFcQe5yCWSCkJKSuI9XkCB9/wxx5aRnZW9185lzrmy38/zrGetNdaaa/027Lx7jjnGHFO2iYiIxkO6LiAiok8SihERlYRiREQloRgRUUkoRkRUEooREZVduy5gKvvvv7/nzZvXdRkRsZNZtWrVt23Pntje+1CcN28eY2NjXZcRETsZSd8a1J7uc0REJaEYEVFJKEZEVBKKERGVhGJERCWhGBFRSShGRFQSihERlYRiREQloRgRUUkoRkRUen/u83TMO+uTO/Tzb3/L83bo50dE97KnGBFRSShGRFQSihERlYRiRERlqFCU9EhJH5J0i6SbJT1F0r6SLpd0a7nfp3r/2ZLWSFot6YSq/RhJN5TXzpWkHfFDRURsrWH3FN8OXGb78cCRwM3AWcBK2/OBleU5kg4DFgOHA4uA8yTtUj7nfGApML/cFm2nnyMiYruYMhQlzQJ+DXgPgO2f2f4+cBKwrLxtGXByeXwScInte23fBqwBjpV0EDDL9lW2DVxUbRMR0QvD7Ck+BtgAvE/S1yS9W9JewIG21wGU+wPK++cAd1bbry1tc8rjie0REb0xTCjuCjwRON/20cCPKV3lLRh0nNCTtG/+AdJSSWOSxjZs2DBEiRER28cwobgWWGv7K+X5h2hC8u7SJabcr6/ef3C1/VzgrtI+d0D7ZmxfYHuB7QWzZ292BcKIiB1mylC0/V/AnZIOLU3HAzcBK4AlpW0J8PHyeAWwWNLukg6hGVC5unSxN0paWEadT6u2iYjohWHPff5T4GJJDwW+Cfw+TaAul3Q6cAdwKoDtGyUtpwnO+4Azbd9fPucM4EJgT+DScouI6I2hQtH2tcCCAS8dv4X3nwOcM6B9DDhiGvVFRLQqZ7RERFQSihERlYRiREQloRgRUUkoRkRUEooREZWEYkREJaEYEVFJKEZEVBKKERGVhGJERCWhGBFRSShGRFQSihERlYRiREQloRgRUUkoRkRUEooREZWEYkREJaEYEVFJKEZEVBKKERGVhGJERCWhGBFRSShGRFQSihERlYRiREQloRgRUUkoRkRUEooREZWEYkREJaEYEVFJKEZEVIYKRUm3S7pB0rWSxkrbvpIul3Rrud+nev/ZktZIWi3phKr9mPI5aySdK0nb/0eKiNh609lTfKbto2wvKM/PAlbang+sLM+RdBiwGDgcWAScJ2mXss35wFJgfrkt2vYfISJi+9mW7vNJwLLyeBlwctV+ie17bd8GrAGOlXQQMMv2VbYNXFRtExHRC8OGooHPSFolaWlpO9D2OoByf0BpnwPcWW27trTNKY8ntm9G0lJJY5LGNmzYMGSJERHbbtch3/c023dJOgC4XNItk7x30HFCT9K+eaN9AXABwIIFCwa+JyJiRxhqT9H2XeV+PfBR4Fjg7tIlptyvL29fCxxcbT4XuKu0zx3QHhHRG1OGoqS9JO09/hh4DvB1YAWwpLxtCfDx8ngFsFjS7pIOoRlQubp0sTdKWlhGnU+rtomI6IVhus8HAh8ts2d2Bf7F9mWSvgosl3Q6cAdwKoDtGyUtB24C7gPOtH1/+awzgAuBPYFLyy0iojemDEXb3wSOHND+HeD4LWxzDnDOgPYx4IjplxkR0Y6c0RIRUUkoRkRUEooREZWEYkREJaEYEVFJKEZEVBKKERGVhGJERCWhGBFRSShGRFQSihERlYRiREQloRgRUUkoRkRUEooREZWEYkREJaEYEVFJKEZEVBKKERGVhGJERCWhGBFRSShGRFQSihERlYRiREQloRgRUUkoRkRUEooREZWEYkREJaEYEVFJKEZEVBKKERGVhGJERGXoUJS0i6SvSfpEeb6vpMsl3Vru96nee7akNZJWSzqhaj9G0g3ltXMlafv+OBER22Y6e4qvBG6unp8FrLQ9H1hZniPpMGAxcDiwCDhP0i5lm/OBpcD8clu0TdVHRGxnQ4WipLnA84B3V80nAcvK42XAyVX7JbbvtX0bsAY4VtJBwCzbV9k2cFG1TURELwy7p/j3wJ8BD1RtB9peB1DuDyjtc4A7q/etLW1zyuOJ7RERvTFlKEp6PrDe9qohP3PQcUJP0j7oO5dKGpM0tmHDhiG/NiJi2w2zp/g04ERJtwOXAM+S9H7g7tIlptyvL+9fCxxcbT8XuKu0zx3QvhnbF9heYHvB7Nmzp/HjRERsmylD0fbZtufankczgPLvtl8KrACWlLctAT5eHq8AFkvaXdIhNAMqV5cu9kZJC8uo82nVNhERvbDrNmz7FmC5pNOBO4BTAWzfKGk5cBNwH3Cm7fvLNmcAFwJ7ApeWW0REb0wrFG1/Fvhsefwd4PgtvO8c4JwB7WPAEdMtMiKiLTmjJSKiklCMiKgkFCMiKgnFiIhKQjEiopJQjIioJBQjIioJxYiISkIxIqKSUIyIqCQUIyIqCcWIiEpCMSKiklCMiKgkFCMiKgnFiIhKQjEiopJQjIioJBQjIioJxYiISkIxIqKSUIyIqCQUIyIqCcWIiEpCMSKiklCMiKgkFCMiKgnFiIhKQjEiopJQjIioJBQjIioJxYiISkIxIqIyZShK2kPS1ZKuk3SjpDeU9n0lXS7p1nK/T7XN2ZLWSFot6YSq/RhJN5TXzpWkHfNjRURsnWH2FO8FnmX7SOAoYJGkhcBZwErb84GV5TmSDgMWA4cDi4DzJO1SPut8YCkwv9wWbb8fJSJi200Zim78qDzdrdwMnAQsK+3LgJPL45OAS2zfa/s2YA1wrKSDgFm2r7Jt4KJqm4iIXhjqmKKkXSRdC6wHLrf9FeBA2+sAyv0B5e1zgDurzdeWtjnl8cT2iIjeGCoUbd9v+yhgLs1e3xGTvH3QcUJP0r75B0hLJY1JGtuwYcMwJUZEbBfTGn22/X3gszTHAu8uXWLK/frytrXAwdVmc4G7SvvcAe2DvucC2wtsL5g9e/Z0SoyI2CbDjD7PlvTI8nhP4NnALcAKYEl52xLg4+XxCmCxpN0lHUIzoHJ16WJvlLSwjDqfVm0TEdELuw7xnoOAZWUE+SHActufkHQVsFzS6cAdwKkAtm+UtBy4CbgPONP2/eWzzgAuBPYELi23iIjemDIUbV8PHD2g/TvA8VvY5hzgnAHtY8BkxyMjIjqVM1oiIioJxYiISkIxIqKSUIyIqCQUIyIqCcWIiEpCMSKiklCMiKgkFCMiKgnFiIhKQjEiopJQjIioJBQjIioJxYiISkIxIqKSUIyIqCQUIyIqCcWIiEpCMSKiklCMiKgkFCMiKgnFiIhKQjEiopJQjIioJBQjIioJxYiISkIxIqKSUIyIqCQUIyIqCcWIiEpCMSKiklCMiKgkFCMiKlOGoqSDJV0h6WZJN0p6ZWnfV9Llkm4t9/tU25wtaY2k1ZJOqNqPkXRDee1cSdoxP1ZExNYZZk/xPuA1tn8FWAicKekw4Cxgpe35wMrynPLaYuBwYBFwnqRdymedDywF5pfbou34s0REbLMpQ9H2OtvXlMcbgZuBOcBJwLLytmXAyeXxScAltu+1fRuwBjhW0kHALNtX2TZwUbVNREQvTOuYoqR5wNHAV4ADba+DJjiBA8rb5gB3VputLW1zyuOJ7RERvTF0KEp6OPBh4FW2fzjZWwe0eZL2Qd+1VNKYpLENGzYMW2JExDYbKhQl7UYTiBfb/khpvrt0iSn360v7WuDgavO5wF2lfe6A9s3YvsD2AtsLZs+ePezPEhGxzYYZfRbwHuBm22+rXloBLCmPlwAfr9oXS9pd0iE0AypXly72RkkLy2eeVm0TEdELuw7xnqcBvwvcIOna0vbnwFuA5ZJOB+4ATgWwfaOk5cBNNCPXZ9q+v2x3BnAhsCdwablFRPTGlKFo+0oGHw8EOH4L25wDnDOgfQw4YjoFRkS0KWe0RERUEooREZWEYkREJaEYEVFJKEZEVBKKERGVhGJERCWhGBFRSShGRFQSihERlYRiREQloRgRUUkoRkRUEooREZWEYkREJaEYEVFJKEZEVBKKERGVhGJERCWhGBFRSShGRFQSihERlYRiREQloRgRUUkoRkRUEooREZWEYkREJaEYEVFJKEZEVBKKERGVhGJERCWhGBFRmTIUJb1X0npJX6/a9pV0uaRby/0+1WtnS1ojabWkE6r2YyTdUF47V5K2/48TEbFthtlTvBBYNKHtLGCl7fnAyvIcSYcBi4HDyzbnSdqlbHM+sBSYX24TPzMionNThqLtzwPfndB8ErCsPF4GnFy1X2L7Xtu3AWuAYyUdBMyyfZVtAxdV20RE9MbWHlM80PY6gHJ/QGmfA9xZvW9taZtTHk9sj4jole090DLoOKEnaR/8IdJSSWOSxjZs2LDdiouImMrWhuLdpUtMuV9f2tcCB1fvmwvcVdrnDmgfyPYFthfYXjB79uytLDEiYvq2NhRXAEvK4yXAx6v2xZJ2l3QIzYDK1aWLvVHSwjLqfFq1TUREb+w61RskfQB4BrC/pLXA64G3AMslnQ7cAZwKYPtGScuBm4D7gDNt318+6gyakew9gUvLLSKiV6YMRdsv3sJLx2/h/ecA5wxoHwOOmFZ1EREtyxktERGVhGJERCWhGBFRSShGRFQSihERlYRiREQloRgRUUkoRkRUEooREZWEYkREJaEYEVFJKEZEVBKKERGVhGJERCWhGBFRSShGRFQSihERlYRiREQloRgRUUkoRkRUEooREZWEYkREJaEYEVFJKEZEVBKKERGVhGJERCWhGBFRSShGRFQSihERlYRiREQloRgRUUkoRkRUEooREZXWQ1HSIkmrJa2RdFbb3x8RMZlWQ1HSLsA7gecChwEvlnRYmzVEREym7T3FY4E1tr9p+2fAJcBJLdcQEbFFu7b8fXOAO6vna4Ent1xDL80765M77LNvf8vzdthn78i6YcfWHjFI26GoAW3e7E3SUmBpefojSat3UD37A98e9s166w6qYvpGtW4Y3dqnVXfPjGrtO7ruRw9qbDsU1wIHV8/nAndNfJPtC4ALdnQxksZsL9jR37O9jWrdMLq1j2rdMLq1d1V328cUvwrMl3SIpIcCi4EVLdcQEbFFre4p2r5P0p8AnwZ2Ad5r+8Y2a4iImEzb3Wdsfwr4VNvfuwU7vIu+g4xq3TC6tY9q3TC6tXdSt+zNxjkiImasnOYXEVFJKEZEVBKKscNIemW5f1rXtUQMK8cUe07SStvHS3qr7dd1Xc90SLrW9lGSrrH9xK7rmS5Jg2r+AfAt2/e1Xc9MIWlfwLa/18X3tz763AVJNzDgzBmaM2xs+wktlzQdB0l6OnCipEuYcFaQ7Wu6KWsoN0u6HZgt6fqqfRT+uwOcBzwRuJ6m5iPK4/0k/bHtz3RZ3JZImg38ITCP6t+47Zd1VdNUJD0K+GvgeOD7TZNmAf8OnGX79tZqmQl7ipIGns4zzva32qpluiSdApwOHAeMTXjZtp/VflXDk/RLNPNST5z4Wp//uwOUP0JvGp9LW1Z0+h/Am4CP2D6qw/K2SNKXgC8Aq4D7x9ttf7izoqYg6Srg74EP2b6/tO0CnAq8yvbC1mqZCaE4UfkLVP8F/W6H5QxF0l/aflPXdcwk493/QW2DXuuLPte2JZJutT1/uq/tCDOi+zxO0h8BbwR+yoPdaQOP6ayoKVTHtT456BhXn7vPkpbbfuGAwxej0n1eLel8miXuAF4EfEPS7sDPuytrSp+Q9BvlRIlRsUrSecAyHlxJ62BgCfC1NguZUXuKkm4FnmJ7ZFYMkXRFebgHsAC4jiZUngB8xfZxXdU2FUkH2V63pcMXI9B93hN4Oc2hCwFX0hxnvAd4mO0fdVjeFknaCOwF3EsT3uN/hGZ1WtgkyloIp9OsrzqHpua1NGsjvMf2va3VMsNC8TLgt23/pOtapqsc3zrH9g3l+RHAa23/XqeF7eRKMD7K9o5avi56ZqaF4tHA+4Cv0PwVBcD2KzorakiTHd/qpqLhlT2Xib9oP6AZOHqN7W+2X9XUJJ0I/F/gobYPkXQU8Ebbmw0a9YGkx9u+ZQtTiXp9qGUQSf/exUDijDqmCPwjzRD/DcADHdcyXTdLejfwfpqAeSlwc7clDe1tNOtm/gtNt2gx8EvAauC9wDM6q2xyr6e5hMZnAWxfK2lelwVN4dU0izP/7YDXDPR2psKEKVvQ/J48bry9zePPM21P8Uu2n9p1HVtD0h7AGcCvlabPA+fbvqe7qoYj6Su2nzyh7cu2F0q6zvaRXdU2mfG6JX3N9tGl7foRGCAaOZJWAD8E3kwzECqaaUXHQbvHn2fanuIV5VIH/8am3efeT8mxfY+kd9Ls6T4ArC4X/xoFD0h6IfCh8vyU6rU+/1X+uqTfAXaRNB94BfCljmsaSjnmfBjNAB0Ati/qrqLJ2T5R0m/RLBf2N7ZXSPp5F4NxM21P8bYBzbbd2yk54yQ9D3gX8B80f0UPAf7I9qWdFjYESY8B3g48hSYEvwz8d+A/gWNsX9lheVsk6WHAXwDPoflv/mmaydy93juX9HqaQxKH0axd+lzgStunTLZdH0h6OM20uf8GPNH23NZrmGGhuMfEX+hBbX0k6Rbg+bbXlOePBT5p+/HdVrZlkl4MfMb2d7quZVuUyf62vbHrWoZR5oUeCXzN9pGSDgTebfs3Oy5taJKOpJk+9662v3umrZIzqOszEt0hYP14IBbfBNZ3VcyQHg18UNIXJP1vSU+WNOiKjr0k6UklYK4HbpB0naRjuq5rCD+1/QBwXwn09fT4BIWapNMBbF9n+12Sdil7vq2ZEccUy/m3c4A9y7Sc8X+Ys4CHdVbY9Nwo6VPAcpou6KnAVyX9NoDtj3RZ3CC23wK8RdLewLOBlwHvknQzcBnwadt3d1njFN4DvNz2FwAkHUczpavvAy1jkh4J/BPN+c8/Aq7utKLhHS/pBTQTufej+e/9uTYLmBHdZ0lLgN+jOSOkXlRhI3BhHwNlIknvm+Rl93kFlInKwgrPBZ5j+4Su69kSSV+0/bSp2vqsTCGaZXvilJfekvQi4J3AT4AX2/5iq98/E0JxnKQX9HmlkEF2huNypcv8EuAxtt9Ylon6Jdu93nuR9Hc0PYkP0Oydvwj4HvBh6N9k6C1N2h7Xt3oHKaP8y2jmEv8KcBPw6jbPQptpobg78AI2X2fujV3VNBVJZ9GMfu4GrAQuBa72CP2PK4sqPAA8y/avSNqHJuif1HFpk6rOOx+kd8u2jVq9g5QBxT+x/f/KH9NXAy+zfXhrNYzQv61tVs59/gGbrzM36AyAXqmOyy2iOctiVI7LobLy9oRJ0L2dtB3dkTTL9g8ntM23fWtbNcyIgZbKXNuLui5ia5TpIB8tt/q43EVAb4/LFT9Xs2Co4RcrQ/f2NEtJL7X9fkmvHvCyge8CK9zRcvlTKWc/ja/uY5ozQ941ClPPaAZD/w6YY3tR+T1/CtBaKM64KTmSfrXrIraWpDmSnirp14D9ga/2eaCici5NmB8g6RyaJbj+T7clTWp8RsLeA26zgGNoDmP01UXA4cA7gH+gmcT9z51WNLwLaSbJH1SefwN4VZsFzIg9RT24yOmuwO9L+ibNaX6jstgpkt5Kc6D/Jh7s+pvmHOhes32xpFU0198QcLLtPi9msVHSfrbfsKU3SOrtcWjg0AmHJq6QdF1n1UzP/raXSzobwPZ9ku6faqPtaUaEIvD8rgvYDk6m+WVvbbHN7UXN1dnW04zijrftZruvq1ePTzrf4uCW7f/VVXFD+Jqkhba/DCDpyUCr01q2wY8l7ceDh1oW0owDtGZGDLRIGqP5pbgU+OyIHFvZhKRLgVP7utrzZNRc0e9gmuksAh4JrKMJyj+0vaqz4iYxqoNbZXL8ocAdpelRNLU/QM97RmVa0Ttorpz4dWA2cEqb8yxnSijuSnPQeRHwTOA7NMctLrX9jS5rG5akD9Ocz7qS0Vsg913AR21/ujx/Ds3/i+XA2ycuK9ZXIzTpfGSvXgm/+Pd6KM0f0NVt9yhmRChOJOkgml/uRTSrcXzZ9su7rWpy5ayczdhe1nYt0yVpzPaCQW3q8erhozbpfGfoEQFIeiqbzyVubdmzGRWKkk61/cEJbS8E/rPtU4m2hpqL+zyuPG39L+jWkvQZmj3c+qp4v07zR+mrtic9E6MrozbpfCfpEf0z8FjgWqoBxTZ7RDMtFK+Z+A9wUFsfSXoGzelPt9N0Kw4Gltju/eizpP1plvavr4r3BpoD6I+asPpPb4z6pPMR7RHdDBzW5RlbM2L0WdJzgd8A5kg6t3ppFnBfN1VN29/SHMtaDSDpcTSjub1fysrNJWX/dAsv9zIQi5GadD5uvEdkex3NNXDeO94j6ri0YXyd5vo967oqYEaEIs1Fk8aAE2lO8Ru3kWYF6FGwm6vLbNr+Rpky0nslTP6MZkJxvTx+38/FnTjp/BTgf3Zb0lDOBj44oe2sPveIJP0bzR+fvYGbJF3NpgOKrV1BcUaEou3rgOskrbS9tn5N0qE0U0X6bkzSe3jwzISXsGnA99nFwL/SzBf9Y2AJsKHTioYwapPOR7xH9DddFzBuph1TXA38pe3l5flrgNNtH9ZtZVMrK/ycyYPH5T4PnDcKk7klrbJ9jKor4Un6nO2nd13bZMqk84k29nWAS80S/kfRXOOknly+Ebiir+dq981MC8WDaK4Wdg9wIM2E1teM4oToUaIHL2f6aZou6V3Ah2w/tuPSJjXCk87nDuoR1Ydf+krNSvJvBQ6g+W8+firurLZqmFELQpQDz5fRrLoxD7io74EoaXyv9gZJ10+8dV3fkN4s6RHAa4DXAu9mNI7lXgb8hu39be9HM5K7nGYFmvM6rWxyK8vACvCLHtFHO6xnOv4aONH2I2zPsr13m4EIM29P8XKav/SvAObSjMx93vZrOy1sEpIOsr1uS2cp9P3shFE2wpPOR7ZHpB5c7mFGDLRU3mn7Y+Xx9yU9BfjzDuuZUtm7heYCSq+rXysr57xu8636RdIhNFNy5rHpWQqtjShupe9Keh2bTjr/Xpmm09upOeWP6GU0o9APAGePQiAWY5L+FfgYm44+t3YdpRm1pwiMX5Ftvu33lUnFe9u+reu6prKFiee/GLjos7Js1XtorrvxizCx3epV2qZrhCedj1yPaJwGX6DNbvHCbDMqFNVcP3YBzRJcj5P0y8AHu95dn4ykM2iOYT0G+I/qpb2BL9p+aSeFTYOkr4zKog87A0knVz0iyp7tn9t+U3dVjY6ZForXAkcD11SnbfV6b6sMUOwD/BVwVvXSRtvf7aaq6ZH0O8B84DNs2iXq9dXlRnjS+cj1iCT9me2/lvQOyhlEtTbPfZ5pxxR/ZtuSxk/b2qvrgqZi+wc03bUXA0g6gOYf6MMlPdz2HZNt3xO/Cvwu8Cwe7D67PO+zkZx0XveIaC4m/1Dg/UBve0Q0g0Gw6XXZOzHT9hRfS7PH8us0e14vA/7F9js6LWwIkn4TeBvwyzTz5B4N3OwWL/24tdRctvIJtn/WdS3TMcKTzq9lxHpEE0maRXMscWPb3z2j5inSrOL7IZqLmR9KM+t/bqcVDe/NwELgG7YPoTn1rPfLnRXX0Ux8HjXjZ66sk/Q8SUczGr8vPyurzIxMj2icpAVqrql0PfB1SddJanXRk5m2pzjKI7jj8+OuA462/YCkq20f23VtU5H0WeAJwFfp6CT/rSHp+TSXBz2YZon8WcAbbK/otLApjHiP6HrgTNtfKM+PozmdtbV/ozPimGI9gjvhLJC9GZ29re9LejjNOc8XS1pP/0/yH/f6rgvYGrY/UR7+gGbR1lEx3iP6IQ/2iJ7daUXD2zgeiAC2r5TUahd6Ruwp7iQjuHsBP6U55PES4BHAxba/02lhO7FRnXQ+ij0iNResgmZA7mE0a4WaMmHe9l+0VstMCMWdQfkHus7luhuS9gQOtH17p4VNQtKVto8rf+nrX7TWT/LfGqM26XyU57RKumKSl93mNKiE4ohQc1Gip46P4Kq5XssX3dPrhewMRm3S+c7QI+qDhOKIGLQAgUboeiGjaFQnne8sJH3C9vPb/t4ZMdCyk9gg6cTxkU9JJwHf7rimnd2oTjrfWczp4ksTiqPjj2lGnf+B5pjcncBp3Za00/stmms+j9Sk853I17r40nSfR0yZlqMuZvrPNGUJqz+1vb7rWmaK8VkWth8ozx8C7GH7J23VkD3FnpP0Utvvl/TqCe0A2H5bJ4XNDAcCt0gaqUnnI24lzZzK8fUfH0ZzTPepbRWQUOy/8VO09u60iplpJCedj7g96gVxbf9I0sPaLCDd54joDUlfpDlkcU15fgzwD7af0lYN2VPsOW16/d7NtLnO3Ewx6pPOR9yrgA9Kuqs8P4jmrJbWZE+x5yQtmex128vaqiWiDZJ2ozlnW8Atbvk62wnFEdPlOnMRbZB0BHAYm652flFr359QHA2SFtCsorw3zV/Q7wMvc08vyB6xNcqq4c+gCcVP0Vxr+0rbp7RVw0xbZHaUvZfmMqfzbD8aOJMmJCN2JqfQLKD8X7Z/HzgS2L3NAhKKo2OzdeaAdKFjZzM+cfu+cqhoPc2qP63J6PPouFrSP7LpOnOfHV+HLosUxE5iTNIjgX8CVtFM4r66zQJyTHFE9Gm9uYg2SJoHzLJ9/VTv3a7fm1CMiD6R9AQ2X+38I219f7rPI0LSfjSnnR1H032+EnhjLkcQOxNJ76W5yNmNbLpcW2uhmD3FESHpcpqLVr2/NL0EeIbtUbkgUcSUJN1k+7Aua8jo8+jY1/abbN9Wbm9mNK+lHDGZqyR1GorpPo+OKyQtBpaX56cAn+ywnogdYRlNMP4XzXJt4+ebt3YlwnSfR0RZnGAv4P7StAvw4/I4ixTETkHSGuDVbH4FxW+1VUP2FEeE7b0l7UtzIaX6nNBeXm4zYivdMX4doq5kT3FESPoD4JXAXOBaYCHwJdvHd1lXxPYk6TyaY+X/xqarnWdKTmzmlcCTgC/bfqakxwNv6LimiO1tT5owfE7V1uqUnITi6LjH9j2SkLS77VskHdp1URHbg6QXA58pi0B0KqE4OtaWc0I/Blwu6XvAXZNuETE6Hk2z4vZuNBevuhS42h0c38sxxREk6enAI4DLck3i2JlI2pvman6LgGOBm4HLgE/bvruVGhKKEdFXZSL3c4Hn2D6hle9MKEZEn0iaQ9OdrheE+Hxb359jihHRG5LeSrNW6E08eKKCac77b6eG7ClGRF9IWg08wfa9U755B8mCEBHRJ98EduuygHSfI6JPfgJcK2klm57R8oq2CkgoRkSfrCi3zuSYYkT0iqSHAo8rT1fb/nmr359QjIi+kPQMmjUVb6dZS/FgYEmbU3ISihHRG5JWAb9je3V5/jjgA7aPaauGjD5HRJ/sNh6IALa/Qcuj0RloiYg+GZP0HuCfy/OXAKvaLCDd54joDUm7A2fSXMpXNGeynNfmZO6EYkREJd3niOicpOW2XyjpBppznTeRq/lFxIwi6SDb6yQ9etDrbV7NL6PPEdE52+vKw5fb/lZ9A17eZi0JxYjok18f0PbcNgvIMcWI6JykM2j2CB8j6frqpb2BL7ZaS44pRkTXJD0C2Af4K+Cs6qWNtr/bai0JxYjoG0kHAHuMP7d9R1vfnWOKEdEbkn5T0q3AbcDnaBaGuLTNGhKKEdEnbwYWAt+wfQhwPC0fU0woRkSf/Nz2d4CHSHqI7SuAo9osIKPPEdEn35f0cJpzni+WtB64r80CMtASEb0haS/gpzS92JcAjwAuLnuP7dSQUIyIvpB0CLDO9j3l+Z7AgbZvb6uGHFOMiD75IPBA9fz+0taahGJE9Mmutn82/qQ8fmibBSQUI6JPNkg6cfyJpJOAb7dZQI4pRkRvSHoscDHwyzQrb98JnGZ7TWs1JBQjom/KtBzZ3tj6dycUI6Jrkl5q+/2SXj3oddtva6uWTN6OiD7Yq9zv3WkVZE8xImIT2VOMiM5JOney122/oq1aEooR0QetXvB+Muk+R0TvSJoFuIvR50zejojekLSgXPv5euDrkq6TdEyrNWRPMSL6oly06kzbXyjPjwPOs/2EtmrInmJE9MnG8UAEsH0l0GoXOnuKEdEbkv4OeBjwAcDAi4DvAR8GsH3NDq8hoRgRfSHpikletu1n7fAaEooREQ/KMcWI6A1J+0k6V9I1klZJeruk/dqsIaEYEX1yCbABeAFwSnn8r20WkO5zRPSGpFW2j5nQNmZ7QVs1ZE8xIvrkCkmLJT2k3F4IfLLNArKnGBG9IWkjzTJi95emXYAfl8e2PWuH15BQjIg+kbQvMB/YY7zN9ufa+v6skhMRvSHpD4BXAnOBa4GFwJeA49uqIccUI6JPXgk8CfiW7WcCR9Py1fwSihHRJ/fYvgdA0u62bwEObbOAdJ8jok/WSnok8DHgcknfA+5qs4AMtEREL0l6OvAI4DLbP2vtexOKEREPyjHFiIhKQjEiopJQjIioJBQjIioJxYiIyv8Hn9tQdhDwiHcAAAAASUVORK5CYII=\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "el00_mimetypes.drop(labels=['-']).plot.bar(figsize=(5,5))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Fetching the Text\n", - "Now that we know the majority of the remaining resources in this dataset have a text-based mimetype, we can gather all the text and do some basic analysis. First, we'll fetch all the text from just the first 50 rows. If you have access to a larger machine, you may certainly increase the number or run it across the whole DataFrame." - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "text_df = fetch_all_text(el00_df.head(50))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Top 25 Words\n", - "Now that the text has been fetched, we'll do a simple summation and sorting, displaying the top 25 words from the first 50 rows of the 2000 Election dataset." - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "information 248\n", - "com 150\n", - "voter 150\n", - "terms 121\n", - "contact 119\n", - "state 105\n", - "candidate 95\n", - "service 78\n", - "return 72\n", - "senate 70\n", - "candidates 62\n", - "elected 62\n", - "officials 62\n", - "check 56\n", - "ca 54\n", - "new 53\n", - "capitol 52\n", - "background 52\n", - "general 51\n", - "use 51\n", - "rights 50\n", - "press 50\n", - "2000 50\n", - "privacy 50\n", - "portions 50\n", - "dtype: int64" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "text_df.sum(axis=0).sort_values(ascending=False).head(25)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Next Steps\n", - "Please feel free to use this notebook and the functions found here to help in your exploration of the U.S. Election datasets." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.5" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -}