From 9be6677eb7a69bba96985c9f91fa3a2239729519 Mon Sep 17 00:00:00 2001
From: Jida Li <77774296+jidalii@users.noreply.github.com>
Date: Thu, 24 Oct 2024 16:48:24 -0400
Subject: [PATCH 1/2] feat: redesign event structure and refactor code

---
 bu_passport/scripts/cfa_event_scraper.py | 350 ++++++++++++-----------
 1 file changed, 190 insertions(+), 160 deletions(-)

diff --git a/bu_passport/scripts/cfa_event_scraper.py b/bu_passport/scripts/cfa_event_scraper.py
index 9340d97..dce992c 100644
--- a/bu_passport/scripts/cfa_event_scraper.py
+++ b/bu_passport/scripts/cfa_event_scraper.py
@@ -1,10 +1,8 @@
-import re
 import requests
 from datetime import datetime
-import hashlib
 
 from dataclasses import dataclass, field
-from typing import List, Optional, Tuple
+from typing import List, Optional, Tuple, Dict
 
 import firebase_admin
 from firebase_admin import credentials, firestore
@@ -14,59 +12,64 @@
 from urllib.parse import urlparse, parse_qs
 
 
+def get_session_id_from_url(url):
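+    """Return the 'oid' query parameter of an event URL, used as the session id."""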
+    parsed_url = urlparse(url)
+    query_params = parse_qs(parsed_url.query)
+    return query_params.get("oid", [None])[0]
+
+
+@dataclass
+class EventSession:
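+    """A single scheduled occurrence of an event, stored in the eventSessions map."""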
+    session_id: str = ""
+    start_time: Optional[datetime] = None
+    end_time: Optional[datetime] = None
+
+    def to_dict(self):
+        return {
+            "sessionId": self.session_id,
+            "startTime": self.start_time,
+            "endTime": self.end_time,
+            "savedUsers": [],
+        }
+
+
 @dataclass
 class CFAEvent:
     event_id: str = ""
-    event_id_hex: str = ""
-    title: Optional[str] = ""
-    description: Optional[str] = ""
+    title: Optional[str] = None
+    description: Optional[str] = None
     categories: List[str] = field(default_factory=list)
-    location: Optional[str] = ""
-    photo: Optional[str] = ""
+    location: Optional[str] = None
+    photo: Optional[str] = None
     points: int = 0  # Default points to 0
     start_time: Optional[datetime] = None
     end_time: Optional[datetime] = None
-    event_url: Optional[str] = ""
-    detail_url: Optional[str] = ""
+    event_url: Optional[str] = None
+    detail_url: Optional[str] = None
+    sessions: Dict[str, "EventSession"] = field(default_factory=dict)
 
-    def to_dict_with_empty_users(self) -> dict:
-        return {
-            "eventID": self.event_id,
-            "eventTitle": self.title,
-            "eventCategories": self.categories,
-            "eventLocation": self.location,
-            "eventStartTime": self.start_time,
-            "eventEndTime": self.end_time,
-            "eventURL": self.event_url,
-            "eventDescription": self.description,
-            "eventPhoto": self.photo,
-            "eventPoints": 30,
-            "savedUsers": [],
-        }
-        
     def to_dict(self) -> dict:
         return {
             "eventID": self.event_id,
             "eventTitle": self.title,
             "eventCategories": self.categories,
             "eventLocation": self.location,
-            "eventStartTime": self.start_time,
-            "eventEndTime": self.end_time,
             "eventURL": self.event_url,
             "eventDescription": self.description,
             "eventPhoto": self.photo,
             "eventPoints": 30,
+            "eventSessions": {
+                session_id: session.to_dict()
+                for session_id, session in self.sessions.items()
+            },
         }
 
-    def write_event_id_hex(self):
-        hash_object = hashlib.sha256()
 
-        # Encode the event_id and update the hash object
-        str_combined = f"{self.event_id}{self.start_time}"
-        hash_object.update(str_combined.encode('utf-8'))
+def fetch_and_parse_url(url: str) -> BeautifulSoup:
+    """Fetch content from a URL and parse it with BeautifulSoup."""
+    response = requests.get(url)
+    return BeautifulSoup(response.content, "html.parser")
 
-        # Get the hexadecimal representation of the hash
-        self.event_id_hex = hash_object.hexdigest()
 
 def scrape_raw_events(soup: BeautifulSoup) -> str:
     raw_events = (
@@ -88,13 +91,15 @@ def scrape_event_categories(raw_event: str) -> list[str]:
             "span", class_=lambda x: x and "bulp-event-topic" in x
         )
         if raw_topic_span:
-            event_topics = raw_topic_span.find_all("span", class_="bulp-event-topic-text")
+            event_topics = raw_topic_span.find_all(
+                "span", class_="bulp-event-topic-text"
+            )
             categories = [event.text for event in event_topics]
             return categories
         return []
     except:
         return []
-        
+
 
 def scrape_event_title(raw_event: str) -> str | None:
     try:
@@ -107,88 +112,42 @@ def scrape_event_title(raw_event: str) -> str | None:
         return None
 
 
-def scrape_event_datetime(
-    raw_event: str,
-) -> Tuple[datetime, datetime] | Tuple[None, None]:
-    raw_when_span = raw_event.find("span", class_="bulp-event-when")
-
-    def parse_date(html: str) -> str | None:
+def scrape_session_datetime(raw_detail):
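+    """Parse timezone-aware start/end datetimes from the detail page schedule.
+
+    Dates look like "Thursday, October 24, 2024" and times like "7:30 pm";
+    all-day listings are mapped to 12:00 am - 11:59 pm Boston time.
+    """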
+    def parse_datetime(start_date, start_time, end_date, end_time):
+        # Define Boston timezone
         try:
-            raw_date = html.find("span", class_="bulp-event-meta-date")
-            event_days_of_week = raw_date.find(class_="bulp-event-day")
-            event_month = raw_date.find(class_="bulp-event-month")
-            event_day = raw_date.find(class_="bulp-event-date")
-            event_date = (
-                f"{event_days_of_week.text} {event_month.text} {event_day.text}"
+            boston_tz = pytz.timezone("America/New_York")
+            # Parse the start and end times into naive datetime objects
+            start_time_naive = datetime.strptime(
+                f"{start_date} {start_time}", "%A, %B %d, %Y %I:%M %p"
             )
-            return event_date
-        except Exception as e:
-            print("parse_date:", e)
-            return None
-
-    def parse_time(html: str) -> Tuple[str, str] | Tuple[None, None]:
-        try:
-            raw_time: str = (
-                html.find("span", class_="bulp-event-meta-time")
-                .find("span", class_="bulp-event-time")
-                .text.strip()
+            end_time_naive = datetime.strptime(
+                f"{end_date} {end_time}", "%A, %B %d, %Y %I:%M %p"
             )
-            if raw_time.lower() == "all day":
-                return "12:00am", "11:59pm"
-            start_time, end_time = (time.strip() for time in raw_time.split("-"))
-            return start_time, end_time
-        except Exception as e:
-            print("parse_time:", e)
-            return None, None
-
-    def parse_daytime_range(
-        start_daytime: str, end_daytime: str
-    ) -> Tuple[datetime, datetime] | Tuple[None, None]:
-        boston_tz = pytz.timezone("America/New_York")
-        cur_time = datetime.now(boston_tz)
-        cur_month = cur_time.month
-        cur_year = cur_time.year
-
-        def parse_daytime(date_str: str) -> datetime | None:
-            try:
-                # Remove ordinal suffixes (e.g., '12th' -> '12')
-                cleaned_date_str = re.sub(r"(\d+)(st|nd|rd|th)", r"\1", date_str)
-                parsed_date = datetime.strptime(cleaned_date_str, "%A %b %d %I:%M%p")
-                return boston_tz.localize(parsed_date)
-            except Exception as e:
-                print("parse_daytime:", e)
-                return None
-
-        try:
-            start = parse_daytime(start_daytime)
-            end = parse_daytime(end_daytime)
-            if start is None or end is None:
-                print("empty daytime")
-                return start, end
-
-            # Adjust the year based on the current month
-            if start.month >= cur_month:
-                start = start.replace(year=cur_year)
-            else:
-                start = start.replace(year=cur_year + 1)
 
-            if end.month >= cur_month:
-                end = end.replace(year=cur_year)
-            else:
-                end = end.replace(year=cur_year + 1)
-
-            return start, end
+            # Localize them to Boston timezone to make them timezone-aware
+            start_time = boston_tz.localize(start_time_naive)
+            end_time = boston_tz.localize(end_time_naive)
+            return start_time, end_time
         except Exception as e:
-            print("parse_daytime_range:", e)
+            print("parse_datetime:", e)
             return None, None
 
-    # find date
-    event_date = parse_date(raw_when_span)
-    start_time, end_time = parse_time(raw_when_span)
+    all_day_tag = raw_detail.find("li", class_="single-event-schedule-allday")
+    if all_day_tag:
+        date_text = all_day_tag.find("span", class_="single-event-date").text
+        return parse_datetime(date_text, "12:00 am", date_text, "11:59 pm")
+    else:
+        start_tag = raw_detail.find("li", class_="single-event-schedule-start")
+        end_tag = raw_detail.find("li", class_="single-event-schedule-end")
+        if start_tag and end_tag:
+            start_time = start_tag.find("span", class_="single-event-time").text
+            start_date = start_tag.find("span", class_="single-event-date").text
 
-    start_daytime = f"{event_date} {start_time}"
-    end_daytime = f"{event_date} {end_time}"
-    return parse_daytime_range(start_daytime, end_daytime)
+            end_time = end_tag.find("span", class_="single-event-time").text
+            end_date = end_tag.find("span", class_="single-event-date").text
+            return parse_datetime(start_date, start_time, end_date, end_time)
+    return None, None
 
 
 def scrape_event_location(raw_event: str) -> str | None:
@@ -203,16 +162,17 @@ def scrape_event_detail_link(raw_event: str) -> Tuple[str, str] | Tuple[None, No
     try:
         span: str = raw_event.find("div", class_="bulp-event-buttons")
         if not span:
-            return None, None
+            return None, None, None
         a_tag = span.find("a", class_="bulp-event-readmore")
         href = a_tag["href"]
 
         parsed_url = urlparse(href)
         query_params = parse_qs(parsed_url.query)
         eid = query_params.get("eid", [None])[0]
-        return f"https://www.bu.edu{href}", eid
+        oid = query_params.get("oid", [None])[0]
+        return f"https://www.bu.edu{href}", eid, oid
     except:
-        return None, None
+        return None, None, None
 
 
 def scrape_detail_page(soup: BeautifulSoup):
@@ -262,62 +222,132 @@ def scrape_event_event_link(raw_detail) -> str | None:
         return None
 
 
-def main(table_name: str):
-    cred = credentials.Certificate("../serviceAccountKey.json")
-    firebase_admin.initialize_app(cred)
-    db = firestore.client()
-    
-    print("Starting scraper")
+def extract_event_data(raw_event) -> CFAEvent:
+    """Extract data from a raw event and return a CFAEvent object."""
+    cfa_event = CFAEvent()
+    raw_event = raw_event.find("div", class_=lambda x: x and "bulp-item-content" in x)
 
-    url = "https://www.bu.edu/cfa/news/bu-arts-initiative/"
-    response = requests.get(url)
-    soup = BeautifulSoup(response.content, "html.parser")
+    cfa_event.categories = scrape_event_categories(raw_event)
+    cfa_event.title = scrape_event_title(raw_event)
+    cfa_event.location = scrape_event_location(raw_event)
+
+    cfa_event.detail_url, cfa_event.event_id, session_id = scrape_event_detail_link(
+        raw_event
+    )
+    if session_id:
+        # The oid query parameter identifies this listing's session
+        cfa_event.sessions[session_id] = EventSession(session_id=session_id)
+
+    return cfa_event
 
+
+def scrape_events(soup: BeautifulSoup) -> List[CFAEvent]:
+    """Scrape events from the soup and return a list of CFAEvent objects."""
     raw_events = scrape_raw_events(soup)
-    cfa_events: list[CFAEvent] = []
-    # Iterate through each slick-slide and extract content
+    cfa_events: List[CFAEvent] = []
+
     for raw_event in raw_events:
         try:
-            cfa_event = CFAEvent()
-            raw_event = raw_event.find(
-                "div", class_=lambda x: x and "bulp-item-content" in x
-            )
-            cfa_event.categories = scrape_event_categories(raw_event)
-            cfa_event.title = scrape_event_title(raw_event)
-
-            cfa_event.start_time, cfa_event.end_time = scrape_event_datetime(raw_event)
-            cfa_event.location = scrape_event_location(raw_event)
-            cfa_event.detail_url, cfa_event.event_id = scrape_event_detail_link(
-                raw_event
-            )
-            cfa_event.write_event_id_hex()
-
+            cfa_event = extract_event_data(raw_event)
             cfa_events.append(cfa_event)
         except Exception as e:
-            print(f"Error extracting slide data: {e}")
+            print(f"Error extracting event data: {e}")
+
+    return cfa_events
+
+
+def initialize_firestore() -> firestore.client:
+    """Initialize Firestore client."""
+    cred = credentials.Certificate("../serviceAccountKey.json")
+    firebase_admin.initialize_app(cred)
+    return firestore.client()
+
 
+def update_database(db, cfa_events, table_name):
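+    """Upsert events, appending only sessions not already stored in Firestore."""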
     for _, event in enumerate(cfa_events):
-        if not event.detail_url:
-            continue
-        response = requests.get(event.detail_url)
-        soup = BeautifulSoup(response.content, "html.parser")
-        raw_detail = scrape_detail_page(soup)
-
-        event.photo = scrape_event_image(raw_detail)
-        event.description = scrape_event_description(raw_detail)
-        event.event_url = scrape_event_event_link(raw_detail)
-
-    # update firebase db
-    for i, event in enumerate(cfa_events):
-        
-        doc_ref = db.collection(table_name).document(event.event_id_hex)
-    
-        if doc_ref.get().exists:
-            print(f"Updating event with pk {event.event_id_hex} in db")
-            doc_ref.set(event.to_dict(), merge=True)
+
+        doc_ref = db.collection(table_name).document(event.event_id)
+        doc = doc_ref.get()
+
+        if doc.exists:
+            print(f"Updating event with pk {event.event_id} in db")
+            existing_data = doc.to_dict()
+
+            # Extract existing sessions from Firestore
+            existing_sessions = existing_data.get("eventSessions", {})
+
+            # New sessions to be appended
+            updated_sessions = (
+                event.sessions.copy()
+            )  # Start with a copy of the new event sessions
+
+            # Skip existing sessions
+            for session_id, session in existing_sessions.items():
+                if session_id in event.sessions:
+                    # If session exists, remove from the update list
+                    updated_sessions.pop(session_id, None)
+
+            # If there are new sessions, append them to existing sessions
+            if updated_sessions:
+                existing_sessions.update(
+                    {
+                        session_id: session.to_dict()
+                        for session_id, session in updated_sessions.items()
+                    }
+                )
+                # Update the event data with the new sessions
+                event_dict = event.to_dict()
+                event_dict["eventSessions"] = (
+                    existing_sessions  # Update with merged sessions
+                )
+
+                # Merge updated data into Firestore
+                doc_ref.set(event_dict, merge=True)
+            else:
+                print(f"No new sessions to add for event {event.event_id}")
         else:
-            print(f"Adding event with pk {event.event_id_hex} in db")
-            doc_ref.set(event.to_dict_with_empty_users())
+            print(f"Adding event with pk {event.event_id} in db")
+            doc_ref.set(event.to_dict())
+
+
+def update_event_details(event: CFAEvent):
+    """Update event details by scraping its detail page."""
+    if not event.detail_url:
+        return
+
+    response = requests.get(event.detail_url)
+    soup = BeautifulSoup(response.content, "html.parser")
+    raw_detail = scrape_detail_page(soup)
+
+    event.photo = scrape_event_image(raw_detail)
+    event.description = scrape_event_description(raw_detail)
+    event.event_url = scrape_event_event_link(raw_detail)
+
+    session_id = get_session_id_from_url(event.detail_url)
+    if session_id in event.sessions:
+        event.sessions[session_id].start_time, event.sessions[session_id].end_time = (
+            scrape_session_datetime(raw_detail)
+        )
+
+
+def main(table_name):
+    """Main function to run the scraper."""
+    db = initialize_firestore()
+
+    print("Starting scraper")
+
+    # Fetch and parse the event list page
+    soup = fetch_and_parse_url("https://www.bu.edu/cfa/news/bu-arts-initiative/")
+    cfa_events = scrape_events(soup)
+
+    # Update event details for each scraped event
+    for event in cfa_events:
+        update_event_details(event)
+
+    # Update the database with the events
+    update_database(db, cfa_events, table_name)
+
     print("Event Scraping has completed")
 
-main("test_events")
+
+main("new_events")

From f0920710f7726287ce3e691a28b1d451bd41b62c Mon Sep 17 00:00:00 2001
From: Jida Li <77774296+jidalii@users.noreply.github.com>
Date: Fri, 25 Oct 2024 09:24:52 -0400
Subject: [PATCH 2/2] feat: optimize database update logic and fix bugs in
 fetching locations

---
 .../scripts/bu_event_calendar_scraper.py      | 306 ++++++++++++++++++
 bu_passport/scripts/cfa_event_scraper.py      |  46 +--
 2 files changed, 331 insertions(+), 21 deletions(-)
 create mode 100644 bu_passport/scripts/bu_event_calendar_scraper.py

diff --git a/bu_passport/scripts/bu_event_calendar_scraper.py b/bu_passport/scripts/bu_event_calendar_scraper.py
new file mode 100644
index 0000000..eda6c52
--- /dev/null
+++ b/bu_passport/scripts/bu_event_calendar_scraper.py
@@ -0,0 +1,306 @@
+from email.contentmanager import raw_data_manager
+import requests
+from datetime import datetime
+
+from dataclasses import dataclass, field
+from typing import List, Optional, Tuple, Dict
+
+import firebase_admin
+from firebase_admin import credentials, firestore
+
+from bs4 import BeautifulSoup
+import pytz
+from urllib.parse import urlparse, parse_qs
+
+
+@dataclass
+class EventSession:
+    session_id: str = ""
+    start_time: Optional[datetime] = None
+    end_time: Optional[datetime] = None
+
+    def to_dict(self):
+        return {
+            "sessionId": self.session_id,
+            "startTime": self.start_time,
+            "endTime": self.end_time,
+            "savedUsers": [],
+        }
+
+
+@dataclass
+class CFAEvent:
+    event_id: str = ""
+    title: Optional[str] = None
+    description: Optional[str] = None
+    categories: List[str] = field(default_factory=list)
+    location: Optional[str] = None
+    photo: Optional[str] = None
+    points: int = 0  # Default points to 0
+    event_url: Optional[str] = None
+    detail_url: Optional[str] = None
+    sessions: Dict[str, "EventSession"] = field(default_factory=dict)
+
+    def to_dict(self):
+        return {
+            "eventID": self.event_id,
+            "eventTitle": self.title,
+            "eventCategories": [],
+            "eventLocation": self.location,
+            "eventURL": self.event_url,
+            "eventDescription": self.description,
+            "eventPhoto": self.photo,
+            "eventPoints": 0,
+            "eventSessions": {
+                session_id: session.to_dict()
+                for session_id, session in self.sessions.items()
+            },
+        }
+        
+    def to_dict_no_sessions(self):
+        return {
+            "eventID": self.event_id,
+            "eventTitle": self.title,
+            "eventLocation": self.location,
+            "eventURL": self.event_url,
+            "eventDescription": self.description,
+            "eventPhoto": self.photo,
+            "eventPoints": 0,
+        }
+
+
+def scrape_raw_events_ls(soup: BeautifulSoup) -> list:
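+    """Return the per-day calendar-list-events <ul> blocks from the calendar page."""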
+    raw_events_ls = (
+        soup.find("div", class_="wrapper")
+        .find("main", class_="content")
+        .find("div", class_="content-container")
+        .find("article")
+        # .find("div", class_=lambda x: x and "bulp-content" in x)
+        # .find("div", class_=lambda x: x and "bulp-container" in x)
+        .find_all("ul", class_="calendar-list-events")
+    )
+    return raw_events_ls
+
+
+def scrape_raw_events(content):
+    raw_events = content.find_all("li", class_="calendar-list-event")
+    return raw_events
+
+
+def scrape_event_image(content):
+    img_tag = content.find("img")
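+    # srcset holds comma-separated "url width" candidates; keep the first URL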
+    img_link = img_tag["srcset"].split(", ")[0].split(" ")[0] if img_tag else None
+    return img_link
+
+
+def scrape_event_detail_link(
+    content: str,
+) -> Tuple[str, str, str] | Tuple[None, None, None]:
+    try:
+        event_details_tag = content.find("a", class_="bulp-event-readmore")
+        url = event_details_tag["href"] if event_details_tag else None
+        parsed_url = urlparse(url)
+        query_params = parse_qs(parsed_url.query)
+        eid = query_params.get("eid", [None])[0]
+        oid = query_params.get("oid", [None])[0]
+        return url, eid, oid
+    except:
+        return None, None, None
+
+
+def get_session_id_from_url(url):
+    parsed_url = urlparse(url)
+    query_params = parse_qs(parsed_url.query)
+    return query_params.get("oid", [None])[0]
+
+
+def scrape_event_title(content):
+    event_title_tag = content.find("div", class_="calendar-list-event-link")
+    event_title = event_title_tag.text if event_title_tag else None
+    return event_title
+
+
+def scrape_detail_page(soup: BeautifulSoup):
+    return (
+        soup.find("div", class_="wrapper")
+        .find("main", class_="content")
+        .find("div", class_="content-container-narrow")
+        .find("article")
+        .find("div", class_="single-event")
+    )
+
+
+def scrape_event_description(raw_detail) -> str | None:
+    try:
+        raw_summary = raw_detail.find("div", class_="single-event-description")
+        text_content = raw_summary.get_text(separator=" ", strip=True)
+        return text_content
+    except:
+        return None
+
+
+def scrape_event_event_link(raw_detail) -> str | None:
+    try:
+        dd_tag = (
+            raw_detail.find("div", class_="single-event-additional-details")
+            .find("dl", class_="tabular")
+            .find("dd", class_="single-event-info-url")
+        )
+        if not dd_tag:
+            return None
+
+        url = dd_tag.find("a")
+        return url["href"]
+    except:
+        return None
+
+
+def scrape_session_location(raw_detail):
+    try:
+        # Find the dd tag with the location class
+        dd_tag = raw_detail.find("dd", class_="single-event-info-location")
+        
+        # Check if the dd tag exists and return its text
+        if dd_tag:
+            return dd_tag.text.strip()
+        return None
+    except Exception as e:
+        print(f"Error occurred: {e}")
+        return None
+
+
+def scrape_session_datetime(raw_detail):
+    def parse_datetime(start_date, start_time, end_date, end_time):
+        # Define Boston timezone
+        try:
+            boston_tz = pytz.timezone("America/New_York")
+            # Parse the start and end times into naive datetime objects
+            start_time_naive = datetime.strptime(
+                f"{start_date} {start_time}", "%A, %B %d, %Y %I:%M %p"
+            )
+            end_time_naive = datetime.strptime(
+                f"{end_date} {end_time}", "%A, %B %d, %Y %I:%M %p"
+            )
+
+            # Localize them to Boston timezone to make them timezone-aware
+            start_time = boston_tz.localize(start_time_naive)
+            end_time = boston_tz.localize(end_time_naive)
+            return start_time, end_time
+        except Exception as e:
+            print("parse_datetime:", e)
+            return None, None
+
+    all_day_tag = raw_detail.find("li", class_="single-event-schedule-allday")
+    if all_day_tag:
+        date_text = all_day_tag.find("span", class_="single-event-date").text
+        return parse_datetime(date_text, "12:00 am", date_text, "11:59 pm")
+    else:
+        start_tag = raw_detail.find("li", class_="single-event-schedule-start")
+        end_tag = raw_detail.find("li", class_="single-event-schedule-end")
+        if start_tag and end_tag:
+            start_time = start_tag.find("span", class_="single-event-time").text
+            start_date = start_tag.find("span", class_="single-event-date").text
+
+            end_time = end_tag.find("span", class_="single-event-time").text
+            end_date = end_tag.find("span", class_="single-event-date").text
+            return parse_datetime(start_date, start_time, end_date, end_time)
+    return None, None
+
+
+def initialize_firestore() -> firestore.client:
+    """Initialize Firestore client."""
+    cred = credentials.Certificate("../serviceAccountKey.json")
+    firebase_admin.initialize_app(cred)
+    return firestore.client()
+
+
+def update_database(db, cfa_events, table_name):
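+    """Write events to Firestore, adding only sessions that are not yet stored."""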
+    for event in cfa_events:
+
+        doc_ref = db.collection(table_name).document(event.event_id)
+        doc = doc_ref.get()
+
+        if doc.exists:
+            print(f"Updating event with pk {event.event_id} in db")
+            existing_data = doc.to_dict()
+
+            # Update only the new sessions in eventSessions
+            existing_sessions = existing_data.get("eventSessions", {})
+            updated_sessions = event.sessions.copy()
+
+            # Skip existing sessions
+            for session_id in existing_sessions:
+                updated_sessions.pop(session_id, None)
+
+            # Append only sessions that are not already stored in Firestore
+            # so that existing session data is never overwritten
+            if updated_sessions:
+                existing_sessions.update({
+                    session_id: session.to_dict()
+                    for session_id, session in updated_sessions.items()
+                })
+                doc_ref.set({"eventSessions": existing_sessions}, merge=True)
+            else:
+                print(f"No new sessions to add for event {event.event_id}")
+
+            # Merge attributes without overwriting eventSessions
+            doc_ref.set(event.to_dict_no_sessions(), merge=True)
+
+        else:
+            print(f"Adding event with pk {event.event_id} in db")
+            doc_ref.set(event.to_dict())
+
+
+def main(table_name):
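+    """Scrape the BU events calendar page and upsert each event into Firestore."""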
+    db = initialize_firestore()
+    print("Starting scraper")
+
+    url = "https://www.bu.edu/cfa/news/calendar/?amp%3B&topic=8639&date=20241024"
+    response = requests.get(url)
+    soup = BeautifulSoup(response.content, "html.parser")
+
+    cfa_events: list[CFAEvent] = []
+
+    raw_events_ls = scrape_raw_events_ls(soup)
+    for raw_event_list in raw_events_ls:
+        raw_events = scrape_raw_events(raw_event_list)
+        for raw_event in raw_events:
+            cfa_event = CFAEvent()
+
+            cfa_event.photo = scrape_event_image(raw_event)
+            cfa_event.detail_url, cfa_event.event_id, session_id = (
+                scrape_event_detail_link(raw_event)
+            )
+            if cfa_event.event_id:
+                # Fall back to "0" when the URL carries no oid query parameter
+                if not session_id:
+                    session_id = "0"
+                cfa_event.sessions[session_id] = EventSession(session_id=session_id)
+
+            cfa_event.title = scrape_event_title(raw_event)
+
+            cfa_events.append(cfa_event)
+
+    for event in cfa_events:
+        if not event.detail_url:
+            continue
+        response = requests.get(event.detail_url)
+        soup = BeautifulSoup(response.content, "html.parser")
+        raw_detail = scrape_detail_page(soup)
+
+        event.description = scrape_event_description(raw_detail)
+        event.event_url = scrape_event_event_link(raw_detail)
+        event.location = scrape_session_location(raw_detail)
+        session_id = get_session_id_from_url(event.detail_url) or "0"
+        if session_id in event.sessions:
+            session = event.sessions[session_id]
+            session.start_time, session.end_time = scrape_session_datetime(raw_detail)
+
+    update_database(db, cfa_events, table_name)
+
+    print("Event Scraping has completed")
+
+
+main("new_events1")
diff --git a/bu_passport/scripts/cfa_event_scraper.py b/bu_passport/scripts/cfa_event_scraper.py
index dce992c..63d84b5 100644
--- a/bu_passport/scripts/cfa_event_scraper.py
+++ b/bu_passport/scripts/cfa_event_scraper.py
@@ -64,6 +64,18 @@ def to_dict(self) -> dict:
             },
         }
 
+    def to_dict_no_sessions(self):
+        return {
+            "eventID": self.event_id,
+            "eventTitle": self.title,
+            "eventCategories": self.categories,
+            "eventLocation": self.location,
+            "eventURL": self.event_url,
+            "eventDescription": self.description,
+            "eventPhoto": self.photo,
+            "eventPoints": 0,
+        }
+
 
 def fetch_and_parse_url(url: str) -> BeautifulSoup:
     """Fetch content from a URL and parse it with BeautifulSoup."""
@@ -273,21 +285,16 @@ def update_database(db, cfa_events, table_name):
             print(f"Updating event with pk {event.event_id} in db")
             existing_data = doc.to_dict()
 
-            # Extract existing sessions from Firestore
+            # Update only the new sessions in eventSessions
             existing_sessions = existing_data.get("eventSessions", {})
-
-            # New sessions to be appended
-            updated_sessions = (
-                event.sessions.copy()
-            )  # Start with a copy of the new event sessions
+            updated_sessions = event.sessions.copy()
 
             # Skip existing sessions
-            for session_id, session in existing_sessions.items():
-                if session_id in event.sessions:
-                    # If session exists, remove from the update list
-                    updated_sessions.pop(session_id, None)
+            for session_id in existing_sessions:
+                updated_sessions.pop(session_id, None)
 
-            # If there are new sessions, append them to existing sessions
+            # Append only sessions that are not already stored in Firestore
+            # so that existing session data is never overwritten
             if updated_sessions:
                 existing_sessions.update(
                     {
@@ -295,16 +302,13 @@ def update_database(db, cfa_events, table_name):
                         for session_id, session in updated_sessions.items()
                     }
                 )
-                # Update the event data with the new sessions
-                event_dict = event.to_dict()
-                event_dict["eventSessions"] = (
-                    existing_sessions  # Update with merged sessions
-                )
-
-                # Merge updated data into Firestore
-                doc_ref.set(event_dict, merge=True)
+                doc_ref.set({"eventSessions": existing_sessions}, merge=True)
             else:
-                print(f"No new sessions to add for event {event.event_id}")
+                print(f"No new sessions to add for event {event.event_id}, skipping")
+
+            # Merge attributes without overwriting eventSessions
+            doc_ref.set(event.to_dict_no_sessions(), merge=True)
+
         else:
             print(f"Adding event with pk {event.event_id} in db")
             doc_ref.set(event.to_dict())
@@ -350,4 +354,4 @@ def main(table_name):
     print("Event Scraping has completed")
 
 
-main("new_events")
+main("new_events1")