From 8aadfe845f3e9b3e619c1158758276cbcfdb0e50 Mon Sep 17 00:00:00 2001 From: Zohar Babin Date: Wed, 25 Dec 2024 19:53:49 -0500 Subject: [PATCH] Refactor message handling to replace viral messages with categorized message display; add MessageCategory model and update analysis prompt for improved insights. --- gh_static_front/app.js | 103 +++++++++++++++++++++++++------------ gh_static_front/index.html | 9 ++-- main.py | 52 +++++++++++++++---- 3 files changed, 118 insertions(+), 46 deletions(-) diff --git a/gh_static_front/app.js b/gh_static_front/app.js index b6a51f5..af8e250 100644 --- a/gh_static_front/app.js +++ b/gh_static_front/app.js @@ -231,7 +231,7 @@ function displayResults(data) { displayMemorableMoments(data); displayHappiestDays(data); displaySaddestDays(data); - displayViralMessages(data); + displayMessageCategories(data); displayMediaStats(data); displaySharedLinks(data); @@ -406,39 +406,6 @@ function displaySaddestDays(data) { document.getElementById('saddest-days').innerHTML = saddestHtml; } -function displayViralMessages(data) { - if (!data.viral_messages) { - console.log('No viral messages found'); - return; - } - - const viralHtml = data.viral_messages - .map(msg => { - return ` -
-
-
-

${msg.message}

-
- 💬 ${msg.replies} - ❤️ ${msg.reactions} -
-
-
-
- ${msg.thread.map(reply => ` -

${reply}

- `).join('')} -
-
- `; - }) - .join(''); - - document.getElementById('viral-messages').innerHTML = viralHtml; -} - function displayMediaStats(data) { if (!data.media_stats) { console.log('No media stats found'); @@ -532,4 +499,72 @@ function displaySharedLinks(data) { .join(''); document.getElementById('shared-links').innerHTML = linksHtml; +} + +function displayMessageCategories(data) { + if (!data.message_categories || data.message_categories.length === 0) { + console.log('No message categories found'); + return; + } + + const categoryColors = { + 'celebration': 'from-yellow-50 to-yellow-100', + 'business': 'from-blue-50 to-blue-100', + 'team': 'from-green-50 to-green-100', + 'strategic': 'from-purple-50 to-purple-100', + 'knowledge': 'from-red-50 to-red-100' + }; + + const getGradient = (category) => { + const baseCategory = Object.keys(categoryColors).find(key => + category.toLowerCase().includes(key.toLowerCase()) + ); + return categoryColors[baseCategory] || 'from-gray-50 to-gray-100'; + }; + + const categoriesHtml = data.message_categories + .map(category => ` +
+
+
+
+

${category.category}

+

${category.subcategory}

+
+ + Impact: ${(category.impact_score * 100).toFixed(0)}% + +
+ +
+ ${category.messages.map(msg => ` +
+

${msg}

+
+ `).join('')} +
+ +
+ ${category.participants.map(participant => ` + + ${participant} + + `).join('')} +
+ +
+ ${category.context} +
+ +
+ ${category.timestamp} +
+
+
+ `) + .join(''); + + document.getElementById('message-categories').innerHTML = categoriesHtml; } \ No newline at end of file diff --git a/gh_static_front/index.html b/gh_static_front/index.html index c93ae51..c4374bf 100644 --- a/gh_static_front/index.html +++ b/gh_static_front/index.html @@ -120,10 +120,12 @@

Most Reflective Days

- +
-

Epic Chat Moments

-
+

Message Categories

+
+ +
@@ -162,6 +164,7 @@

Holiday Greeting

Your Group's Holiday Poem

+ diff --git a/main.py b/main.py index 957e3a8..96790ee 100644 --- a/main.py +++ b/main.py @@ -160,6 +160,15 @@ class MediaStats(BaseModel): top_media_sharers: List[UserActivity] most_reacted_media: List[MediaItem] +class MessageCategory(BaseModel): + category: str + subcategory: str + messages: List[str] + context: str + participants: List[str] + impact_score: float + timestamp: str + class ChatSummary(BaseModel): most_active_users: List[UserActivity] popular_topics: List[str] @@ -175,6 +184,7 @@ class ChatSummary(BaseModel): shared_links: List[SharedLink] chat_poem: str media_stats: MediaStats + message_categories: List[MessageCategory] def calculate_md5(content: bytes) -> str: """Calculate MD5 hash of file content.""" @@ -585,13 +595,18 @@ async def process_date_group(dates): batch_results = [] for date in dates: - day_messages = daily_messages[date] - if len(day_messages) > 5: - indices = [0, len(day_messages)//4, len(day_messages)//2, - (3*len(day_messages))//4, len(day_messages)-1] - group_messages.extend([day_messages[i] for i in indices]) + # Filter out media messages first + filtered_messages = [ + msg for msg in daily_messages[date] + if not MEDIA_PATTERN.search(str(msg)) + ] + + if len(filtered_messages) > 5: + indices = [0, len(filtered_messages)//4, len(filtered_messages)//2, + (3*len(filtered_messages))//4, len(filtered_messages)-1] + group_messages.extend([filtered_messages[i] for i in indices]) else: - group_messages.extend(day_messages) + group_messages.extend(filtered_messages) if len(group_messages) >= 15: sentiment = await analyze_sentiment_batch(group_messages) @@ -695,11 +710,29 @@ async def analyze_chat(file: UploadFile = File(...)): remaining = df[~df['message'].isin(samples)].sample(n=min(sample_size - len(samples), len(df))) samples.extend(remaining['message'].tolist()) - prompt = f"""Analyze this WhatsApp chat and provide insights in the following format: + prompt = f"""Analyze this WhatsApp chat and provide comprehensive insights with the following structure: + 1. Key topics discussed (max 5) 2. Three most memorable moments 3. A festive holiday greeting based on the chat context 4. Create a comedic rhyming poem (at least 8 lines) that tells a story about the group's memorable moments and inside jokes. Make it festive and entertaining! + 5. Categorize messages into meaningful groups by analyzing: + - Type of interaction (celebration, milestone, discussion, etc.) + - Context and significance + - Participant dynamics + - Impact on team/organization + - Cultural significance + + For each identified category, provide: + - Category name and subcategory + - Representative messages + - Context and significance + - Involved participants + - Impact score (0.0 to 1.0) + - Timestamp + + Don't use predetermined categories - identify natural patterns and groupings that emerge from the content. + Consider message context, participant engagement, long-term significance, and cultural dynamics. Chat sample: {' '.join(samples)}""" @@ -879,7 +912,7 @@ def process_message_threads(messages_df: pd.DataFrame) -> List[ViralMessage]: word_counts_converted = {k: int(v) for k, v in word_counts.items()} activity_converted = {k: int(v) for k, v in activity.items()} - # Create summary with properly structured data + # Create summary with properly structured data including message categories summary = ChatSummary( most_active_users=[UserActivity(name=k, count=v) for k, v in most_active_converted.items()], popular_topics=response.popular_topics, @@ -894,7 +927,8 @@ def process_message_threads(messages_df: pd.DataFrame) -> List[ViralMessage]: viral_messages=viral_messages, shared_links=shared_links, chat_poem=response.chat_poem, - media_stats=media_stats + media_stats=media_stats, + message_categories=response.message_categories if hasattr(response, 'message_categories') else [] ) analysis_time = time.time() - analysis_start