-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTextToChatBenchmarking.py
392 lines (373 loc) · 51.5 KB
/
TextToChatBenchmarking.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
# Local libraries
import LLMConfiguration
import DatabaseConfiguration
# Others
from rouge import Rouge
from bert_score import score
import numpy as np
import time
# 15 easy test cases
def load_easy_benchmark():
    """Return the easy tier of the summarization benchmark.

    Each returned case is a dict with three string entries, in this order:
    ``question`` (the question being asked), ``raw_data`` (the rows to
    summarize, stored as the text of a list of dicts) and ``summary`` (the
    expected summary text). Contexts in this tier are 100-250 tokens long;
    the comment above each case records its token count.
    """
    keys = ("question", "raw_data", "summary")
    # Every row below is written positionally in the order of ``keys``.
    rows = [
        # 146 tokens
        (
            "What are the names of the employees in the Sales department?",
            "[{'EMPLOYEE_NAME': 'John Doe', 'DEPT_NAME': 'Sales'}, {'EMPLOYEE_NAME': 'Jane Smith', 'DEPT_NAME': 'Sales'}]",
            "Employees in the Sales department:\n- John Doe\n- Jane Smith",
        ),
        # 145 tokens
        (
            "What are the total sales for each product?",
            "[{'PRODUCT_NAME': 'Product A', 'TOTAL_SALES': 1000}, {'PRODUCT_NAME': 'Product B', 'TOTAL_SALES': 1500}]",
            "Total sales for each product:\n- Product A: 1000\n- Product B: 1500",
        ),
        # 140 tokens
        (
            "List all vendor names from the USA.",
            "[{'VENDOR_NAME': 'Vendor X', 'COUNTRY_CODE': 'USA'}, {'VENDOR_NAME': 'Vendor Y', 'COUNTRY_CODE': 'USA'}]",
            "Vendors from the USA:\n- Vendor X\n- Vendor Y",
        ),
        # 150 tokens
        (
            "What are the names of products with sales greater than 500?",
            "[{'PRODUCT_NAME': 'Product A', 'TOTAL_SALES': 1000}, {'PRODUCT_NAME': 'Product B', 'TOTAL_SALES': 700}]",
            "Products with sales greater than 500:\n- Product A: 1000\n- Product B: 700",
        ),
        # 142 tokens
        (
            "List all departments and their total number of employees.",
            "[{'DEPT_NAME': 'Sales', 'EMPLOYEE_COUNT': 10}, {'DEPT_NAME': 'Marketing', 'EMPLOYEE_COUNT': 8}]",
            "Departments and their total number of employees:\n- Sales: 10\n- Marketing: 8",
        ),
        # 200 tokens
        (
            "What are the names of employees who have worked on projects with payment amounts exceeding 10,000?",
            "[{'EMPLOYEE_NAME': 'John Doe', 'PAYMENT_AMOUNT': 12000}, {'EMPLOYEE_NAME': 'Jane Smith', 'PAYMENT_AMOUNT': 15000}, {'EMPLOYEE_NAME': 'Alice Johnson', 'PAYMENT_AMOUNT': 18000}]",
            "Employees who have worked on projects with payment amounts exceeding 10,000:\n- John Doe: 12,000\n- Jane Smith: 15,000\n- Alice Johnson: 18,000",
        ),
        # 175 tokens
        (
            "List the names and sales amounts of products sold in the last quarter.",
            "[{'PRODUCT_NAME': 'Product X', 'SALES_AMOUNT': 2000}, {'PRODUCT_NAME': 'Product Y', 'SALES_AMOUNT': 3000}, {'PRODUCT_NAME': 'Product Z', 'SALES_AMOUNT': 4000}]",
            "Products sold in the last quarter:\n- Product X: 2,000\n- Product Y: 3,000\n- Product Z: 4,000",
        ),
        # 170 tokens
        (
            "What are the total sales for each department?",
            "[{'DEPT_NAME': 'Sales', 'TOTAL_SALES': 50000}, {'DEPT_NAME': 'Marketing', 'TOTAL_SALES': 60000}, {'DEPT_NAME': 'Engineering', 'TOTAL_SALES': 70000}]",
            "Total sales for each department:\n- Sales: 50,000\n- Marketing: 60,000\n- Engineering: 70,000",
        ),
        # 187 tokens
        (
            "List the employees who have received bonuses and their respective bonus amounts.",
            "[{'EMPLOYEE_NAME': 'John Doe', 'BONUS_AMOUNT': 1000}, {'EMPLOYEE_NAME': 'Jane Smith', 'BONUS_AMOUNT': 1500}, {'EMPLOYEE_NAME': 'Alice Johnson', 'BONUS_AMOUNT': 2000}]",
            "Employees who have received bonuses:\n- John Doe: 1,000\n- Jane Smith: 1,500\n- Alice Johnson: 2,000",
        ),
        # 186 tokens
        (
            "Show the vendor names and their corresponding payment amounts for the last fiscal year.",
            "[{'VENDOR_NAME': 'Vendor A', 'PAYMENT_AMOUNT': 10000}, {'VENDOR_NAME': 'Vendor B', 'PAYMENT_AMOUNT': 20000}, {'VENDOR_NAME': 'Vendor C', 'PAYMENT_AMOUNT': 30000}]",
            "Vendors and their payment amounts for the last fiscal year:\n- Vendor A: 10,000\n- Vendor B: 20,000\n- Vendor C: 30,000",
        ),
        # 158 tokens
        (
            "List all departments along with the number of projects they have handled.",
            "[{'DEPT_NAME': 'Sales', 'PROJECT_COUNT': 5}, {'DEPT_NAME': 'Marketing', 'PROJECT_COUNT': 8}, {'DEPT_NAME': 'Engineering', 'PROJECT_COUNT': 12}]",
            "Departments and the number of projects handled:\n- Sales: 5\n- Marketing: 8\n- Engineering: 12",
        ),
        # 185 tokens
        (
            "What are the names of vendors with payment amounts greater than the average?",
            "[{'VENDOR_NAME': 'Vendor X', 'PAYMENT_AMOUNT': 25000}, {'VENDOR_NAME': 'Vendor Y', 'PAYMENT_AMOUNT': 30000}, {'VENDOR_NAME': 'Vendor Z', 'PAYMENT_AMOUNT': 35000}]",
            "Vendors with payment amounts greater than the average:\n- Vendor X: 25,000\n- Vendor Y: 30,000\n- Vendor Z: 35,000",
        ),
        # 184 tokens
        (
            "List the products and their sales amounts that contribute to over 50% of the total sales.",
            "[{'PRODUCT_NAME': 'Product A', 'SALES_AMOUNT': 50000}, {'PRODUCT_NAME': 'Product B', 'SALES_AMOUNT': 60000}, {'PRODUCT_NAME': 'Product C', 'SALES_AMOUNT': 70000}]",
            "Products contributing over 50% of total sales:\n- Product A: 50,000\n- Product B: 60,000\n- Product C: 70,000",
        ),
        # 174 tokens
        (
            "Show the employee names and the total number of projects they have worked on in the past year.",
            "[{'EMPLOYEE_NAME': 'John Doe', 'PROJECT_COUNT': 4}, {'EMPLOYEE_NAME': 'Jane Smith', 'PROJECT_COUNT': 5}, {'EMPLOYEE_NAME': 'Alice Johnson', 'PROJECT_COUNT': 6}]",
            "Employees and the total number of projects they have worked on in the past year:\n- John Doe: 4 projects\n- Jane Smith: 5 projects\n- Alice Johnson: 6 projects",
        ),
        # 223 tokens
        (
            "What are the sales figures for each product category in the last quarter?",
            "[{'CATEGORY_NAME': 'Electronics', 'SALES': 100000}, {'CATEGORY_NAME': 'Furniture', 'SALES': 150000}, {'CATEGORY_NAME': 'Clothing', 'SALES': 120000}, {'CATEGORY_NAME': 'Sports', 'SALES': 110000}, {'CATEGORY_NAME': 'Toys', 'SALES': 130000}]",
            "Sales figures for each product category in the last quarter:\n- Electronics: 100,000\n- Furniture: 150,000\n- Clothing: 120,000\n- Sports: 110,000\n- Toys: 130,000",
        ),
        # The two cases below are disabled and are NOT part of the returned list.
        # # 216 tokens
        # (
        #     "List the names of employees and the number of projects they have completed this year.",
        #     "[{'EMPLOYEE_NAME': 'John Doe', 'PROJECT_COUNT': 5}, {'EMPLOYEE_NAME': 'Jane Smith', 'PROJECT_COUNT': 6}, {'EMPLOYEE_NAME': 'Alice Johnson', 'PROJECT_COUNT': 7}, {'EMPLOYEE_NAME': 'Bob Brown', 'PROJECT_COUNT': 8}, {'EMPLOYEE_NAME': 'Charlie Black', 'PROJECT_COUNT': 9}]",
        #     "Employees and the number of projects they have completed this year:\n- John Doe: 5 projects\n- Jane Smith: 6 projects\n- Alice Johnson: 7 projects\n- Bob Brown: 8 projects\n- Charlie Black: 9 projects",
        # ),
        # # 211 tokens
        # (
        #     "What are the total sales and number of transactions for each region?",
        #     "[{'REGION_NAME': 'North', 'TOTAL_SALES': 80000, 'TRANSACTION_COUNT': 120}, {'REGION_NAME': 'South', 'TOTAL_SALES': 90000, 'TRANSACTION_COUNT': 130}, {'REGION_NAME': 'East', 'TOTAL_SALES': 100000, 'TRANSACTION_COUNT': 140}]",
        #     "Total sales and number of transactions for each region:\n- North: 80,000 (120 transactions)\n- South: 90,000 (130 transactions)\n- East: 100,000 (140 transactions)",
        # ),
    ]
    # Build a fresh list of fresh dicts on every call, as a literal would.
    return [dict(zip(keys, row)) for row in rows]
# 15 medium test cases
def load_medium_benchmark():
    """Return the medium tier of the summarization benchmark.

    Each returned case is a dict with three string entries: ``question``
    (the question being asked), ``raw_data`` (the rows to summarize, stored
    as the text of a list of dicts) and ``summary`` (the expected summary
    text). Contexts in this tier are 250-500 tokens long; the comment at the
    top of each case records its token count.

    Returns:
        A new list of 15 case dicts, built fresh on every call.
    """
    return [
        {
            # 301 tokens
            "question": "List the top 5 employees with the highest sales and their respective departments.",
            "raw_data": "[{'EMPLOYEE_NAME': 'John Doe', 'DEPT_NAME': 'Sales', 'TOTAL_SALES': 200000}, {'EMPLOYEE_NAME': 'Jane Smith', 'DEPT_NAME': 'Marketing', 'TOTAL_SALES': 180000}, {'EMPLOYEE_NAME': 'Alice Johnson', 'DEPT_NAME': 'Engineering', 'TOTAL_SALES': 170000}, {'EMPLOYEE_NAME': 'Bob Brown', 'DEPT_NAME': 'Sales', 'TOTAL_SALES': 160000}, {'EMPLOYEE_NAME': 'Charlie Black', 'DEPT_NAME': 'Sales', 'TOTAL_SALES': 150000}]",
            "summary": "Top 5 employees with the highest sales and their respective departments:\n- John Doe (Sales): 200,000\n- Jane Smith (Marketing): 180,000\n- Alice Johnson (Engineering): 170,000\n- Bob Brown (Sales): 160,000\n- Charlie Black (Sales): 150,000",
        },
        {
            # 464 tokens
            "question": "What are the monthly sales for each product in the last 6 months?",
            "raw_data": "[{'PRODUCT_NAME': 'Product A', 'MONTH': 'January', 'SALES': 10000}, {'PRODUCT_NAME': 'Product A', 'MONTH': 'February', 'SALES': 12000}, {'PRODUCT_NAME': 'Product A', 'MONTH': 'March', 'SALES': 13000}, {'PRODUCT_NAME': 'Product A', 'MONTH': 'April', 'SALES': 14000}, {'PRODUCT_NAME': 'Product A', 'MONTH': 'May', 'SALES': 15000}, {'PRODUCT_NAME': 'Product A', 'MONTH': 'June', 'SALES': 16000}, {'PRODUCT_NAME': 'Product B', 'MONTH': 'January', 'SALES': 20000}, {'PRODUCT_NAME': 'Product B', 'MONTH': 'February', 'SALES': 22000}, {'PRODUCT_NAME': 'Product B', 'MONTH': 'March', 'SALES': 23000}, {'PRODUCT_NAME': 'Product B', 'MONTH': 'April', 'SALES': 24000}, {'PRODUCT_NAME': 'Product B', 'MONTH': 'May', 'SALES': 25000}, {'PRODUCT_NAME': 'Product B', 'MONTH': 'June', 'SALES': 26000}]",
            "summary": "Monthly sales for each product in the last 6 months:\n- Product A:\n - January: 10,000\n - February: 12,000\n - March: 13,000\n - April: 14,000\n - May: 15,000\n - June: 16,000\n- Product B:\n - January: 20,000\n - February: 22,000\n - March: 23,000\n - April: 24,000\n - May: 25,000\n - June: 26,000",
        },
        {
            # 334 tokens
            "question": "What are the total expenses and revenues for each department in the last fiscal year?",
            "raw_data": "[{'DEPT_NAME': 'Sales', 'TOTAL_EXPENSES': 30000, 'TOTAL_REVENUES': 500000}, {'DEPT_NAME': 'Marketing', 'TOTAL_EXPENSES': 20000, 'TOTAL_REVENUES': 400000}, {'DEPT_NAME': 'Engineering', 'TOTAL_EXPENSES': 25000, 'TOTAL_REVENUES': 450000}, {'DEPT_NAME': 'HR', 'TOTAL_EXPENSES': 15000, 'TOTAL_REVENUES': 350000}, {'DEPT_NAME': 'Finance', 'TOTAL_EXPENSES': 10000, 'TOTAL_REVENUES': 300000}]",
            "summary": "Total expenses and revenues for each department in the last fiscal year:\n- Sales: 30,000 expenses, 500,000 revenues\n- Marketing: 20,000 expenses, 400,000 revenues\n- Engineering: 25,000 expenses, 450,000 revenues\n- HR: 15,000 expenses, 350,000 revenues\n- Finance: 10,000 expenses, 300,000 revenues",
        },
        {
            # 471 tokens
            "question": "What are the monthly sales figures and the number of returns for each product?",
            "raw_data": "[{'PRODUCT_NAME': 'Product A', 'MONTH': 'January', 'SALES': 10000, 'RETURNS': 50}, {'PRODUCT_NAME': 'Product A', 'MONTH': 'February', 'SALES': 12000, 'RETURNS': 60}, {'PRODUCT_NAME': 'Product A', 'MONTH': 'March', 'SALES': 13000, 'RETURNS': 70}, {'PRODUCT_NAME': 'Product B', 'MONTH': 'January', 'SALES': 20000, 'RETURNS': 100}, {'PRODUCT_NAME': 'Product B', 'MONTH': 'February', 'SALES': 22000, 'RETURNS': 110}, {'PRODUCT_NAME': 'Product B', 'MONTH': 'March', 'SALES': 23000, 'RETURNS': 120}, {'PRODUCT_NAME': 'Product C', 'MONTH': 'January', 'SALES': 15000, 'RETURNS': 75}, {'PRODUCT_NAME': 'Product C', 'MONTH': 'February', 'SALES': 17000, 'RETURNS': 85}, {'PRODUCT_NAME': 'Product C', 'MONTH': 'March', 'SALES': 18000, 'RETURNS': 95}]",
            "summary": "Monthly sales figures and number of returns for each product:\n- Product A:\n - January: 10,000 sales, 50 returns\n - February: 12,000 sales, 60 returns\n - March: 13,000 sales, 70 returns\n- Product B:\n - January: 20,000 sales, 100 returns\n - February: 22,000 sales, 110 returns\n - March: 23,000 sales, 120 returns\n- Product C:\n - January: 15,000 sales, 75 returns\n - February: 17,000 sales, 85 returns\n - March: 18,000 sales, 95 returns",
        },
        {
            # 302 tokens
            "question": "List the top 5 vendors with the highest total sales and the number of products they have sold.",
            "raw_data": "[{'VENDOR_NAME': 'Vendor X', 'TOTAL_SALES': 300000, 'PRODUCT_COUNT': 50}, {'VENDOR_NAME': 'Vendor Y', 'TOTAL_SALES': 280000, 'PRODUCT_COUNT': 45}, {'VENDOR_NAME': 'Vendor Z', 'TOTAL_SALES': 260000, 'PRODUCT_COUNT': 40}, {'VENDOR_NAME': 'Vendor A', 'TOTAL_SALES': 250000, 'PRODUCT_COUNT': 35}, {'VENDOR_NAME': 'Vendor B', 'TOTAL_SALES': 240000, 'PRODUCT_COUNT': 30}]",
            "summary": "Top 5 vendors with the highest total sales and the number of products they have sold:\n- Vendor X: 300,000 total sales, 50 products\n- Vendor Y: 280,000 total sales, 45 products\n- Vendor Z: 260,000 total sales, 40 products\n- Vendor A: 250,000 total sales, 35 products\n- Vendor B: 240,000 total sales, 30 products",
        },
        {
            # 432 tokens
            "question": "What are the annual performance metrics for each employee in terms of sales, expenses, and profit?",
            "raw_data": "[{'EMPLOYEE_NAME': 'John Doe', 'ANNUAL_SALES': 300000, 'ANNUAL_EXPENSES': 200000, 'ANNUAL_PROFIT': 100000}, {'EMPLOYEE_NAME': 'Jane Smith', 'ANNUAL_SALES': 250000, 'ANNUAL_EXPENSES': 150000, 'ANNUAL_PROFIT': 100000}, {'EMPLOYEE_NAME': 'Alice Johnson', 'ANNUAL_SALES': 200000, 'ANNUAL_EXPENSES': 100000, 'ANNUAL_PROFIT': 100000}, {'EMPLOYEE_NAME': 'Bob Brown', 'ANNUAL_SALES': 150000, 'ANNUAL_EXPENSES': 50000, 'ANNUAL_PROFIT': 100000}, {'EMPLOYEE_NAME': 'Charlie Black', 'ANNUAL_SALES': 100000, 'ANNUAL_EXPENSES': 50000, 'ANNUAL_PROFIT': 50000}]",
            "summary": "Annual performance metrics for each employee:\n- John Doe: 300,000 sales, 200,000 expenses, 100,000 profit\n- Jane Smith: 250,000 sales, 150,000 expenses, 100,000 profit\n- Alice Johnson: 200,000 sales, 100,000 expenses, 100,000 profit\n- Bob Brown: 150,000 sales, 50,000 expenses, 100,000 profit\n- Charlie Black: 100,000 sales, 50,000 expenses, 50,000 profit",
        },
        {
            # 297 tokens
            "question": "Provide a summary of the total revenues and expenses for the company's five departments.",
            "raw_data": "[{'DEPT_NAME': 'Sales', 'REVENUES': 500000, 'EXPENSES': 30000}, {'DEPT_NAME': 'Marketing', 'REVENUES': 400000, 'EXPENSES': 20000}, {'DEPT_NAME': 'Engineering', 'REVENUES': 450000, 'EXPENSES': 25000}, {'DEPT_NAME': 'HR', 'REVENUES': 350000, 'EXPENSES': 15000}, {'DEPT_NAME': 'Finance', 'REVENUES': 300000, 'EXPENSES': 10000}]",
            "summary": "Total revenues and expenses for the company's departments:\n- Sales: 500,000 revenues, 30,000 expenses\n- Marketing: 400,000 revenues, 20,000 expenses\n- Engineering: 450,000 revenues, 25,000 expenses\n- HR: 350,000 revenues, 15,000 expenses\n- Finance: 300,000 revenues, 10,000 expenses",
        },
        {
            # 321 tokens
            "question": "What are the total expenditures and the average transaction amount for each vendor?",
            "raw_data": "[{'VENDOR_NAME': 'Vendor A', 'TOTAL_EXPENDITURES': 120000, 'TRANSACTION_COUNT': 240}, {'VENDOR_NAME': 'Vendor B', 'TOTAL_EXPENDITURES': 95000, 'TRANSACTION_COUNT': 190}, {'VENDOR_NAME': 'Vendor C', 'TOTAL_EXPENDITURES': 110000, 'TRANSACTION_COUNT': 220}, {'VENDOR_NAME': 'Vendor D', 'TOTAL_EXPENDITURES': 105000, 'TRANSACTION_COUNT': 210}, {'VENDOR_NAME': 'Vendor E', 'TOTAL_EXPENDITURES': 98000, 'TRANSACTION_COUNT': 196}]",
            # The question asks for the average, so the reference reports
            # TOTAL_EXPENDITURES / TRANSACTION_COUNT (500 for every vendor)
            # rather than echoing the raw transaction count.
            "summary": "Total expenditures and average transaction amount for each vendor:\n- Vendor A: 120,000 expenditures, 500 average transaction amount\n- Vendor B: 95,000 expenditures, 500 average transaction amount\n- Vendor C: 110,000 expenditures, 500 average transaction amount\n- Vendor D: 105,000 expenditures, 500 average transaction amount\n- Vendor E: 98,000 expenditures, 500 average transaction amount",
        },
        {
            # 372 tokens
            "question": "Summarize the quarterly revenue and expense data for each department.",
            "raw_data": "[{'DEPT_NAME': 'Sales', 'QUARTER': 'Q1', 'REVENUE': 150000, 'EXPENSE': 7500}, {'DEPT_NAME': 'Sales', 'QUARTER': 'Q2', 'REVENUE': 130000, 'EXPENSE': 6500}, {'DEPT_NAME': 'Marketing', 'QUARTER': 'Q1', 'REVENUE': 120000, 'EXPENSE': 7000}, {'DEPT_NAME': 'Marketing', 'QUARTER': 'Q2', 'REVENUE': 110000, 'EXPENSE': 6000}, {'DEPT_NAME': 'Engineering', 'QUARTER': 'Q1', 'REVENUE': 135000, 'EXPENSE': 8500}, {'DEPT_NAME': 'Engineering', 'QUARTER': 'Q2', 'REVENUE': 140000, 'EXPENSE': 9000}]",
            "summary": "Quarterly revenue and expense data for each department:\n- Sales: Q1: 150,000 revenue, 7,500 expense; Q2: 130,000 revenue, 6,500 expense\n- Marketing: Q1: 120,000 revenue, 7,000 expense; Q2: 110,000 revenue, 6,000 expense\n- Engineering: Q1: 135,000 revenue, 8,500 expense; Q2: 140,000 revenue, 9,000 expense",
        },
        {
            # 309 tokens
            "question": "What are the total payments and the number of transactions for each vendor in the last quarter?",
            "raw_data": "[{'VENDOR_NAME': 'Vendor X', 'TOTAL_PAYMENTS': 100000, 'TRANSACTIONS': 200}, {'VENDOR_NAME': 'Vendor Y', 'TOTAL_PAYMENTS': 95000, 'TRANSACTIONS': 180}, {'VENDOR_NAME': 'Vendor Z', 'TOTAL_PAYMENTS': 120000, 'TRANSACTIONS': 210}, {'VENDOR_NAME': 'Vendor W', 'TOTAL_PAYMENTS': 110000, 'TRANSACTIONS': 190}, {'VENDOR_NAME': 'Vendor V', 'TOTAL_PAYMENTS': 105000, 'TRANSACTIONS': 205}]",
            "summary": "Total payments and number of transactions for each vendor in the last quarter:\n- Vendor X: 100,000 payments (200 transactions)\n- Vendor Y: 95,000 payments (180 transactions)\n- Vendor Z: 120,000 payments (210 transactions)\n- Vendor W: 110,000 payments (190 transactions)\n- Vendor V: 105,000 payments (205 transactions)",
        },
        {
            # 309 tokens
            "question": "Summarize the sales and number of units sold for each product category.",
            "raw_data": "[{'CATEGORY': 'Electronics', 'TOTAL_SALES': 200000, 'UNITS_SOLD': 4000}, {'CATEGORY': 'Furniture', 'TOTAL_SALES': 150000, 'UNITS_SOLD': 3000}, {'CATEGORY': 'Clothing', 'TOTAL_SALES': 180000, 'UNITS_SOLD': 3600}, {'CATEGORY': 'Toys', 'TOTAL_SALES': 140000, 'UNITS_SOLD': 2800}, {'CATEGORY': 'Books', 'TOTAL_SALES': 120000, 'UNITS_SOLD': 2400}]",
            "summary": "Sales and number of units sold for each product category:\n- Electronics: 200,000 sales (4000 units)\n- Furniture: 150,000 sales (3000 units)\n- Clothing: 180,000 sales (3600 units)\n- Toys: 140,000 sales (2800 units)\n- Books: 120,000 sales (2400 units)",
        },
        {
            # 295 tokens
            "question": "Provide a detailed report of the number of employees and the total hours worked in each department.",
            "raw_data": "[{'DEPT_NAME': 'Sales', 'EMPLOYEE_COUNT': 50, 'TOTAL_HOURS': 8000}, {'DEPT_NAME': 'Marketing', 'EMPLOYEE_COUNT': 40, 'TOTAL_HOURS': 6400}, {'DEPT_NAME': 'Engineering', 'EMPLOYEE_COUNT': 60, 'TOTAL_HOURS': 9600}, {'DEPT_NAME': 'HR', 'EMPLOYEE_COUNT': 30, 'TOTAL_HOURS': 4800}, {'DEPT_NAME': 'Finance', 'EMPLOYEE_COUNT': 35, 'TOTAL_HOURS': 5600}]",
            "summary": "Number of employees and total hours worked in each department:\n- Sales: 50 employees, 8000 hours\n- Marketing: 40 employees, 6400 hours\n- Engineering: 60 employees, 9600 hours\n- HR: 30 employees, 4800 hours\n- Finance: 35 employees, 5600 hours",
        },
        {
            # 440 tokens
            "question": "What are the total sales and number of units sold for each product in the last year?",
            "raw_data": "[{'PRODUCT_NAME': 'Product A', 'TOTAL_SALES': 120000, 'UNITS_SOLD': 4000}, {'PRODUCT_NAME': 'Product B', 'TOTAL_SALES': 110000, 'UNITS_SOLD': 3500}, {'PRODUCT_NAME': 'Product C', 'TOTAL_SALES': 130000, 'UNITS_SOLD': 4200}, {'PRODUCT_NAME': 'Product D', 'TOTAL_SALES': 115000, 'UNITS_SOLD': 3800}, {'PRODUCT_NAME': 'Product E', 'TOTAL_SALES': 105000, 'UNITS_SOLD': 3400}, {'PRODUCT_NAME': 'Product F', 'TOTAL_SALES': 125000, 'UNITS_SOLD': 4100}, {'PRODUCT_NAME': 'Product G', 'TOTAL_SALES': 135000, 'UNITS_SOLD': 4300}, {'PRODUCT_NAME': 'Product H', 'TOTAL_SALES': 140000, 'UNITS_SOLD': 4500}]",
            "summary": "Total sales and number of units sold for each product in the last year:\n- Product A: 120,000 sales (4,000 units)\n- Product B: 110,000 sales (3,500 units)\n- Product C: 130,000 sales (4,200 units)\n- Product D: 115,000 sales (3,800 units)\n- Product E: 105,000 sales (3,400 units)\n- Product F: 125,000 sales (4,100 units)\n- Product G: 135,000 sales (4,300 units)\n- Product H: 140,000 sales (4,500 units)",
        },
        {
            # 338 tokens
            "question": "What are the annual sales and average sales per transaction for each product in the Electronics category?",
            "raw_data": "[{'PRODUCT_NAME': 'Laptop', 'ANNUAL_SALES': 120000, 'TRANSACTION_COUNT': 400}, {'PRODUCT_NAME': 'Smartphone', 'ANNUAL_SALES': 100000, 'TRANSACTION_COUNT': 500}, {'PRODUCT_NAME': 'Tablet', 'ANNUAL_SALES': 80000, 'TRANSACTION_COUNT': 300}, {'PRODUCT_NAME': 'Headphones', 'ANNUAL_SALES': 50000, 'TRANSACTION_COUNT': 600}, {'PRODUCT_NAME': 'Smartwatch', 'ANNUAL_SALES': 60000, 'TRANSACTION_COUNT': 400}, {'PRODUCT_NAME': 'Camera', 'ANNUAL_SALES': 90000, 'TRANSACTION_COUNT': 450}]",
            "summary": "Annual sales and average sales per transaction for each product in the Electronics category:\n- Laptop: 120,000 annual sales, 300 average sales per transaction\n- Smartphone: 100,000 annual sales, 200 average sales per transaction\n- Tablet: 80,000 annual sales, 267 average sales per transaction\n- Headphones: 50,000 annual sales, 83 average sales per transaction\n- Smartwatch: 60,000 annual sales, 150 average sales per transaction\n- Camera: 90,000 annual sales, 200 average sales per transaction",
        },
        {
            # 334 tokens
            "question": "Provide a detailed breakdown of the total hours worked and average hours worked per employee for each department.",
            "raw_data": "[{'DEPT_NAME': 'Sales', 'TOTAL_HOURS': 8000, 'EMPLOYEE_COUNT': 50}, {'DEPT_NAME': 'Marketing', 'TOTAL_HOURS': 6400, 'EMPLOYEE_COUNT': 40}, {'DEPT_NAME': 'Engineering', 'TOTAL_HOURS': 9600, 'EMPLOYEE_COUNT': 60}, {'DEPT_NAME': 'HR', 'TOTAL_HOURS': 4800, 'EMPLOYEE_COUNT': 30}, {'DEPT_NAME': 'Finance', 'TOTAL_HOURS': 5600, 'EMPLOYEE_COUNT': 35}, {'DEPT_NAME': 'Support', 'TOTAL_HOURS': 7200, 'EMPLOYEE_COUNT': 45}]",
            "summary": "Total hours worked and average hours worked per employee for each department:\n- Sales: 8000 total hours, 160 average hours per employee\n- Marketing: 6400 total hours, 160 average hours per employee\n- Engineering: 9600 total hours, 160 average hours per employee\n- HR: 4800 total hours, 160 average hours per employee\n- Finance: 5600 total hours, 160 average hours per employee\n- Support: 7200 total hours, 160 average hours per employee",
        },
    ]
# 15 hard test cases
def load_hard_benchmark():
    """Return the "hard" benchmark cases (text context > 500 tokens to summarize).

    Every case is a dict with three string fields:
      - "question": the natural-language question being asked,
      - "raw_data": a string holding a list of row dicts to summarize,
      - "summary": the reference summary the model output is scored against.

    A fresh list of fresh dicts is built on every call.
    """
    # One (question, raw_data, summary) triple per case. The token count noted
    # above each triple is the size annotation carried with that case.
    cases = (
        (
            # 600 tokens
            # NOTE(review): this case is repeated verbatim as the fourth case below.
            "What are the quarterly sales and number of transactions for each product category?",
            "[{'CATEGORY_NAME': 'Electronics', 'Q1_SALES': 50000, 'Q1_TRANSACTIONS': 300, 'Q2_SALES': 60000, 'Q2_TRANSACTIONS': 350, 'Q3_SALES': 70000, 'Q3_TRANSACTIONS': 400, 'Q4_SALES': 80000, 'Q4_TRANSACTIONS': 450}, {'CATEGORY_NAME': 'Furniture', 'Q1_SALES': 40000, 'Q1_TRANSACTIONS': 250, 'Q2_SALES': 50000, 'Q2_TRANSACTIONS': 300, 'Q3_SALES': 60000, 'Q3_TRANSACTIONS': 350, 'Q4_SALES': 70000, 'Q4_TRANSACTIONS': 400}, {'CATEGORY_NAME': 'Clothing', 'Q1_SALES': 30000, 'Q1_TRANSACTIONS': 200, 'Q2_SALES': 40000, 'Q2_TRANSACTIONS': 250, 'Q3_SALES': 50000, 'Q3_TRANSACTIONS': 300, 'Q4_SALES': 60000, 'Q4_TRANSACTIONS': 350}, {'CATEGORY_NAME': 'Toys', 'Q1_SALES': 20000, 'Q1_TRANSACTIONS': 150, 'Q2_SALES': 30000, 'Q2_TRANSACTIONS': 200, 'Q3_SALES': 40000, 'Q3_TRANSACTIONS': 250, 'Q4_SALES': 50000, 'Q4_TRANSACTIONS': 300}]",
            "Quarterly sales and number of transactions for each product category:\n- Electronics:\n - Q1: 50,000 sales, 300 transactions\n - Q2: 60,000 sales, 350 transactions\n - Q3: 70,000 sales, 400 transactions\n - Q4: 80,000 sales, 450 transactions\n- Furniture:\n - Q1: 40,000 sales, 250 transactions\n - Q2: 50,000 sales, 300 transactions\n - Q3: 60,000 sales, 350 transactions\n - Q4: 70,000 sales, 400 transactions\n- Clothing:\n - Q1: 30,000 sales, 200 transactions\n - Q2: 40,000 sales, 250 transactions\n - Q3: 50,000 sales, 300 transactions\n - Q4: 60,000 sales, 350 transactions\n- Toys:\n - Q1: 20,000 sales, 150 transactions\n - Q2: 30,000 sales, 200 transactions\n - Q3: 40,000 sales, 250 transactions\n - Q4: 50,000 sales, 300 transactions",
        ),
        (
            # 502 tokens
            "List the performance metrics of each department for the last year.",
            "[{'DEPT_NAME': 'Sales', 'REVENUE': 500000, 'EXPENSES': 300000, 'PROFIT': 200000, 'EMPLOYEE_COUNT': 50, 'CUSTOMER_SATISFACTION': 85}, {'DEPT_NAME': 'Marketing', 'REVENUE': 400000, 'EXPENSES': 200000, 'PROFIT': 200000, 'EMPLOYEE_COUNT': 40, 'CUSTOMER_SATISFACTION': 80}, {'DEPT_NAME': 'Engineering', 'REVENUE': 450000, 'EXPENSES': 250000, 'PROFIT': 200000, 'EMPLOYEE_COUNT': 45, 'CUSTOMER_SATISFACTION': 90}, {'DEPT_NAME': 'HR', 'REVENUE': 350000, 'EXPENSES': 150000, 'PROFIT': 200000, 'EMPLOYEE_COUNT': 35, 'CUSTOMER_SATISFACTION': 75}, {'DEPT_NAME': 'Finance', 'REVENUE': 300000, 'EXPENSES': 100000, 'PROFIT': 200000, 'EMPLOYEE_COUNT': 30, 'CUSTOMER_SATISFACTION': 95}]",
            "Performance metrics of each department for the last year:\n- Sales:\n - Revenue: 500,000\n - Expenses: 300,000\n - Profit: 200,000\n - Employee Count: 50\n - Customer Satisfaction: 85\n- Marketing:\n - Revenue: 400,000\n - Expenses: 200,000\n - Profit: 200,000\n - Employee Count: 40\n - Customer Satisfaction: 80\n- Engineering:\n - Revenue: 450,000\n - Expenses: 250,000\n - Profit: 200,000\n - Employee Count: 45\n - Customer Satisfaction: 90\n- HR:\n - Revenue: 350,000\n - Expenses: 150,000\n - Profit: 200,000\n - Employee Count: 35\n - Customer Satisfaction: 75\n- Finance:\n - Revenue: 300,000\n - Expenses: 100,000\n - Profit: 200,000\n - Employee Count: 30\n - Customer Satisfaction: 95",
        ),
        (
            # 626 tokens
            "List the monthly performance metrics for each department in terms of revenue, expenses, and profit.",
            "[{'DEPT_NAME': 'Sales', 'MONTH': 'January', 'REVENUE': 50000, 'EXPENSES': 30000, 'PROFIT': 20000}, {'DEPT_NAME': 'Sales', 'MONTH': 'February', 'REVENUE': 60000, 'EXPENSES': 35000, 'PROFIT': 25000}, {'DEPT_NAME': 'Sales', 'MONTH': 'March', 'REVENUE': 70000, 'EXPENSES': 40000, 'PROFIT': 30000}, {'DEPT_NAME': 'Marketing', 'MONTH': 'January', 'REVENUE': 40000, 'EXPENSES': 20000, 'PROFIT': 20000}, {'DEPT_NAME': 'Marketing', 'MONTH': 'February', 'REVENUE': 50000, 'EXPENSES': 25000, 'PROFIT': 25000}, {'DEPT_NAME': 'Marketing', 'MONTH': 'March', 'REVENUE': 60000, 'EXPENSES': 30000, 'PROFIT': 30000}, {'DEPT_NAME': 'Engineering', 'MONTH': 'January', 'REVENUE': 30000, 'EXPENSES': 15000, 'PROFIT': 15000}, {'DEPT_NAME': 'Engineering', 'MONTH': 'February', 'REVENUE': 40000, 'EXPENSES': 20000, 'PROFIT': 20000}, {'DEPT_NAME': 'Engineering', 'MONTH': 'March', 'REVENUE': 50000, 'EXPENSES': 25000, 'PROFIT': 25000}]",
            "Monthly performance metrics for each department:\n- Sales:\n - January: 50,000 revenue, 30,000 expenses, 20,000 profit\n - February: 60,000 revenue, 35,000 expenses, 25,000 profit\n - March: 70,000 revenue, 40,000 expenses, 30,000 profit\n- Marketing:\n - January: 40,000 revenue, 20,000 expenses, 20,000 profit\n - February: 50,000 revenue, 25,000 expenses, 25,000 profit\n - March: 60,000 revenue, 30,000 expenses, 30,000 profit\n- Engineering:\n - January: 30,000 revenue, 15,000 expenses, 15,000 profit\n - February: 40,000 revenue, 20,000 expenses, 20,000 profit\n - March: 50,000 revenue, 25,000 expenses, 25,000 profit",
        ),
        (
            # 600 tokens
            # NOTE(review): verbatim duplicate of the first case above.
            "What are the quarterly sales and number of transactions for each product category?",
            "[{'CATEGORY_NAME': 'Electronics', 'Q1_SALES': 50000, 'Q1_TRANSACTIONS': 300, 'Q2_SALES': 60000, 'Q2_TRANSACTIONS': 350, 'Q3_SALES': 70000, 'Q3_TRANSACTIONS': 400, 'Q4_SALES': 80000, 'Q4_TRANSACTIONS': 450}, {'CATEGORY_NAME': 'Furniture', 'Q1_SALES': 40000, 'Q1_TRANSACTIONS': 250, 'Q2_SALES': 50000, 'Q2_TRANSACTIONS': 300, 'Q3_SALES': 60000, 'Q3_TRANSACTIONS': 350, 'Q4_SALES': 70000, 'Q4_TRANSACTIONS': 400}, {'CATEGORY_NAME': 'Clothing', 'Q1_SALES': 30000, 'Q1_TRANSACTIONS': 200, 'Q2_SALES': 40000, 'Q2_TRANSACTIONS': 250, 'Q3_SALES': 50000, 'Q3_TRANSACTIONS': 300, 'Q4_SALES': 60000, 'Q4_TRANSACTIONS': 350}, {'CATEGORY_NAME': 'Toys', 'Q1_SALES': 20000, 'Q1_TRANSACTIONS': 150, 'Q2_SALES': 30000, 'Q2_TRANSACTIONS': 200, 'Q3_SALES': 40000, 'Q3_TRANSACTIONS': 250, 'Q4_SALES': 50000, 'Q4_TRANSACTIONS': 300}]",
            "Quarterly sales and number of transactions for each product category:\n- Electronics:\n - Q1: 50,000 sales, 300 transactions\n - Q2: 60,000 sales, 350 transactions\n - Q3: 70,000 sales, 400 transactions\n - Q4: 80,000 sales, 450 transactions\n- Furniture:\n - Q1: 40,000 sales, 250 transactions\n - Q2: 50,000 sales, 300 transactions\n - Q3: 60,000 sales, 350 transactions\n - Q4: 70,000 sales, 400 transactions\n- Clothing:\n - Q1: 30,000 sales, 200 transactions\n - Q2: 40,000 sales, 250 transactions\n - Q3: 50,000 sales, 300 transactions\n - Q4: 60,000 sales, 350 transactions\n- Toys:\n - Q1: 20,000 sales, 150 transactions\n - Q2: 30,000 sales, 200 transactions\n - Q3: 40,000 sales, 250 transactions\n - Q4: 50,000 sales, 300 transactions",
        ),
        (
            # 628 tokens
            "What are the monthly sales figures and transaction counts for each region?",
            "[{'REGION': 'North', 'JAN_SALES': 30000, 'JAN_TRANSACTIONS': 150, 'FEB_SALES': 32000, 'FEB_TRANSACTIONS': 160, 'MAR_SALES': 35000, 'MAR_TRANSACTIONS': 170, 'APR_SALES': 37000, 'APR_TRANSACTIONS': 180, 'MAY_SALES': 40000, 'MAY_TRANSACTIONS': 190, 'JUN_SALES': 42000, 'JUN_TRANSACTIONS': 200}, {'REGION': 'South', 'JAN_SALES': 28000, 'JAN_TRANSACTIONS': 140, 'FEB_SALES': 30000, 'FEB_TRANSACTIONS': 150, 'MAR_SALES': 33000, 'MAR_TRANSACTIONS': 160, 'APR_SALES': 35000, 'APR_TRANSACTIONS': 170, 'MAY_SALES': 38000, 'MAY_TRANSACTIONS': 180, 'JUN_SALES': 40000, 'JUN_TRANSACTIONS': 190}, {'REGION': 'East', 'JAN_SALES': 25000, 'JAN_TRANSACTIONS': 130, 'FEB_SALES': 27000, 'FEB_TRANSACTIONS': 140, 'MAR_SALES': 30000, 'MAR_TRANSACTIONS': 150, 'APR_SALES': 32000, 'APR_TRANSACTIONS': 160, 'MAY_SALES': 35000, 'MAY_TRANSACTIONS': 170, 'JUN_SALES': 37000, 'JUN_TRANSACTIONS': 180}]",
            "Monthly sales figures and transaction counts for each region:\n- North:\n - January: 30,000 sales, 150 transactions\n - February: 32,000 sales, 160 transactions\n - March: 35,000 sales, 170 transactions\n - April: 37,000 sales, 180 transactions\n - May: 40,000 sales, 190 transactions\n - June: 42,000 sales, 200 transactions\n- South:\n - January: 28,000 sales, 140 transactions\n - February: 30,000 sales, 150 transactions\n - March: 33,000 sales, 160 transactions\n - April: 35,000 sales, 170 transactions\n - May: 38,000 sales, 180 transactions\n - June: 40,000 sales, 190 transactions\n- East:\n - January: 25,000 sales, 130 transactions\n - February: 27,000 sales, 140 transactions\n - March: 30,000 sales, 150 transactions\n - April: 32,000 sales, 160 transactions\n - May: 35,000 sales, 170 transactions\n - June: 37,000 sales, 180 transactions",
        ),
        (
            # 524 tokens
            "What are the yearly performance metrics for each sales region, including total sales, total expenses, and net profit?",
            "[{'REGION': 'North America', 'YEAR': 2023, 'TOTAL_SALES': 1500000, 'TOTAL_EXPENSES': 900000, 'NET_PROFIT': 600000}, {'REGION': 'Europe', 'YEAR': 2023, 'TOTAL_SALES': 1300000, 'TOTAL_EXPENSES': 800000, 'NET_PROFIT': 500000}, {'REGION': 'Asia', 'YEAR': 2023, 'TOTAL_SALES': 1200000, 'TOTAL_EXPENSES': 700000, 'NET_PROFIT': 500000}, {'REGION': 'South America', 'YEAR': 2023, 'TOTAL_SALES': 1100000, 'TOTAL_EXPENSES': 650000, 'NET_PROFIT': 450000}, {'REGION': 'Africa', 'YEAR': 2023, 'TOTAL_SALES': 1000000, 'TOTAL_EXPENSES': 600000, 'NET_PROFIT': 400000}, {'REGION': 'Australia', 'YEAR': 2023, 'TOTAL_SALES': 950000, 'TOTAL_EXPENSES': 550000, 'NET_PROFIT': 400000}]",
            "Yearly performance metrics for each sales region:\n- North America: 1,500,000 total sales, 900,000 total expenses, 600,000 net profit\n- Europe: 1,300,000 total sales, 800,000 total expenses, 500,000 net profit\n- Asia: 1,200,000 total sales, 700,000 total expenses, 500,000 net profit\n- South America: 1,100,000 total sales, 650,000 total expenses, 450,000 net profit\n- Africa: 1,000,000 total sales, 600,000 total expenses, 400,000 net profit\n- Australia: 950,000 total sales, 550,000 total expenses, 400,000 net profit",
        ),
        (
            # 611 tokens
            "What are the quarterly performance metrics for each employee in the Engineering department, including total projects handled, hours worked, and client satisfaction ratings?",
            "[{'EMPLOYEE_NAME': 'John Doe', 'Q1_PROJECTS': 5, 'Q1_HOURS': 160, 'Q1_SATISFACTION': 90, 'Q2_PROJECTS': 6, 'Q2_HOURS': 170, 'Q2_SATISFACTION': 92, 'Q3_PROJECTS': 7, 'Q3_HOURS': 180, 'Q3_SATISFACTION': 94, 'Q4_PROJECTS': 8, 'Q4_HOURS': 190, 'Q4_SATISFACTION': 96}, {'EMPLOYEE_NAME': 'Jane Smith', 'Q1_PROJECTS': 4, 'Q1_HOURS': 150, 'Q1_SATISFACTION': 88, 'Q2_PROJECTS': 5, 'Q2_HOURS': 160, 'Q2_SATISFACTION': 89, 'Q3_PROJECTS': 6, 'Q3_HOURS': 170, 'Q3_SATISFACTION': 91, 'Q4_PROJECTS': 7, 'Q4_HOURS': 180, 'Q4_SATISFACTION': 93}, {'EMPLOYEE_NAME': 'Bob Johnson', 'Q1_PROJECTS': 6, 'Q1_HOURS': 165, 'Q1_SATISFACTION': 92, 'Q2_PROJECTS': 7, 'Q2_HOURS': 175, 'Q2_SATISFACTION': 93, 'Q3_PROJECTS': 8, 'Q3_HOURS': 185, 'Q3_SATISFACTION': 95, 'Q4_PROJECTS': 9, 'Q4_HOURS': 195, 'Q4_SATISFACTION': 97}]",
            "Quarterly performance metrics for each employee in the Engineering department:\n- John Doe:\n - Q1: 5 projects, 160 hours, 90 satisfaction\n - Q2: 6 projects, 170 hours, 92 satisfaction\n - Q3: 7 projects, 180 hours, 94 satisfaction\n - Q4: 8 projects, 190 hours, 96 satisfaction\n- Jane Smith:\n - Q1: 4 projects, 150 hours, 88 satisfaction\n - Q2: 5 projects, 160 hours, 89 satisfaction\n - Q3: 6 projects, 170 hours, 91 satisfaction\n - Q4: 7 projects, 180 hours, 93 satisfaction\n- Bob Johnson:\n - Q1: 6 projects, 165 hours, 92 satisfaction\n - Q2: 7 projects, 175 hours, 93 satisfaction\n - Q3: 8 projects, 185 hours, 95 satisfaction\n - Q4: 9 projects, 195 hours, 97 satisfaction",
        ),
        (
            # 555 tokens
            "What are the quarterly revenue and expense figures for each product category?",
            "[{'CATEGORY': 'Electronics', 'Q1_REVENUE': 500000, 'Q1_EXPENSES': 200000, 'Q2_REVENUE': 550000, 'Q2_EXPENSES': 220000, 'Q3_REVENUE': 600000, 'Q3_EXPENSES': 240000, 'Q4_REVENUE': 650000, 'Q4_EXPENSES': 260000}, {'CATEGORY': 'Furniture', 'Q1_REVENUE': 300000, 'Q1_EXPENSES': 120000, 'Q2_REVENUE': 320000, 'Q2_EXPENSES': 130000, 'Q3_REVENUE': 350000, 'Q3_EXPENSES': 140000, 'Q4_REVENUE': 370000, 'Q4_EXPENSES': 150000}, {'CATEGORY': 'Clothing', 'Q1_REVENUE': 400000, 'Q1_EXPENSES': 150000, 'Q2_REVENUE': 420000, 'Q2_EXPENSES': 160000, 'Q3_REVENUE': 450000, 'Q3_EXPENSES': 170000, 'Q4_REVENUE': 480000, 'Q4_EXPENSES': 180000}]",
            "Quarterly revenue and expense figures for each product category:\n- Electronics:\n - Q1: 500,000 revenue, 200,000 expenses\n - Q2: 550,000 revenue, 220,000 expenses\n - Q3: 600,000 revenue, 240,000 expenses\n - Q4: 650,000 revenue, 260,000 expenses\n- Furniture:\n - Q1: 300,000 revenue, 120,000 expenses\n - Q2: 320,000 revenue, 130,000 expenses\n - Q3: 350,000 revenue, 140,000 expenses\n - Q4: 370,000 revenue, 150,000 expenses\n- Clothing:\n - Q1: 400,000 revenue, 150,000 expenses\n - Q2: 420,000 revenue, 160,000 expenses\n - Q3: 450,000 revenue, 170,000 expenses\n - Q4: 480,000 revenue, 180,000 expenses",
        ),
        (
            # 537 tokens
            "What are the annual sales and profit margins for each product in the last two years?",
            "[{'PRODUCT': 'Laptop', 'YEAR_1_SALES': 1000000, 'YEAR_1_PROFIT_MARGIN': 0.25, 'YEAR_2_SALES': 1100000, 'YEAR_2_PROFIT_MARGIN': 0.26}, {'PRODUCT': 'Smartphone', 'YEAR_1_SALES': 1500000, 'YEAR_1_PROFIT_MARGIN': 0.30, 'YEAR_2_SALES': 1600000, 'YEAR_2_PROFIT_MARGIN': 0.31}, {'PRODUCT': 'Tablet', 'YEAR_1_SALES': 800000, 'YEAR_1_PROFIT_MARGIN': 0.20, 'YEAR_2_SALES': 900000, 'YEAR_2_PROFIT_MARGIN': 0.22}, {'PRODUCT': 'Headphones', 'YEAR_1_SALES': 500000, 'YEAR_1_PROFIT_MARGIN': 0.15, 'YEAR_2_SALES': 600000, 'YEAR_2_PROFIT_MARGIN': 0.16}, {'PRODUCT': 'Smartwatch', 'YEAR_1_SALES': 700000, 'YEAR_1_PROFIT_MARGIN': 0.18, 'YEAR_2_SALES': 750000, 'YEAR_2_PROFIT_MARGIN': 0.19}]",
            "Annual sales and profit margins for each product in the last two years:\n- Laptop:\n - Year 1: 1,000,000 sales, 25% profit margin\n - Year 2: 1,100,000 sales, 26% profit margin\n- Smartphone:\n - Year 1: 1,500,000 sales, 30% profit margin\n - Year 2: 1,600,000 sales, 31% profit margin\n- Tablet:\n - Year 1: 800,000 sales, 20% profit margin\n - Year 2: 900,000 sales, 22% profit margin\n- Headphones:\n - Year 1: 500,000 sales, 15% profit margin\n - Year 2: 600,000 sales, 16% profit margin\n- Smartwatch:\n - Year 1: 700,000 sales, 18% profit margin\n - Year 2: 750,000 sales, 19% profit margin",
        ),
        (
            # 707 tokens
            "What are the monthly revenue figures, profit margins, and customer feedback scores for each service category over the past six months?",
            "[{'CATEGORY': 'Consulting', 'JAN_REVENUE': 60000, 'JAN_PROFIT_MARGIN': 0.3, 'JAN_FEEDBACK': 88, 'FEB_REVENUE': 62000, 'FEB_PROFIT_MARGIN': 0.32, 'FEB_FEEDBACK': 89, 'MAR_REVENUE': 63000, 'MAR_PROFIT_MARGIN': 0.31, 'MAR_FEEDBACK': 90, 'APR_REVENUE': 65000, 'APR_PROFIT_MARGIN': 0.33, 'APR_FEEDBACK': 91, 'MAY_REVENUE': 66000, 'MAY_PROFIT_MARGIN': 0.34, 'MAY_FEEDBACK': 92, 'JUN_REVENUE': 67000, 'JUN_PROFIT_MARGIN': 0.35, 'JUN_FEEDBACK': 93}, {'CATEGORY': 'Support', 'JAN_REVENUE': 40000, 'JAN_PROFIT_MARGIN': 0.2, 'JAN_FEEDBACK': 85, 'FEB_REVENUE': 42000, 'FEB_PROFIT_MARGIN': 0.22, 'FEB_FEEDBACK': 86, 'MAR_REVENUE': 43000, 'MAR_PROFIT_MARGIN': 0.21, 'MAR_FEEDBACK': 87, 'APR_REVENUE': 45000, 'APR_PROFIT_MARGIN': 0.23, 'APR_FEEDBACK': 88, 'MAY_REVENUE': 46000, 'MAY_PROFIT_MARGIN': 0.24, 'MAY_FEEDBACK': 89, 'JUN_REVENUE': 47000, 'JUN_PROFIT_MARGIN': 0.25, 'JUN_FEEDBACK': 90}]",
            "Monthly revenue figures, profit margins, and customer feedback scores for each service category over the past six months:\n- Consulting:\n - January: 60,000 revenue, 30% profit margin, 88 feedback\n - February: 62,000 revenue, 32% profit margin, 89 feedback\n - March: 63,000 revenue, 31% profit margin, 90 feedback\n - April: 65,000 revenue, 33% profit margin, 91 feedback\n - May: 66,000 revenue, 34% profit margin, 92 feedback\n - June: 67,000 revenue, 35% profit margin, 93 feedback\n- Support:\n - January: 40,000 revenue, 20% profit margin, 85 feedback\n - February: 42,000 revenue, 22% profit margin, 86 feedback\n - March: 43,000 revenue, 21% profit margin, 87 feedback\n - April: 45,000 revenue, 23% profit margin, 88 feedback\n - May: 46,000 revenue, 24% profit margin, 89 feedback\n - June: 47,000 revenue, 25% profit margin, 90 feedback",
        ),
        (
            # 694 tokens
            # NOTE(review): the wording says "past six months" but the data
            # covers three quarters (Q1-Q3) - confirm the intended period.
            "What are the quarterly sales figures, profit margins, and employee performance ratings for each sales team in the company over the past six months?",
            "[{'TEAM': 'Alpha', 'Q1_SALES': 80000, 'Q1_PROFIT_MARGIN': 0.25, 'Q1_PERFORMANCE': 90, 'Q2_SALES': 85000, 'Q2_PROFIT_MARGIN': 0.27, 'Q2_PERFORMANCE': 91, 'Q3_SALES': 90000, 'Q3_PROFIT_MARGIN': 0.26, 'Q3_PERFORMANCE': 92}, {'TEAM': 'Beta', 'Q1_SALES': 75000, 'Q1_PROFIT_MARGIN': 0.22, 'Q1_PERFORMANCE': 88, 'Q2_SALES': 78000, 'Q2_PROFIT_MARGIN': 0.24, 'Q2_PERFORMANCE': 89, 'Q3_SALES': 81000, 'Q3_PROFIT_MARGIN': 0.23, 'Q3_PERFORMANCE': 90}, {'TEAM': 'Gamma', 'Q1_SALES': 70000, 'Q1_PROFIT_MARGIN': 0.2, 'Q1_PERFORMANCE': 85, 'Q2_SALES': 73000, 'Q2_PROFIT_MARGIN': 0.22, 'Q2_PERFORMANCE': 86, 'Q3_SALES': 76000, 'Q3_PROFIT_MARGIN': 0.21, 'Q3_PERFORMANCE': 87}, {'TEAM': 'Delta', 'Q1_SALES': 65000, 'Q1_PROFIT_MARGIN': 0.18, 'Q1_PERFORMANCE': 83, 'Q2_SALES': 68000, 'Q2_PROFIT_MARGIN': 0.2, 'Q2_PERFORMANCE': 84, 'Q3_SALES': 71000, 'Q3_PROFIT_MARGIN': 0.19, 'Q3_PERFORMANCE': 85}]",
            "Quarterly sales figures, profit margins, and employee performance ratings for each sales team over the past six months:\n- Alpha:\n - Q1: 80,000 sales, 25% profit margin, 90 performance\n - Q2: 85,000 sales, 27% profit margin, 91 performance\n - Q3: 90,000 sales, 26% profit margin, 92 performance\n- Beta:\n - Q1: 75,000 sales, 22% profit margin, 88 performance\n - Q2: 78,000 sales, 24% profit margin, 89 performance\n - Q3: 81,000 sales, 23% profit margin, 90 performance\n- Gamma:\n - Q1: 70,000 sales, 20% profit margin, 85 performance\n - Q2: 73,000 sales, 22% profit margin, 86 performance\n - Q3: 76,000 sales, 21% profit margin, 87 performance\n- Delta:\n - Q1: 65,000 sales, 18% profit margin, 83 performance\n - Q2: 68,000 sales, 20% profit margin, 84 performance\n - Q3: 71,000 sales, 19% profit margin, 85 performance",
        ),
        (
            # 503 tokens
            # NOTE(review): the question says "bi-weekly" while the data keys
            # are WEEK1..WEEK3 - confirm the intended granularity.
            "What are the bi-weekly production metrics for each manufacturing unit, including units produced, units rejected, and average production time?",
            "[{'UNIT': 'A', 'WEEK1_PROD': 5000, 'WEEK1_REJECT': 50, 'WEEK1_TIME': 8.5, 'WEEK2_PROD': 5200, 'WEEK2_REJECT': 45, 'WEEK2_TIME': 8.3, 'WEEK3_PROD': 5400, 'WEEK3_REJECT': 40, 'WEEK3_TIME': 8.1}, {'UNIT': 'B', 'WEEK1_PROD': 4800, 'WEEK1_REJECT': 60, 'WEEK1_TIME': 8.7, 'WEEK2_PROD': 5000, 'WEEK2_REJECT': 55, 'WEEK2_TIME': 8.6, 'WEEK3_PROD': 5100, 'WEEK3_REJECT': 50, 'WEEK3_TIME': 8.4}, {'UNIT': 'C', 'WEEK1_PROD': 4600, 'WEEK1_REJECT': 70, 'WEEK1_TIME': 8.9, 'WEEK2_PROD': 4800, 'WEEK2_REJECT': 65, 'WEEK2_TIME': 8.8, 'WEEK3_PROD': 4900, 'WEEK3_REJECT': 60, 'WEEK3_TIME': 8.5}]",
            "Bi-weekly production metrics for each manufacturing unit:\n- Unit A:\n - Week 1: 5,000 units produced, 50 units rejected, 8.5 hours average production time\n - Week 2: 5,200 units produced, 45 units rejected, 8.3 hours average production time\n - Week 3: 5,400 units produced, 40 units rejected, 8.1 hours average production time\n- Unit B:\n - Week 1: 4,800 units produced, 60 units rejected, 8.7 hours average production time\n - Week 2: 5,000 units produced, 55 units rejected, 8.6 hours average production time\n - Week 3: 5,100 units produced, 50 units rejected, 8.4 hours average production time\n- Unit C:\n - Week 1: 4,600 units produced, 70 units rejected, 8.9 hours average production time\n - Week 2: 4,800 units produced, 65 units rejected, 8.8 hours average production time\n - Week 3: 4,900 units produced, 60 units rejected, 8.5 hours average production time",
        ),
        (
            # 572 tokens
            "What are the monthly financial performance metrics for each department, including total revenue, total expenses, and net profit?",
            "[{'DEPARTMENT': 'Finance', 'JAN_REVENUE': 90000, 'JAN_EXPENSES': 50000, 'JAN_PROFIT': 40000, 'FEB_REVENUE': 95000, 'FEB_EXPENSES': 52000, 'FEB_PROFIT': 43000, 'MAR_REVENUE': 100000, 'MAR_EXPENSES': 54000, 'MAR_PROFIT': 46000}, {'DEPARTMENT': 'HR', 'JAN_REVENUE': 80000, 'JAN_EXPENSES': 48000, 'JAN_PROFIT': 32000, 'FEB_REVENUE': 85000, 'FEB_EXPENSES': 49000, 'FEB_PROFIT': 36000, 'MAR_REVENUE': 90000, 'MAR_EXPENSES': 50000, 'MAR_PROFIT': 40000}, {'DEPARTMENT': 'IT', 'JAN_REVENUE': 100000, 'JAN_EXPENSES': 60000, 'JAN_PROFIT': 40000, 'FEB_REVENUE': 105000, 'FEB_EXPENSES': 62000, 'FEB_PROFIT': 43000, 'MAR_REVENUE': 110000, 'MAR_EXPENSES': 64000, 'MAR_PROFIT': 46000}]",
            "Monthly financial performance metrics for each department:\n- Finance:\n - January: 90,000 revenue, 50,000 expenses, 40,000 net profit\n - February: 95,000 revenue, 52,000 expenses, 43,000 net profit\n - March: 100,000 revenue, 54,000 expenses, 46,000 net profit\n- HR:\n - January: 80,000 revenue, 48,000 expenses, 32,000 net profit\n - February: 85,000 revenue, 49,000 expenses, 36,000 net profit\n - March: 90,000 revenue, 50,000 expenses, 40,000 net profit\n- IT:\n - January: 100,000 revenue, 60,000 expenses, 40,000 net profit\n - February: 105,000 revenue, 62,000 expenses, 43,000 net profit\n - March: 110,000 revenue, 64,000 expenses, 46,000 net profit",
        ),
        (
            # 582 tokens
            # NOTE(review): the question asks for percentage growth from the
            # previous month, but the reference summary contains no growth
            # figures - confirm which one is intended.
            "What are the monthly revenue and expense breakdowns for each branch, including net profit and percentage growth from the previous month?",
            "[{'BRANCH': 'New York', 'JAN_REVENUE': 150000, 'JAN_EXPENSES': 80000, 'JAN_PROFIT': 70000, 'FEB_REVENUE': 160000, 'FEB_EXPENSES': 85000, 'FEB_PROFIT': 75000, 'MAR_REVENUE': 170000, 'MAR_EXPENSES': 90000, 'MAR_PROFIT': 80000}, {'BRANCH': 'Los Angeles', 'JAN_REVENUE': 140000, 'JAN_EXPENSES': 75000, 'JAN_PROFIT': 65000, 'FEB_REVENUE': 150000, 'FEB_EXPENSES': 80000, 'FEB_PROFIT': 70000, 'MAR_REVENUE': 160000, 'MAR_EXPENSES': 85000, 'MAR_PROFIT': 75000}, {'BRANCH': 'Chicago', 'JAN_REVENUE': 130000, 'JAN_EXPENSES': 70000, 'JAN_PROFIT': 60000, 'FEB_REVENUE': 140000, 'FEB_EXPENSES': 75000, 'FEB_PROFIT': 65000, 'MAR_REVENUE': 150000, 'MAR_EXPENSES': 80000, 'MAR_PROFIT': 70000}]",
            "Monthly revenue and expense breakdowns for each branch:\n- New York:\n - January: 150,000 revenue, 80,000 expenses, 70,000 net profit\n - February: 160,000 revenue, 85,000 expenses, 75,000 net profit\n - March: 170,000 revenue, 90,000 expenses, 80,000 net profit\n- Los Angeles:\n - January: 140,000 revenue, 75,000 expenses, 65,000 net profit\n - February: 150,000 revenue, 80,000 expenses, 70,000 net profit\n - March: 160,000 revenue, 85,000 expenses, 75,000 net profit\n- Chicago:\n - January: 130,000 revenue, 70,000 expenses, 60,000 net profit\n - February: 140,000 revenue, 75,000 expenses, 65,000 net profit\n - March: 150,000 revenue, 80,000 expenses, 70,000 net profit",
        ),
        (
            # 516 tokens
            "What are the monthly sales figures, returns, and customer satisfaction ratings for each product category?",
            "[{'CATEGORY': 'Electronics', 'JAN_SALES': 50000, 'JAN_RETURNS': 200, 'JAN_SATISFACTION': 90, 'FEB_SALES': 52000, 'FEB_RETURNS': 220, 'FEB_SATISFACTION': 88, 'MAR_SALES': 54000, 'MAR_RETURNS': 240, 'MAR_SATISFACTION': 87}, {'CATEGORY': 'Clothing', 'JAN_SALES': 40000, 'JAN_RETURNS': 300, 'JAN_SATISFACTION': 85, 'FEB_SALES': 42000, 'FEB_RETURNS': 320, 'FEB_SATISFACTION': 84, 'MAR_SALES': 44000, 'MAR_RETURNS': 340, 'MAR_SATISFACTION': 83}, {'CATEGORY': 'Furniture', 'JAN_SALES': 30000, 'JAN_RETURNS': 100, 'JAN_SATISFACTION': 92, 'FEB_SALES': 32000, 'FEB_RETURNS': 120, 'FEB_SATISFACTION': 91, 'MAR_SALES': 34000, 'MAR_RETURNS': 140, 'MAR_SATISFACTION': 90}]",
            "Monthly sales figures, returns, and customer satisfaction ratings for each product category:\n- Electronics:\n - January: 50,000 sales, 200 returns, 90% satisfaction\n - February: 52,000 sales, 220 returns, 88% satisfaction\n - March: 54,000 sales, 240 returns, 87% satisfaction\n- Clothing:\n - January: 40,000 sales, 300 returns, 85% satisfaction\n - February: 42,000 sales, 320 returns, 84% satisfaction\n - March: 44,000 sales, 340 returns, 83% satisfaction\n- Furniture:\n - January: 30,000 sales, 100 returns, 92% satisfaction\n - February: 32,000 sales, 120 returns, 91% satisfaction\n - March: 34,000 sales, 140 returns, 90% satisfaction",
        ),
    )

    # Expand each triple into the dict shape the evaluator reads.
    return [
        {"question": question, "raw_data": raw_data, "summary": summary}
        for question, raw_data, summary in cases
    ]
def evaluate_text_to_chat(llm, benchmark_data, difficulty):
    """Evaluate the Text-to-Chat LLM using benchmark data.

    For each benchmark case the LLM is asked (through
    ``LLMConfiguration.generate_textual_insights``) to produce a summary of
    ``raw_data`` for ``question``. The generated text is scored against the
    reference ``summary`` with BERTScore F1 and ROUGE-L F1. One stat row is
    stored per case, and one summary row (mean scores and mean generation
    time) is stored for the whole run.

    A case only contributes to the averages when generation AND both scorers
    succeed; a failed case is stored with zero scores and left out of the
    averages. If no case succeeds (or ``benchmark_data`` is empty) the summary
    is stored with zeros instead of NaN.

    Args:
        llm: Model handle forwarded to ``generate_textual_insights``.
        benchmark_data: Sized collection of dicts with ``question``,
            ``raw_data`` and ``summary`` keys.
        difficulty: Label for this run (e.g. "easy"); printed and stored with
            every stat.
    """
    total_queries = len(benchmark_data)
    rouge_l_scores = []
    bert_scores = []
    query_times = []
    rouge = Rouge()

    # Difficulty and Query Clarification
    print(f"\n#################\nBenchmarking {total_queries} number of queries for {difficulty} difficulty\n#################")

    for data in benchmark_data:
        question = data['question']
        raw_data = data['raw_data']
        expected_summary = data['summary']

        # Defaults stored for this case if anything below fails.
        temp_rouge_l_f1 = 0
        temp_bert_score = 0

        print(f"\n\nQuestion: {question}")
        # perf_counter is monotonic, so the interval cannot be skewed by
        # system clock adjustments.
        start_time = time.perf_counter()
        try:
            # Generate textual summary using the LLM
            generated_summary = LLMConfiguration.generate_textual_insights(llm, question, raw_data)
            generated_summary = str(generated_summary).strip()

            # Only the generation call is timed, not the scoring.
            query_time = time.perf_counter() - start_time

            # Evaluate using BERT; cast to a plain float so the storage layer
            # never receives a numpy float32 scalar.
            _, _, F1 = score([generated_summary], [expected_summary], lang="en", verbose=True)
            bert_f1 = float(np.mean(F1.cpu().numpy()))

            # Evaluation using Rouge
            rouge_result = rouge.get_scores(generated_summary, expected_summary, avg=True)
            rouge_l_f1 = float(rouge_result['rouge-l']['f'])
        except Exception as e:
            # Best-effort benchmark: report the failure and keep going with
            # the zero defaults for this case.
            print(f"Error with summary generation: {e}")
        else:
            # Commit all three measurements together so the lists stay
            # aligned and a half-scored case is never recorded.
            temp_bert_score = bert_f1
            temp_rouge_l_f1 = rouge_l_f1
            query_times.append(query_time)
            bert_scores.append(bert_f1)
            rouge_l_scores.append(rouge_l_f1)

            print(f"Generated Summary: {generated_summary}")
            print(f"Expected Summary: {expected_summary}")
            print(f"BERT F1 Scoring: {temp_bert_score}")
            print(f"Rouge F1 Scoring: {temp_rouge_l_f1}")

        # Store CHAT stat for test case to "chat_benchmark_stats"
        DatabaseConfiguration.store_chat_stat(question, temp_bert_score, temp_rouge_l_f1, difficulty)
        print("Added CHAT stat to chat_benchmark_stats")

    # Calculate a summary of metrics for CHAT difficulty. np.mean([]) would
    # yield NaN (plus a RuntimeWarning), so fall back to zeros when nothing
    # succeeded.
    if bert_scores:
        average_rouge_score = float(np.mean(rouge_l_scores))
        average_bert_score = float(np.mean(bert_scores))
        average_latency = float(np.mean(query_times))
    else:
        print(f"No successful queries for {difficulty} difficulty; storing zeroed summary")
        average_rouge_score = 0.0
        average_bert_score = 0.0
        average_latency = 0.0

    print(f"Average Rouge F1: {average_rouge_score:.2f}")
    print(f"Average BERTScore F1: {average_bert_score:.2f}")
    print(f"Average Query Time: {average_latency:.2f} seconds")

    # Store CHAT summary to "chat_summary_stats"
    DatabaseConfiguration.store_chat_summary(difficulty, average_bert_score, average_rouge_score, average_latency)
    print("Added CHAT summary stat to chat_summary_stats")
def run_chat_benchmark():
    """Run the Text-to-Chat benchmark for the easy, medium and hard datasets.

    Deploys the chat LLM once, loads all three datasets, then evaluates them
    in the order easy -> medium -> hard with the same model handle.
    """
    # Deploy the Text-to-Chat LLM
    llm = LLMConfiguration.deploy_chat_llama()

    # Load every dataset up front, paired with its difficulty label
    suites = [
        (load_easy_benchmark(), "easy"),
        (load_medium_benchmark(), "medium"),
        (load_hard_benchmark(), "hard"),
    ]

    # Evaluate the Text-to-Chat LLM on each difficulty in turn
    for cases, level in suites:
        evaluate_text_to_chat(llm, cases, level)