From 86627f9abb64a8759047c9f85ce63ad7f746bd06 Mon Sep 17 00:00:00 2001 From: Chris Alexiuk <161380339+chrisalexiuk-nvidia@users.noreply.github.com> Date: Mon, 16 Sep 2024 13:09:03 -0400 Subject: [PATCH] Updating Hello World Example with README and try/except (#243) * Updating Hello World Example with README and try/except Signed-off-by: Chris Alexiuk * Updating Hello World Example with README and try/except Signed-off-by: Chris Alexiuk * Updating Hello World Example with README and try/except Signed-off-by: Chris Alexiuk --------- Signed-off-by: Chris Alexiuk --- .../synthetic-data-hello-world/README.md | 31 +++ ...ta Generation - Hello World Examples.ipynb | 225 +++++++++++------- 2 files changed, 171 insertions(+), 85 deletions(-) create mode 100644 tutorials/synthetic-data-hello-world/README.md diff --git a/tutorials/synthetic-data-hello-world/README.md b/tutorials/synthetic-data-hello-world/README.md new file mode 100644 index 00000000..75f3cff6 --- /dev/null +++ b/tutorials/synthetic-data-hello-world/README.md @@ -0,0 +1,31 @@ +# Synthetic Data Generation: Hello World Example + +The provided notebook will walk you through the Synthetic Data Generation tools and pipelines currently available out-of-the-box through NeMo Curator! + +We'll walk through an example of each pipeline, as well as how you could make modifications to the provided pipelines. + +> NOTE: Currently, the `convert_response_to_yaml_list()` method is extremely strict - manual parsing of the intermediate results is recommended in all cases. In the notebook we have wrapped these calls in `try/except` blocks to ensure you can move through the notebook without being impeded by the error. + +### Covered Tools: + +NeMo Curator offers the following tools, which are compatible with both OpenAI API-compatible models hosted on `build.nvidia.com` and any locally running LLM NIM. 
+ +- NeMo Curator OpenAI Client (Sync and Async) +- Chat and Reward Model Usage + +### Covered Pipelines: + +Through the use of the `NemotronGenerator`, NeMo Curator offers the following pipelines: + +- Math Question Generation Pipeline +- Writing Task Generation Pipeline +- Open Question Generation Pipeline +- Closed Question Generation Pipeline +- Python Question Generation Pipeline +- Dialogue Generation Pipeline +- Two-Turn Prompt Generation Pipeline +- Entity Classification + - Classify Math Entity + - Classify Python Entity + +> NOTE: If you are using the `build.nvidia.com` endpoint for Nemotron-4 340B Instruct as your model for the above pipelines, the pipelines might time out during times of high load. In this case, we recommend running the pipeline in a piecewise fashion and saving the intermediate outputs. diff --git a/tutorials/synthetic-data-hello-world/Synthetic Data Generation - Hello World Examples.ipynb b/tutorials/synthetic-data-hello-world/Synthetic Data Generation - Hello World Examples.ipynb index ae844e31..bbe0ed8c 100644 --- a/tutorials/synthetic-data-hello-world/Synthetic Data Generation - Hello World Examples.ipynb +++ b/tutorials/synthetic-data-hello-world/Synthetic Data Generation - Hello World Examples.ipynb @@ -34,13 +34,13 @@ "output_type": "stream", "text": [ "Cloning into 'NeMo-Curator'...\n", - "remote: Enumerating objects: 1797, done.\u001b[K\n", - "remote: Counting objects: 100% (1275/1275), done.\u001b[K\n", - "remote: Compressing objects: 100% (700/700), done.\u001b[K\n", - "remote: Total 1797 (delta 834), reused 822 (delta 567), pack-reused 522 (from 1)\u001b[K\n", - "Receiving objects: 100% (1797/1797), 2.17 MiB | 21.33 MiB/s, done.\n", - "Resolving deltas: 100% (1075/1075), done.\n", - "/home/chris/NeMo-Curator\n" + "remote: Enumerating objects: 2051, done.\u001b[K\n", + "remote: Counting objects: 100% (1512/1512), done.\u001b[K\n", + "remote: Compressing objects: 100% (837/837), done.\u001b[K\n", + 
"remote: Total 2051 (delta 983), reused 1002 (delta 666), pack-reused 539 (from 1)\u001b[K\n", + "Receiving objects: 100% (2051/2051), 2.28 MiB | 15.29 MiB/s, done.\n", + "Resolving deltas: 100% (1236/1236), done.\n", + "/home/chris/Code/NVIDIA/NeMo-Curator/tutorials/synthetic-data-hello-world/NeMo-Curator\n" ] } ], @@ -58,7 +58,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -67,7 +67,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -92,7 +92,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -111,7 +111,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -132,7 +132,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -163,7 +163,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -218,7 +218,7 @@ }, { "cell_type": "code", - "execution_count": 102, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -234,7 +234,7 @@ }, { "cell_type": "code", - "execution_count": 101, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -259,7 +259,7 @@ }, { "cell_type": "code", - "execution_count": 103, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -306,7 +306,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -361,7 +361,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -394,7 +394,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -418,7 +418,7 @@ }, { "cell_type": "code", - "execution_count": 
9, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -467,13 +467,25 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ - "topic_list = generator.convert_response_to_yaml_list(\n", - " responses[0], model=model, model_kwargs=model_kwargs\n", - ")" + "from nemo_curator.synthetic.error import YamlConversionError\n", + "\n", + "while True:\n", + " try:\n", + " topic_list = generator.convert_response_to_yaml_list(\n", + " responses[0], model=model, model_kwargs=model_kwargs\n", + " )\n", + " break\n", + " except YamlConversionError as e:\n", + " print(f\"Hit: {e}, Retrying...\")\n", + " responses = generator.generate_macro_topics(\n", + " n_macro_topics=n_macro_topics, \n", + " model=model, \n", + " model_kwargs=model_kwargs\n", + " )" ] }, { @@ -485,7 +497,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -519,7 +531,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -541,7 +553,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -573,18 +585,26 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ - "subtopic_list = generator.convert_response_to_yaml_list(\n", - " subtopic_responses[0], model=model, model_kwargs=model_kwargs\n", - ")" + "while True:\n", + " try:\n", + " subtopic_list = generator.convert_response_to_yaml_list(\n", + " subtopic_responses[0], model=model, model_kwargs=model_kwargs\n", + " )\n", + " break\n", + " except YamlConversionError as e:\n", + " print(f\"Hit: {e}, Retrying...\")\n", + " subtopic_responses = generator.generate_subtopics(\n", + " macro_topic=topic_list[0], n_subtopics=n_subtopics, model=model\n", + " )" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 30, 
"metadata": {}, "outputs": [ { @@ -597,7 +617,7 @@ " 'Climate Change Mitigation and Adaptation Strategies']" ] }, - "execution_count": 21, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -626,7 +646,7 @@ }, { "cell_type": "code", - "execution_count": 120, + "execution_count": 38, "metadata": {}, "outputs": [], "source": [ @@ -646,33 +666,59 @@ }, { "cell_type": "code", - "execution_count": 121, + "execution_count": 39, "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Hit: Conversion introduced hallucinations. Original response:\n", + "1. If the current rate of carbon dioxide emissions is 50 billion tons per year and the concentration of CO2 in the atmosphere is currently 400 parts per million (ppm), assuming no removal or absorption, how many years will it take for the CO2 concentration to reach 500 ppm?\n", + "2. The Earth absorbs 24% of the solar energy it receives, while the rest is reflected back into space. If greenhouse gases cause the Earth to retain an additional 0.3% of the solar energy, what is the total percentage of solar energy that the Earth now retains?\n", + "3. If a factory releases 10,000 tons of CO2 per year and can be converted to use renewable energy, which would reduce its emissions to zero, how much will the global CO2 concentration decrease if the factory's emissions are completely eliminated after 10 years?\n", + "4. The greenhouse effect is responsible for trapping 0.03% of the total solar energy that reaches the Earth's surface. If the concentration of greenhouse gases in the atmosphere increases by 50%, how much more solar energy will be trapped, assuming a linear relationship?\n", + "5. Assume that the current global temperature increase due to greenhouse gas emissions is 0.01°C per year. 
If the total greenhouse gas emissions were to be reduced by 25% in the next 10 years, by how much would the temperature increase be reduced over the following 10 years?\n", + "6. Given that the average American produces 16.5 metric tons of CO2 per year, what percentage reduction in CO2 emissions would be needed to achieve the goal of keeping the global temperature increase below 1.5°C above pre-industrial levels, assuming all other factors remain constant?\n", + "7. If the global methane concentration is currently 1.8 parts per billion (ppb) and increases by 0.02 ppb per year due to human activities, how many years will it take for the methane concentration to reach 2.0 ppb, assuming no removal or absorption?\n", + "8. Assume that the current rate of deforestation releases 2.4 billion tons of CO2 per year. If all deforestation were stopped immediately, how much would the global CO2 concentration decrease after 50 years, assuming no other changes in emissions?\n", + "9. If a new technology can capture and store 90% of the CO2 emissions from a power plant, and the power plant emits 10,000 tons of CO2 per year, how much CO2 would be released into the atmosphere each year with the new technology?\n", + "10. 
Given that the global average temperature has increased by approximately 1°C since the pre-industrial era, and that this temperature increase is due to a 40% increase in the concentration of greenhouse gases, estimate the global average temperature increase if the concentration of greenhouse gases were to double.\n", + "Converted response:\n", + "['50 billion tons per year', '24.3%', '10,000 tons per year', '0.015%', '0.005°C per year', '66.25%', '55.56 years', '1.2 billion tons of CO2', '1,000 tons of CO2 per year', '2°C']\n", + "Hallucination:\n", + "24.3%, Retrying with fewer examples...\n" + ] + }, { "data": { "text/plain": [ - "['If the current rate of carbon dioxide emissions is 50 billion tons per year and the concentration of CO2 in the atmosphere is currently 400 parts per million (ppm), assuming no removal or absorption, how many years will it take for the CO2 concentration to reach 500 ppm?',\n", - " 'The Earth absorbs 24% of the solar energy it receives, while the rest is reflected back into space. If greenhouse gases cause the Earth to retain an additional 0.3% of the solar energy, what is the total percentage of solar energy that the Earth now retains?',\n", - " \"If a factory releases 10,000 tons of CO2 per year and can be converted to use renewable energy, which would reduce its emissions to zero, how much will the global CO2 concentration decrease if the factory's emissions are completely eliminated after 10 years?\",\n", - " \"The greenhouse effect is responsible for trapping 0.03% of the total solar energy that reaches the Earth's surface. If the concentration of greenhouse gases in the atmosphere increases by 50%, how much more solar energy will be trapped, assuming a linear relationship?\",\n", - " 'Assume that the current global temperature increase due to greenhouse gas emissions is 0.01°C per year. 
If the total greenhouse gas emissions were to be reduced by 25% in the next 10 years, by how much would the temperature increase be reduced over the following 10 years?',\n", - " 'Given that the average American produces 16.5 metric tons of CO2 per year, what percentage reduction in CO2 emissions would be needed to achieve the goal of keeping the global temperature increase below 1.5°C above pre-industrial levels, assuming all other factors remain constant?',\n", - " 'If the global methane concentration is currently 1.8 parts per billion (ppb) and increases by 0.02 ppb per year due to human activities, how many years will it take for the methane concentration to reach 2.0 ppb, assuming no removal or absorption?',\n", - " 'Assume that the current rate of deforestation releases 2.4 billion tons of CO2 per year. If all deforestation were stopped immediately, how much would the global CO2 concentration decrease after 50 years, assuming no other changes in emissions?',\n", - " 'If a new technology can capture and store 90% of the CO2 emissions from a power plant, and the power plant emits 10,000 tons of CO2 per year, how much CO2 would be prevented from entering the atmosphere each year?',\n", - " 'Assume that the global temperature increase due to greenhouse gas emissions is directly proportional to the logarithm of the CO2 concentration. 
If the current CO2 concentration is 400 ppm and the target concentration to limit warming to 1.5°C is 350 ppm, by how much would the global temperature increase be reduced if the CO2 concentration were immediately reduced to 350 ppm?']" + "['Carbon Footprint Calculation',\n", + " 'Greenhouse Gas Concentration Trends',\n", + " 'Global Temperature Change Estimation',\n", + " 'Absorption of Solar Radiation',\n", + " \"Climate Modeling a City's Temperature Increase\"]" ] }, - "execution_count": 121, + "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "question_list = generator.convert_response_to_yaml_list(\n", - " question_responses[0], model=model, model_kwargs=model_kwargs\n", - ")\n", + "while True:\n", + " try:\n", + " question_list = generator.convert_response_to_yaml_list(\n", + " question_responses[0], model=model, model_kwargs=model_kwargs\n", + " )\n", + " break\n", + " except YamlConversionError as e:\n", + " print(f\"Hit: {e}, Retrying with fewer examples...\")\n", + " question_responses = generator.generate_math_problem(\n", + " topic=subtopic_list[0],\n", + " n_openlines=5,\n", + " model=model\n", + " )\n", "question_list" ] }, @@ -708,7 +754,7 @@ }, { "cell_type": "code", - "execution_count": 73, + "execution_count": 40, "metadata": {}, "outputs": [], "source": [ @@ -731,16 +777,16 @@ }, { "cell_type": "code", - "execution_count": 76, + "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "\"1. A therapist has 15 clients that she sees for individual therapy each week. She wants to ensure that she has a diverse set of self-care activities planned for each day of the week to maintain her own mental health and well-being. How many different self-care activities should she aim to have in her weekly routine, if she doesn't want to repeat any activity on the same day?\\n2. A mental health support group wants to create a buddy system for its members to promote accountability and connection. 
They want each member to have at least one buddy but no more than three. If there are 20 members in the group, how many unique buddy pairings can be made?\\n3. A person is keeping track of their mood using a 10-point scale, where 1 represents the lowest mood and 10 represents the highest. They want to set a goal of having an average mood score of at least 7 over the course of a month. If they record their mood score every day for 30 days, what is the minimum total mood score they need to achieve in order to meet their goal?\\n4. A high school offers various wellness clubs and activities for students to promote mental health and well-being. Each club meets twice a week, and there are 10 unique clubs available. If a student wants to participate in at least one club per day and attend no more than 4 club meetings per day, what is the maximum number of clubs they can join?\\n5. A mental health counselor is creating a schedule to balance their client sessions, administrative tasks, and self-care. They want to spend at least 60% of their working hours on client sessions, no more than 20% on administrative tasks, and at least 20% on self-care. If they have 40 working hours available per week, how many hours should they dedicate to each category?\\n6. A wellness center offers group therapy sessions with a maximum of 8 participants per group. They want to ensure that each participant has at least two opportunities to attend a group session each week. If they have enough therapists to run 10 group sessions per week, what is the minimum number of unique participants they need to accommodate so that everyone can attend at least two sessions?\\n7. A person wants to create a daily self-care routine that includes meditation, exercise, reading, and journaling. They have 60 minutes available for their routine and want to spend an equal amount of time on each activity. 
If they want to ensure they have at least a 5-minute break between each activity, how many minutes should they dedicate to each activity?\\n8. A mental health organization wants to create a support network for its employees. They want to ensure that each employee is connected to at least two other employees within the organization. If there are 50 employees in total, how many unique connections can be made while ensuring each employee has at least two?\\n9. A school wants to provide mental health education to its students. They plan to have guest speakers present for 45-minute sessions, with each speaker addressing one specific mental health topic. If they want to cover 10 different mental health topics and ensure that each student attends at least two sessions, what is the minimum number of guest speakers they need if there are 200 students in the school?\\n10. A counselor wants to track their client's progress on their mental health well-being. They use a scale of 1-5, where 1 represents poor well-being and 5 represents excellent well-being. They have 10 clients and want to see an overall improvement of at least 10% in their clients' well-being scores after 6 months. What is the minimum total increase in well-being scores they should aim for across all their clients?\"" + "\"1. If a forest covering 10,000 square kilometers is cut down, approximately how many trees are lost? (Assuming an average of 500 trees per hectare and 1 hectare = 0.01 square kilometers)\\n2. If deforestation continues at the current rate, how many years will it take for the world's rainforests to disappear completely? (Assuming the current rate is 150,000 square kilometers per year and the total area of rainforests is 11,500,000 square kilometers)\\n3. If the average temperature increases by 0.2°C for every 1% decrease in forest cover, what will be the increase in temperature if 2% of the forest cover is lost?\\n4. 
If a country has a carbon footprint of 500 million tons per year and decides to reduce it by planting trees that absorb 10,000 tons of carbon dioxide per square kilometer per year, how many square kilometers of forest would need to be planted to offset the entire carbon footprint?\\n5. If a forest provides habitat for 400 species of birds and 30% of those species are threatened by deforestation, how many bird species are at risk?\\n6. If a logging company harvests trees from a 500-hectare forest every 20 years, what is the annual deforestation rate?\\n7. If 100,000 tons of carbon dioxide are released into the atmosphere each day due to deforestation, how many tons of carbon dioxide are released in a year (assuming 365 days in a year)?\\n8. If 20% of the Amazon rainforest has been destroyed and the Amazon holds 400 billion tons of carbon, how much carbon has been released into the atmosphere due to deforestation?\\n9. If a forest serves as a watershed for a city of 1 million people, what is the impact on the city's water supply if 50% of the forest is lost?\\n10. If planting trees can increase biodiversity, and a tree species has 500 seeds per kilogram and each seed grows into a new tree, how many new trees can be created from 100 kilograms of seeds?\"" ] }, - "execution_count": 76, + "execution_count": 41, "metadata": {}, "output_type": "execute_result" } @@ -758,25 +804,25 @@ }, { "cell_type": "code", - "execution_count": 122, + "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[\"A therapist has 15 clients that she sees for individual therapy each week. She wants to ensure that she has a diverse set of self-care activities planned for each day of the week to maintain her own mental health and well-being. 
How many different self-care activities should she aim to have in her weekly routine, if she doesn't want to repeat any activity on the same day?\",\n", - " 'A mental health support group wants to create a buddy system for its members to promote accountability and connection. They want each member to have at least one buddy but no more than three. If there are 20 members in the group, how many unique buddy pairings can be made?',\n", - " 'A person is keeping track of their mood using a 10-point scale, where 1 represents the lowest mood and 10 represents the highest. They want to set a goal of having an average mood score of at least 7 over the course of a month. If they record their mood score every day for 30 days, what is the minimum total mood score they need to achieve in order to meet their goal?',\n", - " 'A high school offers various wellness clubs and activities for students to promote mental health and well-being. Each club meets twice a week, and there are 10 unique clubs available. If a student wants to participate in at least one club per day and attend no more than 4 club meetings per day, what is the maximum number of clubs they can join?',\n", - " 'A mental health counselor is creating a schedule to balance their client sessions, administrative tasks, and self-care. They want to spend at least 60% of their working hours on client sessions, no more than 20% on administrative tasks, and at least 20% on self-care. If they have 40 working hours available per week, how many hours should they dedicate to each category?',\n", - " 'A wellness center offers group therapy sessions with a maximum of 8 participants per group. They want to ensure that each participant has at least two opportunities to attend a group session each week. 
If they have enough therapists to run 10 group sessions per week, what is the minimum number of unique participants they need to accommodate so that everyone can attend at least two sessions?',\n", - " 'A person wants to create a daily self-care routine that includes meditation, exercise, reading, and journaling. They have 60 minutes available for their routine and want to spend an equal amount of time on each activity. If they want to ensure they have at least a 5-minute break between each activity, how many minutes should they dedicate to each activity?',\n", - " 'A mental health organization wants to create a support network for its employees. They want to ensure that each employee is connected to at least two other employees within the organization. If there are 50 employees in total, how many unique connections can be made while ensuring each employee has at least two?',\n", - " 'A school wants to provide mental health education to its students. They plan to have guest speakers present for 45-minute sessions, with each speaker addressing one specific mental health topic. If they want to cover 10 different mental health topics and ensure that each student attends at least two sessions, what is the minimum number of guest speakers they need if there are 200 students in the school?',\n", - " \"A counselor wants to track their client's progress on their mental health well-being. They use a scale of 1-5, where 1 represents poor well-being and 5 represents excellent well-being. They have 10 clients and want to see an overall improvement of at least 10% in their clients' well-being scores after 6 months. What is the minimum total increase in well-being scores they should aim for across all their clients?\"]" + "['If a forest covering 10,000 square kilometers is cut down, approximately how many trees are lost? 
(Assuming an average of 500 trees per hectare and 1 hectare = 0.01 square kilometers)',\n", + " \"If deforestation continues at the current rate, how many years will it take for the world's rainforests to disappear completely? (Assuming the current rate is 150,000 square kilometers per year and the total area of rainforests is 11,500,000 square kilometers)\",\n", + " 'If the average temperature increases by 0.2°C for every 1% decrease in forest cover, what will be the increase in temperature if 2% of the forest cover is lost?',\n", + " 'If a country has a carbon footprint of 500 million tons per year and decides to reduce it by planting trees that absorb 10,000 tons of carbon dioxide per square kilometer per year, how many square kilometers of forest would need to be planted to offset the entire carbon footprint?',\n", + " 'If a forest provides habitat for 400 species of birds and 30% of those species are threatened by deforestation, how many bird species are at risk?',\n", + " 'If a logging company harvests trees from a 500-hectare forest every 20 years, what is the annual deforestation rate?',\n", + " 'If 100,000 tons of carbon dioxide are released into the atmosphere each day due to deforestation, how many tons of carbon dioxide are released in a year (assuming 365 days in a year)?',\n", + " 'If 20% of the Amazon rainforest has been destroyed and the Amazon holds 400 billion tons of carbon, how much carbon has been released into the atmosphere due to deforestation?',\n", + " \"If a forest serves as a watershed for a city of 1 million people, what is the impact on the city's water supply if 50% of the forest is lost?\",\n", + " 'If planting trees can increase biodiversity, and a tree species has 500 seeds per kilogram and each seed grows into a new tree, how many new trees can be created from 100 kilograms of seeds?']" ] }, - "execution_count": 122, + "execution_count": 42, "metadata": {}, "output_type": "execute_result" } @@ -803,7 +849,7 @@ }, { "cell_type": 
"code", - "execution_count": 60, + "execution_count": 43, "metadata": {}, "outputs": [], "source": [ @@ -819,25 +865,25 @@ }, { "cell_type": "code", - "execution_count": 131, + "execution_count": 45, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['Develop a mathematical model to quantify the relationship between deforestation, carbon sequestration, and the global carbon budget, taking into account the impacts on biodiversity and climate change.',\n", - " 'Analyze the impact of various deforestation scenarios on species diversity and extinction rates using advanced mathematical techniques such as population dynamics models and biodiversity indices.',\n", - " 'Create a complex mathematical function to estimate the change in surface temperature and precipitation patterns as a result of deforestation-induced climate change.',\n", - " 'Utilize statistical methods to assess the correlation between deforestation rates and changes in local and regional climate patterns, controlling for other factors such as land use and anthropogenic emissions.',\n", - " 'Develop a mathematical framework to evaluate the optimal balance between deforestation for land use and the preservation of biodiversity and climate stability.',\n", - " 'Use mathematical modeling to predict the long-term impacts of deforestation on the global carbon cycle and the feedback loops between carbon sinks and sources.',\n", - " 'Utilize probability theory and stochastic processes to model the variability and uncertainty in the relationship between deforestation, biodiversity, and climate change.',\n", - " 'Develop a mathematical method for integrating the impacts of deforestation on biodiversity and climate change into economic cost-benefit analyses and decision-making frameworks.',\n", - " 'Use optimization algorithms to identify the most effective strategies for reducing deforestation and mitigating its impacts on biodiversity and climate change.',\n", - " 'Use differential equations to model 
the complex interactions between deforestation, biodiversity loss, and climate change, and develop strategies for managing these systems in a sustainable manner.']" + "['Develop a mathematical model to quantify the relationship between deforestation, carbon sequestration, and the global carbon budget, taking into account the impacts on biodiversity and climate change',\n", + " 'Analyze the impact of various deforestation scenarios on species diversity and extinction rates using advanced mathematical techniques such as population dynamics models and biodiversity indices',\n", + " 'Create a complex mathematical function to estimate the change in surface temperature and precipitation patterns as a result of deforestation-induced climate change',\n", + " 'Utilize statistical methods to assess the correlation between deforestation rates and changes in local and regional climate patterns, controlling for other factors such as land use and anthropogenic emissions',\n", + " 'Develop a mathematical framework to evaluate the optimal balance between deforestation for land use and the preservation of biodiversity and climate stability',\n", + " 'Use mathematical modeling to predict the long-term impacts of deforestation on the global carbon cycle and the feedback loops between carbon sinks and sources',\n", + " 'Utilize probability theory and stochastic processes to model the variability and uncertainty in the relationship between deforestation, biodiversity, and climate change',\n", + " 'Develop a mathematical method for integrating the impacts of deforestation on biodiversity and climate change into economic cost-benefit analyses and decision-making frameworks',\n", + " 'Use optimization algorithms to identify the most effective strategies for reducing deforestation and mitigating its impacts on biodiversity and climate change',\n", + " 'Use differential equations to model the complex interactions between deforestation, biodiversity loss, and climate change, and develop 
strategies for managing these systems in a sustainable manner']" ] }, - "execution_count": 131, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } @@ -850,11 +896,20 @@ " model=model,\n", " prompt_template=DIFFICULT_MATH_PROMPT\n", ")\n", - "\n", - "# convert the response to a list\n", - "difficult_question_list = generator.convert_response_to_yaml_list(\n", - " difficult_question_responses[0], model=model, model_kwargs=model_kwargs\n", - ")\n", + "while True:\n", + " try:\n", + " difficult_question_list = generator.convert_response_to_yaml_list(\n", + " difficult_question_responses[0], model=model, model_kwargs=model_kwargs\n", + " )\n", + " break\n", + " except YamlConversionError as e:\n", + " print(f\"Hit: {e}, Retrying with fewer examples...\")\n", + " difficult_question_responses = generator.generate_math_problem(\n", + " topic=subtopic_list[1],\n", + " n_openlines=5,\n", + " model=model,\n", + " prompt_template=DIFFICULT_MATH_PROMPT\n", + " )\n", "difficult_question_list" ] }, @@ -871,7 +926,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 46, "metadata": {}, "outputs": [], "source": [ @@ -895,7 +950,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 47, "metadata": {}, "outputs": [], "source": [