diff --git a/lessons/Data-Science/CODAP/codap-exploring/langs/en-us/index.adoc b/lessons/Data-Science/CODAP/codap-exploring/langs/en-us/index.adoc index bf899b114fb..9b4682c2936 100644 --- a/lessons/Data-Science/CODAP/codap-exploring/langs/en-us/index.adoc +++ b/lessons/Data-Science/CODAP/codap-exploring/langs/en-us/index.adoc @@ -22,8 +22,8 @@ | Student-facing Lesson Goals | -* Let's familiarize ourselves with CODAP. -* Let's learn how to tell whether a value is a Number or a String. +- Let's familiarize ourselves with CODAP. +- Let's learn how to tell whether a value is a Number or a String. | Materials |[.materials-links] @@ -32,7 +32,7 @@ | Key Points For The Facilitator | -* Error messages are the computer trying to give us a clue that something is wrong. Model reacting to error messages with interest to demonstrate to students that the messages are a helpful tool. +- Error messages are the computer trying to give us a clue that something is wrong. Model reacting to error messages with interest to demonstrate to students that the messages are a helpful tool. |=== @@ -77,7 +77,7 @@ Debrief with students. Discuss any difficulties that students encountered, for i === Launch -The folks at the shelter need help completing a variety of tasks using CODAP! To do this, they need to learn how CODAP handles the kinds of data in the animals data set (numbers, words, and more...). +The folks at the shelter need help completing a variety of tasks using CODAP! To do this, they need to learn how CODAP handles the kinds of data in the animals dataset (numbers, words, and more...). 
=== Investigate diff --git a/lessons/Data-Science/bar-and-pie-charts/langs/en-us/index.adoc b/lessons/Data-Science/bar-and-pie-charts/langs/en-us/index.adoc index 9ea054d4194..d7213b780d3 100644 --- a/lessons/Data-Science/bar-and-pie-charts/langs/en-us/index.adoc +++ b/lessons/Data-Science/bar-and-pie-charts/langs/en-us/index.adoc @@ -38,7 +38,7 @@ |=== == Grouping by Value -@objective{categorical-visualizations-pyret} +@objective{categorical-visualizations-proglang} @objective{interpret-bar} @objective{interpret-pie} @objective{pros-cons-bar-pie} @@ -169,7 +169,7 @@ Open the @opt-starter-file{hair} to try them out! == Groups and Subgroups -@objective{compound-categorical-visualizations-pyret} +@objective{compound-categorical-visualizations-proglang} @objective{interpret-multi-bar} @objective{interpret-stacked-bar} === Overview diff --git a/lessons/Data-Science/box-plots/langs/en-us/images/boxplot1-launch.png b/lessons/Data-Science/box-plots/langs/en-us/images/boxplot1-launch.png new file mode 100644 index 00000000000..1feca0f4323 Binary files /dev/null and b/lessons/Data-Science/box-plots/langs/en-us/images/boxplot1-launch.png differ diff --git a/lessons/Data-Science/box-plots/langs/en-us/images/boxplot2-launch.png b/lessons/Data-Science/box-plots/langs/en-us/images/boxplot2-launch.png new file mode 100644 index 00000000000..02eabd94637 Binary files /dev/null and b/lessons/Data-Science/box-plots/langs/en-us/images/boxplot2-launch.png differ diff --git a/lessons/Data-Science/box-plots/langs/en-us/images/histogram-launch.png b/lessons/Data-Science/box-plots/langs/en-us/images/histogram-launch.png new file mode 100644 index 00000000000..5ec91138d14 Binary files /dev/null and b/lessons/Data-Science/box-plots/langs/en-us/images/histogram-launch.png differ diff --git a/lessons/Data-Science/box-plots/langs/en-us/images/launch-boxplot-a.png b/lessons/Data-Science/box-plots/langs/en-us/images/launch-boxplot-a.png new file mode 100644 index 00000000000..0ce687a0dd9 Binary 
files /dev/null and b/lessons/Data-Science/box-plots/langs/en-us/images/launch-boxplot-a.png differ diff --git a/lessons/Data-Science/box-plots/langs/en-us/images/launch-boxplot-b.png b/lessons/Data-Science/box-plots/langs/en-us/images/launch-boxplot-b.png new file mode 100644 index 00000000000..28125b2c43d Binary files /dev/null and b/lessons/Data-Science/box-plots/langs/en-us/images/launch-boxplot-b.png differ diff --git a/lessons/Data-Science/box-plots/langs/en-us/images/launch-histo-c.png b/lessons/Data-Science/box-plots/langs/en-us/images/launch-histo-c.png new file mode 100644 index 00000000000..b4acc923a06 Binary files /dev/null and b/lessons/Data-Science/box-plots/langs/en-us/images/launch-histo-c.png differ diff --git a/lessons/Data-Science/box-plots/langs/en-us/images/launch-histo-d.png b/lessons/Data-Science/box-plots/langs/en-us/images/launch-histo-d.png new file mode 100644 index 00000000000..a248223e2b0 Binary files /dev/null and b/lessons/Data-Science/box-plots/langs/en-us/images/launch-histo-d.png differ diff --git a/lessons/Data-Science/box-plots/langs/en-us/images/lesson-images.json b/lessons/Data-Science/box-plots/langs/en-us/images/lesson-images.json index 9f6149f23a8..140a8d0a05d 100644 --- a/lessons/Data-Science/box-plots/langs/en-us/images/lesson-images.json +++ b/lessons/Data-Science/box-plots/langs/en-us/images/lesson-images.json @@ -204,5 +204,40 @@ "description": "a box plot of the Smith family data clustered tightly at the right end of the number line", "source" : "Created by the Bootstrap Team in Pyret based on contrived data", "license" : "Creative Commons 4.0 - NC - SA" + }, + "launch-boxplot-a.png": { + "description": "a box plot that is skewed to the right; the center box has a larger area on the right", + "source" : "Created by the Bootstrap Team in Pyret based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "launch-boxplot-b.png": { + "description": "a box plot that is skewed to the left; the center box 
has a larger area on the left", + "source" : "Created by the Bootstrap Team in Pyret", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "launch-histo-c.png": { + "description": "a histogram that is symmetric with a peak in the middle", + "source" : "Created by the Bootstrap Team in Pyret", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "launch-histo-d.png": { + "description": "A symmetric histogram with two peaks", + "source" : "Created by the Bootstrap Team in Pyret", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "boxplot1-launch.png": { + "description": "a box plot that is skewed to the right; the center box has a larger area on the right", + "source" : "Created by the Bootstrap Team in Pyret", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "boxplot2-launch.png": { + "description": "a box plot that is skewed to the left; the center box has a larger area on the left", + "source" : "Created by the Bootstrap Team in Pyret", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "histogram-launch.png": { + "description": "A histogram that is skewed to the right", + "source" : "Created by the Bootstrap Team in Pyret", + "license" : "Creative Commons 4.0 - NC - SA" } } diff --git a/lessons/Data-Science/box-plots/langs/en-us/index.adoc b/lessons/Data-Science/box-plots/langs/en-us/index.adoc index c7a8b8db3ea..ead72fe8129 100644 --- a/lessons/Data-Science/box-plots/langs/en-us/index.adoc +++ b/lessons/Data-Science/box-plots/langs/en-us/index.adoc @@ -418,7 +418,7 @@ ifnotslide{@centered-image{images/codap-box-plot.png, 300}} @A{The third quartile is the value for which 75% of the animals weighed that amount or less. 
Another way of saying that would be that it is the value for which 25% of the animals weigh that amount or more.} @Q{Why do you think this visualization is sometimes called a "box and whisker plot"?} @A{The distance between Min/Q1 and Q3/Max is drawn like whiskers!} -@Q{Could we make a box plot for every column in the data set?} +@Q{Could we make a box plot for every column in the dataset?} @A{No. We can only make box plots for @vocab{quantitative} columns.} } @@ -499,7 +499,7 @@ It is extremely common for students to forget that the quartiles divide the data Want to check student mastery of the content you've just taught? Administer @assessment{box-plots-check3-desmos} to get a snapshot of your students' current level of mastery. Make sure you have created a link or code for your class to the assessment. -If you'd prefer to wait until your students have completed the __entire__ lesson to check mastery, we also offer a cumulative assessment at the end of @link{https://www.bootstrapworld.org/materials/latest/en-us/lessons/box-plots/index.shtml?pathway=data-science#_interpreting_box_plots_pyret, "Connecting Box Plots and Histograms"}, below. +If you'd prefer to wait until your students have completed the _entire_ lesson to check mastery, we also offer a cumulative assessment at the end of @link{https://www.bootstrapworld.org/materials/latest/en-us/lessons/box-plots/index.shtml?pathway=data-science#_interpreting_box_plots_pyret, "Connecting Box Plots and Histograms"}, below. } @@ -514,17 +514,24 @@ Box plots and histograms give us two different views of the shape of quantitativ @slidebreak -[cols="^.^1a,^.^1a", frame="none", grid="none"] + +@QandA{ +@Q{One of these box plots represents the same data as the histogram. Which one is it? 
How do you know?} +@A{The first box plot represents the same data as the histogram.} +@A{Sample response: The right whisker of the first box plot is longer, suggesting that there are outliers on the right causing the data to skew right.} + +@center{@image{images/histogram-launch.png,175}} + +[cols="^.^1a,^.^1a"] |=== -| @image{images/box-plot-pounds-cropped.png} -| @image{images/histogram-pounds.png} +| @image{images/boxplot2-launch.png, 125} +| @image{images/boxplot1-launch.png, 125} |=== -@lesson-instruction{ -- Take a minute to look at these two visualizations we've made for the "pounds" column and try to connect them in your mind. -- How would you explain the differences in how the data gets displayed? } +@teacher{Students may notice that the left part of the box is larger on the box plot B. Although that observation is correct, the box's larger area does *not* indicate that *more* data points are clustered there. Remind students that an equal number of data points are clustered in each quartile of the box plot. You can emphasize this core idea as you discuss the table comparing box plots and histograms@ifnotslide{, below.}@ifslide{ on the next slide.}} + @slidebreak [cols="1,^1,^3, ^3", options="header"] @@ -549,6 +556,44 @@ shorter bars to one side |=== +@slidebreak + +@QandA{ +@Q{Match the box plots below with the corresponding histograms. How did you make your matches?} +@A{Box plot A represents the same data as histogram C. Box plot B represents the same data as histogram D. Students may suspect that the short whiskers of box plot B match with the short bars on the edges of histogram C. Because box plots represent density and not frequency, this reasoning is incorrect. 
} + +++++ + +++++ + +[.lettering, cols="^.^1a,^.^1a", header="none", stripes="none"] +|=== +| A + +@image{images/launch-boxplot-a.png, 125} +| B + +@image{images/launch-boxplot-b.png, 125} +| C + +@image{images/launch-histo-c.png, 125} +| D + +@image{images/launch-histo-d.png, 125} + +|=== + + +} + +@teacher{The matching activities in this launch section were drawn from research conducted by @citation{lem-et-al-2011, "Lem et al. (2011)"}.} + + + @strategy{Kinesthetic Activity}{ Divide the class into groups, and give each group a ruler and a ball of play-dough. Have them draw a number line from 0-6 with the ruler, marking off the points at 0, 3, 4, 4.5 and 6 inches. Have the groups roll the dough into a thick cylinder, divide that cylinder in half, and then split each half to form four _equally-sized cylinders_. The play-dough represents a @vocab{sample}, with values divided into four quarters. diff --git a/lessons/Data-Science/contracts-visualizations/langs/en-us/index.adoc b/lessons/Data-Science/contracts-visualizations/langs/en-us/index.adoc index e14b6e13bc4..8dab2762e07 100644 --- a/lessons/Data-Science/contracts-visualizations/langs/en-us/index.adoc +++ b/lessons/Data-Science/contracts-visualizations/langs/en-us/index.adoc @@ -102,7 +102,7 @@ Students will be introduced to functions for making one-variable visualizations The goal here is for students to become familiar with using @dist-link{Contracts.shtml, Contracts} to write expressions that will produce visualizations. But knowing how to __make__ a histogram doesn't mean a student really __understands__ histograms, and that's OK! 
-@teacher{Once students know how to use Contracts to write expressions to make these visualizations, we have dedicated, in-depth lessons focused on understanding @lesson-link{bar-and-pie-charts}, @lesson-link{histograms}, @lesson-link{visualizing-the-shape-of-data}, @lesson-link{box-plots}, @lesson-link{scatter-plots}, @lesson-link{linear-regression}, @lesson-link{advanced-visualizations}, etc.} +@teacher{Once students know how to use Contracts to write expressions to make these visualizations, we have dedicated, in-depth lessons focused on understanding @lesson-link{bar-and-pie-charts}, @lesson-link{histograms-visualize}, @lesson-link{histograms-interpret}, @lesson-link{box-plots}, @lesson-link{scatter-plots}, @lesson-link{linear-regression}, @lesson-link{advanced-visualizations}, etc.} === Launch diff --git a/lessons/Data-Science/correlations/langs/en-us/index.adoc b/lessons/Data-Science/correlations/langs/en-us/index.adoc index b8ff1c0b6f9..de30f40c4d5 100644 --- a/lessons/Data-Science/correlations/langs/en-us/index.adoc +++ b/lessons/Data-Science/correlations/langs/en-us/index.adoc @@ -175,7 +175,7 @@ Complete @printable-exercise{identifying-form.adoc} and focus _just on the secon === Synthesize - It only makes sense to look for direction in linear relationships! -- Which data sets appear to have a positive correlation between the variables? +- Which datasets appear to have a positive correlation between the variables? == Correlations have _Strength_ @@ -362,7 +362,7 @@ Students often giggle at some of the Spurious Correlations examples, but fail to == Exploration Project (Correlations) === Overview -Students apply what they have learned about correlations to their chosen dataset. They will add two or more items to their @starter-file{exploration-project}: (1) a correlation they think they see in the data set, and (2) the form, direction and strength of that correlation. +Students apply what they have learned about correlations to their chosen dataset. 
They will add two or more items to their @starter-file{exploration-project}: (1) a correlation they think they see in the dataset, and (2) the form, direction and strength of that correlation. @teacher{Visit @lesson-link{project-data-exploration} to learn more about the sequence and scope. Teachers with time and interest can build on the exploration by inviting students to take a deep dive into the questions they develop with our @lesson-link{project-research-paper}. } diff --git a/lessons/Data-Science/data-cycle/langs/en-us/index.adoc b/lessons/Data-Science/data-cycle/langs/en-us/index.adoc index 2b630971a66..3024103a52f 100644 --- a/lessons/Data-Science/data-cycle/langs/en-us/index.adoc +++ b/lessons/Data-Science/data-cycle/langs/en-us/index.adoc @@ -230,7 +230,7 @@ Most questions can be broken down into one of four categories: @Q{What kind of question is "Are more animals fixed or unfixed?"? How do you know?} @A{It's an _arithmetic question_ because answering it requires comparing two simple calculations.} @Q{What kind of question is "Are snails or tarantulas taller?"? How do you know?} -@A{It's a _question we can't answer_ because there isn't any information in this data set about the heights of the animals.} +@A{It's a _question we can't answer_ because there isn't any information in this dataset about the heights of the animals.} @Q{What kind of question is "How old is Toggle?" How do you know?} @A{It's a _lookup question_ because it can be answered by just looking at the table.} @Q{What kind of question is "Are older animals adopted more quickly than younger animals?" How do you know?} @@ -368,7 +368,7 @@ Throughout the remainder of Bootstrap:Data Science we will be using Data Cycle p @slidebreak @QandA{ -So far we have always worked with the Animals Starter File, which is a sample taken from a larger data set. + +So far we have always worked with the Animals Starter File, which is a sample taken from a larger dataset. 
+ To complete this page we will be working with the @starter-file{expanded-animals}. @Q{What else do you Notice?} @A{*Be sure to surface the following:*} diff --git a/lessons/Data-Science/data-cycle/langs/en-us/pages/notes-data-cycle.adoc b/lessons/Data-Science/data-cycle/langs/en-us/pages/notes-data-cycle.adoc index 35305d8d091..bef3af3f309 100644 --- a/lessons/Data-Science/data-cycle/langs/en-us/pages/notes-data-cycle.adoc +++ b/lessons/Data-Science/data-cycle/langs/en-us/pages/notes-data-cycle.adoc @@ -27,13 +27,13 @@ Each question a Data Scientist asks adds a chapter to the story of their researc * *Arithmetic questions* - Answered by doing calculations (comparing, averaging, totaling, etc.) with values from one single column. Examples of arithmetic questions might be “How much does the heaviest animal weigh?” or “What is the average age of animals from the shelter?” - * *Statistical questions* - These are questions that both _expect some variability in the data_ related to the question and _account for it in the answers_. Statistical questions often involve multiple steps to answer, and the answers aren't black and white. When we compare two statistics we are actually comparing two data sets. If we ask "are dogs heavier than cats?", we know that not every dog is heavier than every cat! We just want to know if it is _generally_ true or _generally_ false! + * *Statistical questions* - These are questions that both _expect some variability in the data_ related to the question and _account for it in the answers_. Statistical questions often involve multiple steps to answer, and the answers aren't black and white. When we compare two statistics we are actually comparing two datasets. If we ask "are dogs heavier than cats?", we know that not every dog is heavier than every cat! We just want to know if it is _generally_ true or _generally_ false! 
* *Questions we can't answer* - We might wonder where the animal shelter is located, or what time of year the data was gathered! But the data in the table won’t help us answer that question, so as Data Scientists we might need to do some research beyond the data. And if nothing turns up, we simply recognize that there are limits to what we can analyze. @vspace{1ex} -- Next, we *Consider Data*, by determining which parts of the data set we need to answer our question. Sometimes we don't have the data we need, so we conduct a survey, observe and record data, or find another existing dataset. Since our data is contained in a table, it's useful to start by asking two questions: +- Next, we *Consider Data*, by determining which parts of the dataset we need to answer our question. Sometimes we don't have the data we need, so we conduct a survey, observe and record data, or find another existing dataset. Since our data is contained in a table, it's useful to start by asking two questions: * What rows do we care about? - Is it all the animals? Just the lizards? * What columns do we need? - Are we examining the ages of the animals? Their weights? 
diff --git a/lessons/Data-Science/histograms/langs/en-us/assessments/assessments.adoc b/lessons/Data-Science/dot-plots/langs/en-us/assessments/assessments.adoc similarity index 100% rename from lessons/Data-Science/histograms/langs/en-us/assessments/assessments.adoc rename to lessons/Data-Science/dot-plots/langs/en-us/assessments/assessments.adoc diff --git "a/lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-17 at 4.40.34\342\200\257PM.png" "b/lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-17 at 4.40.34\342\200\257PM.png" similarity index 100% rename from "lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-17 at 4.40.34\342\200\257PM.png" rename to "lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-17 at 4.40.34\342\200\257PM.png" diff --git "a/lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-22 at 4.28.02\342\200\257PM.png" "b/lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-22 at 4.28.02\342\200\257PM.png" similarity index 100% rename from "lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-22 at 4.28.02\342\200\257PM.png" rename to "lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-22 at 4.28.02\342\200\257PM.png" diff --git "a/lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-22 at 4.40.38\342\200\257PM.png" "b/lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-22 at 4.40.38\342\200\257PM.png" similarity index 100% rename from "lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-22 at 
4.40.38\342\200\257PM.png" rename to "lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-22 at 4.40.38\342\200\257PM.png" diff --git "a/lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-22 at 4.40.49\342\200\257PM.png" "b/lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-22 at 4.40.49\342\200\257PM.png" similarity index 100% rename from "lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-22 at 4.40.49\342\200\257PM.png" rename to "lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-22 at 4.40.49\342\200\257PM.png" diff --git "a/lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-22 at 4.49.39\342\200\257PM.png" "b/lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-22 at 4.49.39\342\200\257PM.png" new file mode 100644 index 00000000000..05e06714621 Binary files /dev/null and "b/lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-22 at 4.49.39\342\200\257PM.png" differ diff --git a/lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/attack.png b/lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/attack.png similarity index 100% rename from lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/attack.png rename to lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/attack.png diff --git a/lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/defense.png b/lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/defense.png similarity index 100% rename from 
lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/defense.png rename to lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/defense.png diff --git a/lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/pounds10.png b/lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/pounds10.png new file mode 100644 index 00000000000..b1b69015d0e Binary files /dev/null and b/lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/pounds10.png differ diff --git a/lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/pounds100.png b/lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/pounds100.png new file mode 100644 index 00000000000..38abd7bc6d5 Binary files /dev/null and b/lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/pounds100.png differ diff --git a/lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/pounds100b.png b/lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/pounds100b.png similarity index 100% rename from lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/pounds100b.png rename to lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/pounds100b.png diff --git a/lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/pounds10b.png b/lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/pounds10b.png similarity index 100% rename from lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/pounds10b.png rename to lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/pounds10b.png diff --git a/lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/pounds25.png 
b/lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/pounds25.png new file mode 100644 index 00000000000..40f986ff0e5 Binary files /dev/null and b/lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/pounds25.png differ diff --git a/lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/pounds25b.png b/lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/pounds25b.png similarity index 100% rename from lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/pounds25b.png rename to lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/pounds25b.png diff --git a/lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/pounds5.png b/lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/pounds5.png new file mode 100644 index 00000000000..fe8bb522e19 Binary files /dev/null and b/lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/pounds5.png differ diff --git a/lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/pounds5a.png b/lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/pounds5a.png similarity index 100% rename from lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/pounds5a.png rename to lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/pounds5a.png diff --git a/lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/pounds5b.png b/lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/pounds5b.png similarity index 100% rename from lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/pounds5b.png rename to lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/pounds5b.png diff 
--git a/lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/weeks.png b/lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/weeks.png similarity index 100% rename from lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/weeks.png rename to lessons/Data-Science/dot-plots/langs/en-us/assessments/images-used-in-assessments/weeks.png diff --git a/lessons/Data-Science/dot-plots/langs/en-us/images/Ad.png b/lessons/Data-Science/dot-plots/langs/en-us/images/Ad.png new file mode 100644 index 00000000000..6092f6655f8 Binary files /dev/null and b/lessons/Data-Science/dot-plots/langs/en-us/images/Ad.png differ diff --git a/lessons/Data-Science/dot-plots/langs/en-us/images/Ah.png b/lessons/Data-Science/dot-plots/langs/en-us/images/Ah.png new file mode 100644 index 00000000000..d9d45b6fd07 Binary files /dev/null and b/lessons/Data-Science/dot-plots/langs/en-us/images/Ah.png differ diff --git a/lessons/Data-Science/dot-plots/langs/en-us/images/Bd.png b/lessons/Data-Science/dot-plots/langs/en-us/images/Bd.png new file mode 100644 index 00000000000..65a03d119c3 Binary files /dev/null and b/lessons/Data-Science/dot-plots/langs/en-us/images/Bd.png differ diff --git a/lessons/Data-Science/dot-plots/langs/en-us/images/Bh.png b/lessons/Data-Science/dot-plots/langs/en-us/images/Bh.png new file mode 100644 index 00000000000..745c6246d4d Binary files /dev/null and b/lessons/Data-Science/dot-plots/langs/en-us/images/Bh.png differ diff --git a/lessons/Data-Science/dot-plots/langs/en-us/images/Cd.png b/lessons/Data-Science/dot-plots/langs/en-us/images/Cd.png new file mode 100644 index 00000000000..62f07a28cb2 Binary files /dev/null and b/lessons/Data-Science/dot-plots/langs/en-us/images/Cd.png differ diff --git a/lessons/Data-Science/dot-plots/langs/en-us/images/Ch.png b/lessons/Data-Science/dot-plots/langs/en-us/images/Ch.png new file mode 100644 index 00000000000..0fc895099b9 Binary files /dev/null 
and b/lessons/Data-Science/dot-plots/langs/en-us/images/Ch.png differ diff --git a/lessons/Data-Science/dot-plots/langs/en-us/images/Dd.png b/lessons/Data-Science/dot-plots/langs/en-us/images/Dd.png new file mode 100644 index 00000000000..0a606ccfbf0 Binary files /dev/null and b/lessons/Data-Science/dot-plots/langs/en-us/images/Dd.png differ diff --git a/lessons/Data-Science/dot-plots/langs/en-us/images/Dh.png b/lessons/Data-Science/dot-plots/langs/en-us/images/Dh.png new file mode 100644 index 00000000000..72401ad2940 Binary files /dev/null and b/lessons/Data-Science/dot-plots/langs/en-us/images/Dh.png differ diff --git a/lessons/Data-Science/dot-plots/langs/en-us/images/Ed.png b/lessons/Data-Science/dot-plots/langs/en-us/images/Ed.png new file mode 100644 index 00000000000..138d32fcc34 Binary files /dev/null and b/lessons/Data-Science/dot-plots/langs/en-us/images/Ed.png differ diff --git a/lessons/Data-Science/dot-plots/langs/en-us/images/Eh.png b/lessons/Data-Science/dot-plots/langs/en-us/images/Eh.png new file mode 100644 index 00000000000..fc21665c1aa Binary files /dev/null and b/lessons/Data-Science/dot-plots/langs/en-us/images/Eh.png differ diff --git a/lessons/Data-Science/dot-plots/langs/en-us/images/Fd.png b/lessons/Data-Science/dot-plots/langs/en-us/images/Fd.png new file mode 100644 index 00000000000..5951345472f Binary files /dev/null and b/lessons/Data-Science/dot-plots/langs/en-us/images/Fd.png differ diff --git a/lessons/Data-Science/dot-plots/langs/en-us/images/Fh.png b/lessons/Data-Science/dot-plots/langs/en-us/images/Fh.png new file mode 100644 index 00000000000..2cc4ec04c40 Binary files /dev/null and b/lessons/Data-Science/dot-plots/langs/en-us/images/Fh.png differ diff --git a/lessons/Data-Science/dot-plots/langs/en-us/images/dot-plot-pounds.png b/lessons/Data-Science/dot-plots/langs/en-us/images/dot-plot-pounds.png new file mode 100644 index 00000000000..bf8aaed0484 Binary files /dev/null and 
b/lessons/Data-Science/dot-plots/langs/en-us/images/dot-plot-pounds.png differ diff --git a/lessons/Data-Science/dot-plots/langs/en-us/images/dot-plot-weeks.png b/lessons/Data-Science/dot-plots/langs/en-us/images/dot-plot-weeks.png new file mode 100644 index 00000000000..d59a4e82d37 Binary files /dev/null and b/lessons/Data-Science/dot-plots/langs/en-us/images/dot-plot-weeks.png differ diff --git a/lessons/Data-Science/histograms/langs/en-us/images/histogram-a.png b/lessons/Data-Science/dot-plots/langs/en-us/images/histogram-a.png similarity index 100% rename from lessons/Data-Science/histograms/langs/en-us/images/histogram-a.png rename to lessons/Data-Science/dot-plots/langs/en-us/images/histogram-a.png diff --git a/lessons/Data-Science/histograms/langs/en-us/images/histogram-b.png b/lessons/Data-Science/dot-plots/langs/en-us/images/histogram-b.png similarity index 100% rename from lessons/Data-Science/histograms/langs/en-us/images/histogram-b.png rename to lessons/Data-Science/dot-plots/langs/en-us/images/histogram-b.png diff --git a/lessons/Data-Science/histograms/langs/en-us/images/histogram-c.png b/lessons/Data-Science/dot-plots/langs/en-us/images/histogram-c.png similarity index 100% rename from lessons/Data-Science/histograms/langs/en-us/images/histogram-c.png rename to lessons/Data-Science/dot-plots/langs/en-us/images/histogram-c.png diff --git a/lessons/Data-Science/histograms/langs/en-us/images/histogram-d.png b/lessons/Data-Science/dot-plots/langs/en-us/images/histogram-d.png similarity index 100% rename from lessons/Data-Science/histograms/langs/en-us/images/histogram-d.png rename to lessons/Data-Science/dot-plots/langs/en-us/images/histogram-d.png diff --git a/lessons/Data-Science/histograms/langs/en-us/images/histogram-e.png b/lessons/Data-Science/dot-plots/langs/en-us/images/histogram-e.png similarity index 100% rename from lessons/Data-Science/histograms/langs/en-us/images/histogram-e.png rename to 
lessons/Data-Science/dot-plots/langs/en-us/images/histogram-e.png diff --git a/lessons/Data-Science/dot-plots/langs/en-us/images/lesson-images.json b/lessons/Data-Science/dot-plots/langs/en-us/images/lesson-images.json new file mode 100644 index 00000000000..11b6e35f9bf --- /dev/null +++ b/lessons/Data-Science/dot-plots/langs/en-us/images/lesson-images.json @@ -0,0 +1,122 @@ +{ + "histogram-a.png": { + "description": "Histogram with 6 columns whose respective heights from left to right are 1, 1, 3, 3, 1, 1", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "histogram-b.png": { + "description": "histogram with 10 bars equal in height based on contrived data", + "source" : "Created by the Bootstrap Team", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "histogram-c.png": { + "description": "Histogram with 9 bars whose heights from left to right are 0,1,3,3,1,0,0,1,1", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "histogram-d.png": { + "description": "Histogram with 9 bars whose heights from left to right are 0,1,1,0,0,1,3,3,1", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "histogram-e.png": { + "description": "Histogram with 11 bars whose heights from left to right are 0,2,2,1,0,0,0,0,1,2,2", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "pounds.png": { + "description": "Histogram for the pounds column of the animals dataset", + "source": "Created by the Bootstrap Team", + "license": "Creative Commons 4.0 - NC - SA" + }, + "dot-plot-pounds.png": { + "description": "Dot plot for the pounds column of the animals dataset", + "source": "Created by the Bootstrap Team", + "license": "Creative Commons 4.0 - NC - SA" + }, + "weeks.png": { + "description": 
"Histogram for the weeks column of the animals dataset", + "source": "Created by the Bootstrap Team", + "license": "Creative Commons 4.0 - NC - SA" + }, + "number-line.png": { + "description": "A blank number line number 0 to 15, for students to add data collected during class", + "source": "Created by the Bootstrap Team", + "license": "Creative Commons 4.0 - NC - SA" + }, + "Ad.png": { + "description": "Dot plot A", + "source": "Created by the Bootstrap Team", + "license": "Creative Commons 4.0 - NC - SA" + }, + "Ah.png": { + "description": "Histogram A", + "source": "Created by the Bootstrap Team", + "license": "Creative Commons 4.0 - NC - SA" + }, + "Bd.png": { + "description": "Dot plot B", + "source": "Created by the Bootstrap Team", + "license": "Creative Commons 4.0 - NC - SA" + }, + "Bh.png": { + "description": "Histogram B", + "source": "Created by the Bootstrap Team", + "license": "Creative Commons 4.0 - NC - SA" + }, + "Cd.png": { + "description": "Dot plot C", + "source": "Created by the Bootstrap Team", + "license": "Creative Commons 4.0 - NC - SA" + }, + "Ch.png": { + "description": "Histogram C", + "source": "Created by the Bootstrap Team", + "license": "Creative Commons 4.0 - NC - SA" + }, + "Dd.png": { + "description": "Dot plot D", + "source": "Created by the Bootstrap Team", + "license": "Creative Commons 4.0 - NC - SA" + }, + "Dh.png": { + "description": "Histogram D", + "source": "Created by the Bootstrap Team", + "license": "Creative Commons 4.0 - NC - SA" + }, + "Ed.png": { + "description": "Dot plot E", + "source": "Created by the Bootstrap Team", + "license": "Creative Commons 4.0 - NC - SA" + }, + "Eh.png": { + "description": "Histogram E", + "source": "Created by the Bootstrap Team", + "license": "Creative Commons 4.0 - NC - SA" + }, + "Fd.png": { + "description": "Dot plot F", + "source": "Created by the Bootstrap Team", + "license": "Creative Commons 4.0 - NC - SA" + }, + "Fh.png": { + "description": "Histogram F", + "source": "Created 
by the Bootstrap Team", + "license": "Creative Commons 4.0 - NC - SA" + }, + "names1.png": { + "description": "A dot plot showing the distribution of name lengths for a group of students", + "source": "Created by the Bootstrap Team", + "license": "Creative Commons 4.0 - NC - SA" + }, + "names2.png": { + "description": "A dot plot showing the distribution of name lengths for a group of students", + "source": "Created by the Bootstrap Team", + "license": "Creative Commons 4.0 - NC - SA" + }, + "dot-plot-weeks.png": { + "description": "A dot plot showing the distribution of weeks animals spend at the shelter", + "source": "Created by the Bootstrap Team", + "license": "Creative Commons 4.0 - NC - SA" + } +} diff --git a/lessons/Data-Science/dot-plots/langs/en-us/images/names1.png b/lessons/Data-Science/dot-plots/langs/en-us/images/names1.png new file mode 100644 index 00000000000..1fea1413ac0 Binary files /dev/null and b/lessons/Data-Science/dot-plots/langs/en-us/images/names1.png differ diff --git a/lessons/Data-Science/dot-plots/langs/en-us/images/names2.png b/lessons/Data-Science/dot-plots/langs/en-us/images/names2.png new file mode 100644 index 00000000000..210fa273398 Binary files /dev/null and b/lessons/Data-Science/dot-plots/langs/en-us/images/names2.png differ diff --git a/lessons/Data-Science/dot-plots/langs/en-us/images/number-line.png b/lessons/Data-Science/dot-plots/langs/en-us/images/number-line.png new file mode 100644 index 00000000000..0329be80aa7 Binary files /dev/null and b/lessons/Data-Science/dot-plots/langs/en-us/images/number-line.png differ diff --git a/lessons/Data-Science/dot-plots/langs/en-us/images/pounds.png b/lessons/Data-Science/dot-plots/langs/en-us/images/pounds.png new file mode 100644 index 00000000000..1d6c24c445c Binary files /dev/null and b/lessons/Data-Science/dot-plots/langs/en-us/images/pounds.png differ diff --git a/lessons/Data-Science/dot-plots/langs/en-us/images/weeks.png 
b/lessons/Data-Science/dot-plots/langs/en-us/images/weeks.png new file mode 100644 index 00000000000..1ed298e029b Binary files /dev/null and b/lessons/Data-Science/dot-plots/langs/en-us/images/weeks.png differ diff --git a/lessons/Data-Science/dot-plots/langs/en-us/index.adoc b/lessons/Data-Science/dot-plots/langs/en-us/index.adoc new file mode 100644 index 00000000000..4e9e4209479 --- /dev/null +++ b/lessons/Data-Science/dot-plots/langs/en-us/index.adoc @@ -0,0 +1,273 @@ += Dot Plots + +@description{Students create and interpret dot plots, considering the distribution and typicality of the data. Students compare and contrast dot plots and histograms. } + +@ifproglang{pyret}{ +@lesson-prereqs{contracts-tables, contracts-visualizations, ds-intro} +} + +@ifproglang{codap}{ +@lesson-prereqs{codap-dot-plots-bar-charts} +} + +@keywords{dot plot, distribution, typicality} + +@add-to-lang{histogram} + +[@lesson-intro-table] +|=== +| Lesson Goals +| Students will be able to... + +@objectives + + +| Student-facing Lesson Goals +| + +- Let's create dot plots and learn how to interpret them. + +| Materials +|[.materials-links] + +@material-links + +| Preparation +| Decide if students will create a communal dot plot (1) via live survey or (2) via each student drawing one data point on a shared data visualization. BLAH BLAH BLAH +|=== + + +== Dot Plots' Distribution and Typicality + +@objective{make-dot-plots} +@objective{dot-plot-features} + +=== Overview + +Students create and interpret dot plots, learning new vocabulary to informally describe a dataset's distribution and typicality. + +=== Launch + +@teacher{Before class, decide if you want your students to create a communal dot plot (1) via live survey, or (2) via each student drawing their own data point on a shared display. + +If you opt to do the live survey... FILL THIS IN. + +If you opt to have each student draw a data point: Draw or project a number line on a piece of chart paper or on the board. 
Your number line should start at zero and go up to 15 by ones. If you have a student with a name that is more than 15 letters, extend the number line accordingly.} + +@lesson-instruction{ + +- Count how many letters are in your first name. +- Once you have counted, follow your teacher's instructions to do one of the following: +** Complete the live survey and watch our class data visualization appear and evolve. +** Line up at the board to draw a dot above the number of letters in your first name. You may stack dots, but try to keep them evenly spaced. + +@image{images/number-line.png} + +- Congratulations! Your individual name length is now represented in our class dot plot. +- Copy the class dot plot onto the top section of @printable-exercise{our-class-name-data.adoc}. +- Put @printable-exercise{our-class-name-data.adoc} aside for now. We will return to it later in the lesson. +} + +@strategy{Dot Plots?!}{ +If you teach students who are older than 10 or 11 years old, you may be asking yourself: Why dot plots? Aren't those a little elementary? + +Students are generally successful interpreting dot plots (compared to, say, box plots and histograms) because on a dot plot, _individual cases are visible_. Educational research tells us that interpreting box plots and histograms is often difficult for students because they tend to view data as individual cases. Box plots and histograms only provide an _aggregate_ view. + +To combat this challenge, @citation{bakker-et-al-2005, "Bakker, Biehler, and Konold (2005)"} recommend building a strong foundation with data visualizations where _individual cases are visible_. In short: don't gloss over dot plots! When introducing dot plots and histograms, the research recommends pairing the less-familiar aggregate data visualizations with their corresponding (familiar) dot plots as we do in this lesson and others. 
+} + + +=== Investigate + + +@lesson-instruction{ +- Turn to @printable-exercise{interpret-dot-plots.adoc} and complete the first section: Reading a Dot Plot. +- Be prepared to discuss your answers with the class. +} + +@teacher{Review students' responses as a class. Questions 1, 2, and 3 touch on three relevant concepts: range, mode, and proportional reasoning. } + +Now that we are comfortable reading dot plots, we need a common vocabulary to discuss the data that they display. To describe the distribution of data--the way that it is spread out on a number line--it is helpful to locate any outliers, clusters, peaks, and gaps. + +- A *cluster* is a group of data points that are close together. +- A *gap* is an interval where there are no data points. +- An *outlier* occurs when one data point is much larger or smaller than the other data points. +- A *peak* is the value(s) with the most data. + +@lesson-instruction{ + +@center{@image{images/names1.png, 250}} + +- Let's complete the second section of @printable-exercise{interpret-dot-plots.adoc} together using the data in the dot plot for Group A. +} +@QandA{ +@Q{What peaks should we label?} +@A{There is a peak at 4.} + +@Q{What clusters should we label?} +@A{There is a cluster from 3 to 6.} + +@Q{What gaps should we label?} +@A{There are gaps at 7 and 10.} + +@Q{What outliers should we label?} +@A{There is an outlier at 11.} + +@Q{Now let's turn to question 5. What do those peaks, clusters, gaps, and outliers tell us about the dataset?} +} + +@lesson-instruction{ +Complete the third section with your partner. +} + +@teacher{ +Discuss and review students' responses. Students will complete the final section of @printable-exercise{interpret-dot-plots.adoc} after a brief class discussion on typicality. +} + +Another way of describing data on a dot plot is to think about its typicality. + +@QandA{ +@Q{Let's think about the word "typical". Describe a "typical" morning for you.} +@A{Invite students to share. 
Emphasize that "typical" is "the usual", or "what's expected", but it is not always a perfect predictor. It may be "typical" to eat breakfast at 7am, but there are probably days where you eat a little bit earlier or a little bit later - or even much earlier or much later!} + +@Q{What does the word "typical" mean to you?} +} + +@lesson-instruction{ +Complete the final section of @printable-exercise{interpret-dot-plots.adoc}. +} + +@teacher{Review students' responses, emphasizing that there are multiple ways to decide what is typical in a dataset. You may want to highlight a few different and appropriate responses to highlight that we are simply _estimating_ typicality. Some students may have located the most common value (or mode), while others may have found the middle value (median), or the balance point of the data (mean).} + +Let's read and interpret the dot plot representing *our* class' name length data. + +@lesson-instruction{ +- With a partner, complete @printable-exercise{our-class-name-data.adoc}. 
+} + +@QandA{ +@Q{In what ways was our class data similar to the data from Group A and/or Group B on @printable-exercise{interpret-dot-plots.adoc}?} + +@Q{Was there anything that made our class data unique?} +} + +=== Synthesize + +@QandA{ + +@Q{When determining what value is typical, why was it helpful to consider peaks, clusters, gaps, and outliers in the dataset?} +@A{A peak indicates a name length that is the most common--which is one way of thinking about what's typical.} +@A{There might be a cluster where most of the data falls, which would likely be where we would locate what's typical.} +@A{If we want to find a balance point for all of the data (yet another way of thinking about what is typical), then we need to consider gaps and outliers.} + +@Q{What were some of the different strategies you used to choose a typical value in the dataset?} +@A{This question is designed to prime students to recognize that what's typical generally exists at the center of the data. Students will likely identify the values that (approximately) represent the mean, median, and mode(s). It is fine if students are not yet able to recognize these measures of center, which they will explore during @lesson-link{measures-of-center}.} +} + + +== From Dot Plots to Histograms + +@objective{dot-plot-v-histogram} + +=== Overview + +Students learn about histograms, considering how they are both similar to and different from dot plots. + + +=== Launch + +A histogram is another data visualization commonly used to display quantitative data. Let's explore the ways that histograms are similar to and different from dot plots. + +@lesson-instruction{ +- Turn to @printable-exercise{compare-dot-plots-histograms.adoc}. +- Complete the first section. Be prepared to share your responses. 
+} + +@QandA{ +@Q{What do you Notice about the dot plot (left) and the histogram (right), which both display distribution of weeks?} +@A{Possible responses: the histogram has bars that are touching; I can see that the gaps and peaks are in the same place; I can't see individual data points.} + +@Q{What do you Wonder about the dot plot and histogram?} +@A{Possible responses: Why do we need two displays that are so similar? How do I read and interpret a histogram? What are histograms useful for?} + +[cols="^.>8a,1,^.>8a", frame="none", grid="none"] +|=== +| @image{images/dot-plot-weeks.png, 400} +| +| @image{images/weeks.png, 350} +|=== + +} + + +=== Investigate + +Now, let's think more deeply about the sort of information that dot plots and histograms provide us. + + +@lesson-instruction{ +- Use *only the dot plot* to answer the questions on the second section of @printable-exercise{compare-dot-plots-histograms.adoc}. +- Record your responses in the *dot plot* column of the table. If there is a question that you *cannot* answer, put an X. +} + + +Next, we'll use a histogram to answer the same questions. + +@lesson-instruction{ +- Return to @printable-exercise{compare-dot-plots-histograms.adoc}. +- Use *only the histogram* to answer the same set of questions. Record your responses in the *histogram* column of the table. If there is a question that you cannot answer, put an X. +} + +@QandA{ +@Q{Given that this is your first time interpreting a histogram: What questions do you have about reading a histogram?} + +@A{Possible questions include: +- *How is this different from a bar chart?* Histograms show the distribution of quantitative data, not categorical. Histograms' bars cannot be reordered, as they allow us to see the shape of a dataset. We can reorder bars in a bar chart. 
+- *Are the values on the tick marks included in the bar?* On a histogram, each bar _includes_ the left-end value but not the right-end value. +- *How was the size of the intervals (bins) determined?* This is a great question! But it's too big to tackle today. Bin size varies depending on the data being displayed. We will explore this and other questions about histograms in @lesson-link{histograms-visualize}.} +} + +@lesson-instruction{ +- Respond to the questions in the Reflect section of @printable-exercise{compare-dot-plots-histograms.adoc}. +} + + +@teacher{Invite students to share their responses, emphasizing the important idea that histograms display aggregate information rather than individual cases.} + +@strategy{Histograms Don't Display Individual Data Points!}{ + +Dot plots and histograms have a lot in common... so why is interpreting histograms a challenge for many students? + +According to research conducted by @citation{bakker-et-al-2005, "Bakker, Biehler, and Konold (2005)"}, students are inclined to view data as _individual cases_. Histograms, however, display _aggregate information_. + +How do we prevent this misconception? The authors have two recommendations: + +- Spend ample time learning about data visualizations _where individual cases are visible_ to establish a solid foundation. +- When aggregate plots *are* introduced, pair them with representations that allow students to see individual cases. + +} + +Let's think more deeply about dot plots and histograms, two data visualizations of both the frequency and distribution of quantitative data. + +@lesson-instruction{ +- Complete @printable-exercise{match-dot-plots-histograms.adoc}. +- What was your strategy for matching dot plots and histograms? +} + + + +=== Synthesize + + +@QandA{ + +@Q{How are the two displays alike?} +@A{They both display the frequency and distribution of quantitative data. 
They both show the total number of values.} + +@Q{How are the two displays different?} +@A{We can see individual points on the dot plot, but on the histogram, we just get a collective overview of the data. There is no way to single out one particular animal's weight on the histogram.} + +@Q{When might a histogram be more useful than a dot plot? When might a dot plot be more useful than a histogram?} +@A{If we have a large dataset and we want to get a collective overview of the data, a histogram would be more useful. If we need to look at individual data points in a smaller dataset, we should use a dot plot.} + +} diff --git a/lessons/Data-Science/dot-plots/langs/en-us/pages/compare-dot-plots-histograms.adoc b/lessons/Data-Science/dot-plots/langs/en-us/pages/compare-dot-plots-histograms.adoc new file mode 100644 index 00000000000..74801c60bca --- /dev/null +++ b/lessons/Data-Science/dot-plots/langs/en-us/pages/compare-dot-plots-histograms.adoc @@ -0,0 +1,92 @@ += Comparing Dot Plots and Histograms + +The displays below both show the distribution of weeks that animals spend at the shelter. + +== Notice and Wonder + +@vspace{1ex} + +++++ + +++++ + +[cols="^.>1a,^.>1a", frame="none"] +|=== +| @image{../images/dot-plot-weeks.png, 250} +| @image{../images/weeks.png, 225} +|=== + +@n What do you Notice about the dot plot (left) and the histogram (right)? What do you Wonder? @fitb{}{} + +@fitb{}{} + +@fitb{}{} + +@fitb{}{} + + +== Dot Plots versus Histograms +Answer the questions below using only the dot plot. Then answer the questions with the histogram. If you cannot answer a question, write "X". + +@vspace{1ex} + +[cols="<.^4a,^.^1a, ^.^1a", options="header", shading="none"] +|=== + +| Question | Dot Plot | Histogram + +| @n How many animals were in the shelter for fewer than 10 weeks? | @ifsoln{29} | @ifsoln{29} +| @n How many animals were in the shelter for exactly 30 weeks? 
| @ifsoln{1} | @ifsoln{x} +| @n What is the longest amount of time that an animal stayed in the shelter? | @ifsoln{30 weeks}| @ifsoln{x} +| @n How many animals were in the shelter for at least 5 weeks but not more than 25? | @ifsoln{x} | @ifsoln{13} +| @n Are there any gaps in the data? | @ifsoln{10-22} | @ifsoln{15-20, 25-30} +| @n Are there any peaks in the data? | @ifsoln{at 5} | @ifsoln{from 0-5} + +|=== + +@vspace{1ex} + +== Reflect + +@vspace{1ex} + +[cols="1a, 1a"] +|=== + +| +@n When you answered the questions using the *dot plot*: + +... Which questions were *easy* to answer? @fitb{}{@ifsoln{b, c, e}} +... Which questions were *hard* to answer? @fitb{}{@ifsoln{a}} +... Which questions were *impossible* to answer? @fitb{}{@ifsoln{d}} + +@n When you answered the questions using the *histogram*: + +... Which questions were *easy* to answer? @fitb{}{@ifsoln{a, d}} +... Which questions were *hard* to answer? @fitb{}{@ifsoln{e}} +... Which questions were *impossible* to answer? @fitb{}{@ifsoln{b, c}} + +| + +@n When might a histogram be more useful than a dot plot? + +@fitb{}{@ifsoln{A histogram would be more useful for a very large dataset,}} + +@fitb{}{@ifsoln{where you want to identify trends rather than look at}} + +@fitb{}{@ifsoln{individual data points.}} + +@vspace{1ex} + +@n When might a dot plot be more useful than a histogram? 
+ +@fitb{}{@ifsoln{A dot plot is more useful if you want to know about}} + +@fitb{}{@ifsoln{individual data points.}} + +@fitb{}{} + +|=== + diff --git a/lessons/Data-Science/dot-plots/langs/en-us/pages/interpret-dot-plots.adoc b/lessons/Data-Science/dot-plots/langs/en-us/pages/interpret-dot-plots.adoc new file mode 100644 index 00000000000..b436855a5c5 --- /dev/null +++ b/lessons/Data-Science/dot-plots/langs/en-us/pages/interpret-dot-plots.adoc @@ -0,0 +1,65 @@ += Interpreting Dot Plots + +++++ + +++++ + +== Reading a Dot Plot (Group A) + +The dot plot below is a name length data visualization created by a group of 25 students (*Group A*). + +@vspace{1ex} + +@center{@image{../images/names1.png, 250}} + +@n What is the difference (in letters) between the longest and shortest name? @fitb{}{@ifsoln{8 letters}} + +@n What is the most common name length? @fitb{}{@ifsoln{4 letters}} + +@n What fraction of students have first names that are 5 letters long? @fitb{}{@ifsoln{2/25}} + +== Interpreting Peaks, Clusters, Gaps, and Outliers + +@n The distribution of the data is the way that it is spread out on the number line. One way to describe distribution is by identifying peaks, clusters, gaps, and outliers. As a class, label any peaks, clusters, gaps, or outliers on the dot plot *above*. + +@n Let's think about what those peaks, clusters, gaps and outliers *tell* us about the dataset. In the dot plot above: + +- the peak indicates that @fitb{1em}{@ifsoln{4}} letters is the most common name length +- the cluster indicates that many students' names are @fitb{1em}{@ifsoln{3-6}} letters +- the gaps tell us that, in this sample, no students have names that are @fitb{1em}{@ifsoln{7}} letters or @fitb{1em}{@ifsoln{10}} letters +- the outlier is @fitb{1em}{@ifsoln{11}} letters, telling us that longer names are uncommon in this sample. 
+ +== Reading a Dot Plot (Group B) + +@center{@image{../images/names2.png, 250}} + +@n Label the peaks, clusters, gaps, and outliers of this new dot plot representing the name lengths of a different group of 25 students (Group B). + +@n What do the peaks, clusters, gaps, and outliers tell you about the dataset? + +@fitb{}{@ifsoln{3 letters and 6 letters. That means many students have names that are 3-6 letters, especially }} + +@fitb{}{@ifsoln{4 letters. There are gaps at 7 letters and 9 letters, so no one's names are that length.}} + +== Typicality of Name Length Data + + +@n What do you think is a typical value in Group A? @fitb{2em}{@ifsoln{4}} (There is more than one correct response.) Explain your reasoning. @fitb{}{@ifsoln{Four letters is}} + +@fitb{}{@ifsoln{the most common name length in this group of students.}} + + +@n Identify another value someone else might claim is typical of Group A. @fitb{2em}{@ifsoln{5}} Why would they choose that value? @fitb{}{@ifsoln{Although more than}} + +@fitb{}{@ifsoln{half of students have 3-4 letter names, some large outliers can increase what we think of as typical.}} + + +@n Would 6 letters be a good description of the typical number of letters in students' names for Group B? @fitb{5em}{@ifsoln{Yes}} +Explain. @fitb{}{@ifsoln{6 letters is the}} + +@fitb{}{@ifsoln{most common name length. Many students have shorter names so it would be reasonable to choose a smaller value, too; 5 is also the middle value.}} + diff --git a/lessons/Data-Science/dot-plots/langs/en-us/pages/match-dot-plots-histograms.adoc b/lessons/Data-Science/dot-plots/langs/en-us/pages/match-dot-plots-histograms.adoc new file mode 100644 index 00000000000..bb620f0f0ef --- /dev/null +++ b/lessons/Data-Science/dot-plots/langs/en-us/pages/match-dot-plots-histograms.adoc @@ -0,0 +1,44 @@ += Matching Dot Plots and Histograms + +++++ + +++++ +Draw a line from each dot plot on the left to the corresponding histogram on the right. 
+ + + +[.FillVerticalSpace, cols="^.^10a,^.^3a,5a,^.^1a,^.^10a", options="header", stripes="none", grid="none", frame="none"] +|=== +| Dot Plot +||| +| Histogram + +| @image{../images/Ad.png } +|*@n* @ifsoln{C} ||*A* +| @image{../images/Bh.png } + +| @image{../images/Bd.png } +|*@n* @ifsoln{A} ||*B* +| @image{../images/Dh.png } + +| @image{../images/Cd.png } +|*@n* @ifsoln{F} ||*C* +| @image{../images/Ah.png } + +| @image{../images/Dd.png } +|*@n* @ifsoln{B} ||*D* +| @image{../images/Eh.png } + +| @image{../images/Ed.png } +|*@n* @ifsoln{D} ||*E* +| @image{../images/Fh.png } + +| @image{../images/Fd.png } +|*@n* @ifsoln{E} ||*F* +| @image{../images/Ch.png } + + +|=== diff --git a/lessons/Data-Science/dot-plots/langs/en-us/pages/our-class-name-data.adoc b/lessons/Data-Science/dot-plots/langs/en-us/pages/our-class-name-data.adoc new file mode 100644 index 00000000000..420c4a4d10c --- /dev/null +++ b/lessons/Data-Science/dot-plots/langs/en-us/pages/our-class-name-data.adoc @@ -0,0 +1,64 @@ += Our Class' Name Length Data + +== Create a Dot Plot: Length of First Names in My Class + +@vspace{10ex} + +@center{@image{../images/number-line.png, 500}} + +== Reading a Dot Plot + +@n What is the difference (in letters) between the longest name and the shortest name? @fitb{}{} + +@n What is/are the most common name length(s)? @fitb{}{} + +@n What fraction of students have first names that are 5 letters long? @fitb{}{} + +== Peaks, Clusters, Gaps, and Outliers in Name Length Data + +@n Label any peaks, clusters, gaps, and outliers on the class dot plot (above). + +@n Describe what you can conclude about students' name lengths in your class, based on those peaks, clusters, gaps, and outliers. @fitb{}{} + +@fitb{}{} + +@fitb{}{} + +@fitb{}{} + +@fitb{}{} + +== Typicality of Name Length Data + + +@n What is one possible typical value for class name length? @fitb{5em}{} Explain. 
@fitb{}{} + +@fitb{}{} + +@fitb{}{} + +@fitb{}{} + + +@n Give another possible typical value: @fitb{5em}{}. Explain why it is appropriate. @fitb{}{} + +@fitb{}{} + +@fitb{}{} + +@fitb{}{} + +== Compare + +@n Compare and contrast your class dataset with either Group A or Group B from @printable-exercise{interpret-dot-plots.adoc}. Give at least one way that the distributions are alike, and at least one way that they are different. @fitb{}{} + +@fitb{}{} + +@fitb{}{} + +@fitb{}{} + + + + + diff --git a/lessons/Data-Science/dot-plots/langs/en-us/pages/workbook-pages.txt b/lessons/Data-Science/dot-plots/langs/en-us/pages/workbook-pages.txt new file mode 100644 index 00000000000..25c02e9b465 --- /dev/null +++ b/lessons/Data-Science/dot-plots/langs/en-us/pages/workbook-pages.txt @@ -0,0 +1,4 @@ +our-class-name-data.adoc +interpret-dot-plots.adoc +compare-dot-plots-histograms.adoc +match-dot-plots-histograms.adoc \ No newline at end of file diff --git a/lessons/Data-Science/histograms/langs/en-us/proglang.txt b/lessons/Data-Science/dot-plots/langs/en-us/proglang.txt similarity index 100% rename from lessons/Data-Science/histograms/langs/en-us/proglang.txt rename to lessons/Data-Science/dot-plots/langs/en-us/proglang.txt diff --git a/lessons/Data-Science/histograms/langs/en-us/slides-codap.id b/lessons/Data-Science/dot-plots/langs/en-us/slides-codap.id similarity index 100% rename from lessons/Data-Science/histograms/langs/en-us/slides-codap.id rename to lessons/Data-Science/dot-plots/langs/en-us/slides-codap.id diff --git a/lessons/Data-Science/histograms/langs/en-us/slides-pyret.id b/lessons/Data-Science/dot-plots/langs/en-us/slides-pyret.id similarity index 100% rename from lessons/Data-Science/histograms/langs/en-us/slides-pyret.id rename to lessons/Data-Science/dot-plots/langs/en-us/slides-pyret.id diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/assessments/assessments.adoc 
b/lessons/Data-Science/histograms-interpret/langs/en-us/assessments/assessments.adoc similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/assessments/assessments.adoc rename to lessons/Data-Science/histograms-interpret/langs/en-us/assessments/assessments.adoc diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/boxsl.png b/lessons/Data-Science/histograms-interpret/langs/en-us/authoring/boxsl.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/boxsl.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/authoring/boxsl.png diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/boxsl2.png b/lessons/Data-Science/histograms-interpret/langs/en-us/authoring/boxsl2.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/boxsl2.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/authoring/boxsl2.png diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/boxsr2.png b/lessons/Data-Science/histograms-interpret/langs/en-us/authoring/boxsr2.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/boxsr2.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/authoring/boxsr2.png diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/boxsr3.png b/lessons/Data-Science/histograms-interpret/langs/en-us/authoring/boxsr3.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/boxsr3.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/authoring/boxsr3.png diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/boxsr4.png b/lessons/Data-Science/histograms-interpret/langs/en-us/authoring/boxsr4.png similarity index 
100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/boxsr4.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/authoring/boxsr4.png diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/boxsr5.png b/lessons/Data-Science/histograms-interpret/langs/en-us/authoring/boxsr5.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/boxsr5.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/authoring/boxsr5.png diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/boxsr6.png b/lessons/Data-Science/histograms-interpret/langs/en-us/authoring/boxsr6.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/boxsr6.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/authoring/boxsr6.png diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/boxsr7.png b/lessons/Data-Science/histograms-interpret/langs/en-us/authoring/boxsr7.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/boxsr7.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/authoring/boxsr7.png diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/boxsr8.png b/lessons/Data-Science/histograms-interpret/langs/en-us/authoring/boxsr8.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/boxsr8.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/authoring/boxsr8.png diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/boxsym1.png b/lessons/Data-Science/histograms-interpret/langs/en-us/authoring/boxsym1.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/boxsym1.png rename to 
lessons/Data-Science/histograms-interpret/langs/en-us/authoring/boxsym1.png diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/boxsym2.png b/lessons/Data-Science/histograms-interpret/langs/en-us/authoring/boxsym2.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/boxsym2.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/authoring/boxsym2.png diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/boxsym3.png b/lessons/Data-Science/histograms-interpret/langs/en-us/authoring/boxsym3.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/boxsym3.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/authoring/boxsym3.png diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/boxsym4.png b/lessons/Data-Science/histograms-interpret/langs/en-us/authoring/boxsym4.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/boxsym4.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/authoring/boxsym4.png diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/boxsym5.png b/lessons/Data-Science/histograms-interpret/langs/en-us/authoring/boxsym5.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/boxsym5.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/authoring/boxsym5.png diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/boxsym6.png b/lessons/Data-Science/histograms-interpret/langs/en-us/authoring/boxsym6.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/boxsym6.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/authoring/boxsym6.png diff --git 
a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/histsl.png b/lessons/Data-Science/histograms-interpret/langs/en-us/authoring/histsl.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/histsl.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/authoring/histsl.png diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/histsr.png b/lessons/Data-Science/histograms-interpret/langs/en-us/authoring/histsr.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/histsr.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/authoring/histsr.png diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/histsr2.png b/lessons/Data-Science/histograms-interpret/langs/en-us/authoring/histsr2.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/histsr2.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/authoring/histsr2.png diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/histsr3.png b/lessons/Data-Science/histograms-interpret/langs/en-us/authoring/histsr3.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/histsr3.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/authoring/histsr3.png diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/histsr4.png b/lessons/Data-Science/histograms-interpret/langs/en-us/authoring/histsr4.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/histsr4.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/authoring/histsr4.png diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/histsr5.png 
b/lessons/Data-Science/histograms-interpret/langs/en-us/authoring/histsr5.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/histsr5.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/authoring/histsr5.png diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/histsr6.png b/lessons/Data-Science/histograms-interpret/langs/en-us/authoring/histsr6.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/histsr6.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/authoring/histsr6.png diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/histsym1.png b/lessons/Data-Science/histograms-interpret/langs/en-us/authoring/histsym1.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/histsym1.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/authoring/histsym1.png diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/histsym2.png b/lessons/Data-Science/histograms-interpret/langs/en-us/authoring/histsym2.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/histsym2.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/authoring/histsym2.png diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/histsym3.png b/lessons/Data-Science/histograms-interpret/langs/en-us/authoring/histsym3.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/histsym3.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/authoring/histsym3.png diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/histsym4.png b/lessons/Data-Science/histograms-interpret/langs/en-us/authoring/histsym4.png 
similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/authoring/histsym4.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/authoring/histsym4.png diff --git a/lessons/Data-Science/histograms/langs/en-us/images/InterpretData.png b/lessons/Data-Science/histograms-interpret/langs/en-us/images/InterpretData.png similarity index 100% rename from lessons/Data-Science/histograms/langs/en-us/images/InterpretData.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/images/InterpretData.png diff --git a/lessons/Data-Science/histograms-interpret/langs/en-us/images/class1quiz.png b/lessons/Data-Science/histograms-interpret/langs/en-us/images/class1quiz.png new file mode 100644 index 00000000000..dd9d65062f0 Binary files /dev/null and b/lessons/Data-Science/histograms-interpret/langs/en-us/images/class1quiz.png differ diff --git a/lessons/Data-Science/histograms-interpret/langs/en-us/images/class2quiz.png b/lessons/Data-Science/histograms-interpret/langs/en-us/images/class2quiz.png new file mode 100644 index 00000000000..cf4e4a12b1c Binary files /dev/null and b/lessons/Data-Science/histograms-interpret/langs/en-us/images/class2quiz.png differ diff --git a/lessons/Data-Science/histograms-interpret/langs/en-us/images/emma-miles.png b/lessons/Data-Science/histograms-interpret/langs/en-us/images/emma-miles.png new file mode 100644 index 00000000000..50a9da4de9f Binary files /dev/null and b/lessons/Data-Science/histograms-interpret/langs/en-us/images/emma-miles.png differ diff --git a/lessons/Data-Science/histograms-interpret/langs/en-us/images/height-outlier.png b/lessons/Data-Science/histograms-interpret/langs/en-us/images/height-outlier.png new file mode 100644 index 00000000000..738265b8adf Binary files /dev/null and b/lessons/Data-Science/histograms-interpret/langs/en-us/images/height-outlier.png differ diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/histogram-a.png 
b/lessons/Data-Science/histograms-interpret/langs/en-us/images/histogram-a.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/histogram-a.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/images/histogram-a.png diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/histogram-b.png b/lessons/Data-Science/histograms-interpret/langs/en-us/images/histogram-b.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/histogram-b.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/images/histogram-b.png diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/histogram-c.png b/lessons/Data-Science/histograms-interpret/langs/en-us/images/histogram-c.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/histogram-c.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/images/histogram-c.png diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/histogram-d.png b/lessons/Data-Science/histograms-interpret/langs/en-us/images/histogram-d.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/histogram-d.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/images/histogram-d.png diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/histogram-e.png b/lessons/Data-Science/histograms-interpret/langs/en-us/images/histogram-e.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/histogram-e.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/images/histogram-e.png diff --git a/lessons/Data-Science/histograms-interpret/langs/en-us/images/histogram-symm.png 
b/lessons/Data-Science/histograms-interpret/langs/en-us/images/histogram-symm.png new file mode 100644 index 00000000000..8b899ed5d38 Binary files /dev/null and b/lessons/Data-Science/histograms-interpret/langs/en-us/images/histogram-symm.png differ diff --git a/lessons/Data-Science/histograms-interpret/langs/en-us/images/income.png b/lessons/Data-Science/histograms-interpret/langs/en-us/images/income.png new file mode 100644 index 00000000000..720d4e08ea4 Binary files /dev/null and b/lessons/Data-Science/histograms-interpret/langs/en-us/images/income.png differ diff --git a/lessons/Data-Science/histograms-interpret/langs/en-us/images/jeans-cost-6th.png b/lessons/Data-Science/histograms-interpret/langs/en-us/images/jeans-cost-6th.png new file mode 100644 index 00000000000..2e0e5daf09f Binary files /dev/null and b/lessons/Data-Science/histograms-interpret/langs/en-us/images/jeans-cost-6th.png differ diff --git a/lessons/Data-Science/histograms-interpret/langs/en-us/images/jeans-cost-8th.png b/lessons/Data-Science/histograms-interpret/langs/en-us/images/jeans-cost-8th.png new file mode 100644 index 00000000000..514d7d5a4ea Binary files /dev/null and b/lessons/Data-Science/histograms-interpret/langs/en-us/images/jeans-cost-8th.png differ diff --git a/lessons/Data-Science/histograms-interpret/langs/en-us/images/launch-histo-c.png b/lessons/Data-Science/histograms-interpret/langs/en-us/images/launch-histo-c.png new file mode 100644 index 00000000000..b4acc923a06 Binary files /dev/null and b/lessons/Data-Science/histograms-interpret/langs/en-us/images/launch-histo-c.png differ diff --git a/lessons/Data-Science/histograms/langs/en-us/images/left-w-foot.png b/lessons/Data-Science/histograms-interpret/langs/en-us/images/left-w-foot.png similarity index 100% rename from lessons/Data-Science/histograms/langs/en-us/images/left-w-foot.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/images/left-w-foot.png diff --git 
a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/left.png b/lessons/Data-Science/histograms-interpret/langs/en-us/images/left.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/left.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/images/left.png diff --git a/lessons/Data-Science/histograms-interpret/langs/en-us/images/lesson-images.json b/lessons/Data-Science/histograms-interpret/langs/en-us/images/lesson-images.json new file mode 100644 index 00000000000..2126b37285e --- /dev/null +++ b/lessons/Data-Science/histograms-interpret/langs/en-us/images/lesson-images.json @@ -0,0 +1,122 @@ +{ + "histogram-a.png": { + "description": "Histogram with 6 columns whose respective heights from left to right are 1, 1, 3, 3, 1, 1", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "histogram-b.png": { + "description": "histogram with 10 bars equal in height", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "histogram-c.png": { + "description": "Histogram with 9 bars whose heights from left to right are 0,1,3,3,1,0,0,1,1", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "histogram-d.png": { + "description": "Histogram with 9 bars whose heights from left to right are 0,1,1,0,0,1,3,3,1", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "histogram-e.png": { + "description": "Histogram with 11 bars whose heights from left to right are 0,2,2,1,0,0,0,0,1,2,2", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "symmetric.png": { + "description": "A hill-shaped histogram, with both sides sloping away from the peak 
equally", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "left-w-foot.png": { + "description": "A hill-shaped histogram, with a clump of taller bars on the right side, and smaller bars trailing off to the left side", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "right-w-foot.png": { + "description": "A hill-shaped histogram, with a clump of taller bars on the left side, and smaller bars trailing off to the right side", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "InterpretData.png" : { + "description" : "Interpret Data", + "source" : "Created by the Bootstrap Team based on work from @link{http://introdatascience.org/, Mobilizing IDS project} and @link{https://www.amstat.org/asa/files/pdfs/GAISE/GAISEPreK12_Intro.pdf, GAISE}", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "skew-right.png": { + "description": "A hill-shaped histogram, with a clump of taller bars on the left side, and smaller bars trailing off to the right", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "jeans-cost-6th.png": { + "description": "A histogram showing cost of jeans purchased by sixth graders", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "jeans-cost-8th.png": { + "description": "A histogram showing cost of jeans purchased by eighth graders", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "number-children.png": { + "description": "A histogram showing number of children per home in a neighborhood", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + 
"sat-verbal.png": { + "description": "A histogram showing SAT verbal scores for a group of 205 students", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "income.png": { + "description": "A histogram showing income levels for families in Knoxville", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "launch-histo-c.png": { + "description": "a histogram that is symmetric with a peak in the middle", + "source" : "Created by the Bootstrap Team in Pyret", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "histogram-symm.png": { + "description": "A flat symmetric histogram", + "source" : "Created by the Bootstrap Team in Pyret", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "class1quiz.png": { + "description": "A symmetric histogram", + "source" : "Created by the Bootstrap Team in Pyret", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "class2quiz.png": { + "description": "A symmetric histogram", + "source" : "Created by the Bootstrap Team in Pyret", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "travel-time-a.png": { + "description": "A symmetric histogram", + "source" : "Created by the Bootstrap Team in Pyret", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "travel-time-b.png": { + "description": "A symmetric histogram", + "source" : "Created by the Bootstrap Team in Pyret", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "logic-puzzle.png": { + "description": "A symmetric histogram", + "source" : "Created by the Bootstrap Team in Pyret", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "rock-climbing-ages.png": { + "description": "A symmetric histogram", + "source" : "Created by the Bootstrap Team in Pyret", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "emma-miles.png": { + "description": "A skew-left histogram displaying the number of miles Emma ran each month in 2023", + 
"source" : "Created by the Bootstrap Team in Pyret", + "license" : "Creative Commons 4.0 - NC - SA" + } +} diff --git a/lessons/Data-Science/histograms-interpret/langs/en-us/images/logic-puzzle.png b/lessons/Data-Science/histograms-interpret/langs/en-us/images/logic-puzzle.png new file mode 100644 index 00000000000..404f4988de4 Binary files /dev/null and b/lessons/Data-Science/histograms-interpret/langs/en-us/images/logic-puzzle.png differ diff --git a/lessons/Data-Science/histograms-interpret/langs/en-us/images/number-children.png b/lessons/Data-Science/histograms-interpret/langs/en-us/images/number-children.png new file mode 100644 index 00000000000..1e31ae2c75e Binary files /dev/null and b/lessons/Data-Science/histograms-interpret/langs/en-us/images/number-children.png differ diff --git a/lessons/Data-Science/histograms-interpret/langs/en-us/images/pounds.png b/lessons/Data-Science/histograms-interpret/langs/en-us/images/pounds.png new file mode 100644 index 00000000000..6962ce16dfb Binary files /dev/null and b/lessons/Data-Science/histograms-interpret/langs/en-us/images/pounds.png differ diff --git a/lessons/Data-Science/histograms/langs/en-us/images/right-w-foot.png b/lessons/Data-Science/histograms-interpret/langs/en-us/images/right-w-foot.png similarity index 100% rename from lessons/Data-Science/histograms/langs/en-us/images/right-w-foot.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/images/right-w-foot.png diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/right.png b/lessons/Data-Science/histograms-interpret/langs/en-us/images/right.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/right.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/images/right.png diff --git a/lessons/Data-Science/histograms-interpret/langs/en-us/images/rock-climbing-ages.png 
b/lessons/Data-Science/histograms-interpret/langs/en-us/images/rock-climbing-ages.png new file mode 100644 index 00000000000..334c61c8833 Binary files /dev/null and b/lessons/Data-Science/histograms-interpret/langs/en-us/images/rock-climbing-ages.png differ diff --git a/lessons/Data-Science/histograms-interpret/langs/en-us/images/sat-verbal.png b/lessons/Data-Science/histograms-interpret/langs/en-us/images/sat-verbal.png new file mode 100644 index 00000000000..adf3e3aa402 Binary files /dev/null and b/lessons/Data-Science/histograms-interpret/langs/en-us/images/sat-verbal.png differ diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/skew-right.png b/lessons/Data-Science/histograms-interpret/langs/en-us/images/skew-right.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/skew-right.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/images/skew-right.png diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/symmetric.png b/lessons/Data-Science/histograms-interpret/langs/en-us/images/symmetric.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/symmetric.png rename to lessons/Data-Science/histograms-interpret/langs/en-us/images/symmetric.png diff --git a/lessons/Data-Science/histograms-interpret/langs/en-us/images/travel-time-a.png b/lessons/Data-Science/histograms-interpret/langs/en-us/images/travel-time-a.png new file mode 100644 index 00000000000..74f7ba7c954 Binary files /dev/null and b/lessons/Data-Science/histograms-interpret/langs/en-us/images/travel-time-a.png differ diff --git a/lessons/Data-Science/histograms-interpret/langs/en-us/images/travel-time-b.png b/lessons/Data-Science/histograms-interpret/langs/en-us/images/travel-time-b.png new file mode 100644 index 00000000000..d2941d1ace0 Binary files /dev/null and 
b/lessons/Data-Science/histograms-interpret/langs/en-us/images/travel-time-b.png differ diff --git a/lessons/Data-Science/histograms-interpret/langs/en-us/index.adoc b/lessons/Data-Science/histograms-interpret/langs/en-us/index.adoc new file mode 100644 index 00000000000..7ed65a0cc9a --- /dev/null +++ b/lessons/Data-Science/histograms-interpret/langs/en-us/index.adoc @@ -0,0 +1,359 @@ += Interpreting the "Shape" of Data + +@description{Students explore how their understanding of histogram "shape" can help them to interpret data.} + +@lesson-prereqs{histograms-visualize} + +@keywords{histogram, bin, interval} + +[@lesson-intro-table] +|=== +| Lesson Goals +| Students will be able to... + +@objectives + +| Student-facing Lesson Goals +| + +- Let's investigate what the shape of a histogram can tell us about the data. + +| Materials +|[.materials-links] + +@material-links + +| Preparation +| + +There is an activity during this lesson that requires flip chart paper, tape, and markers. Before class, tape flip chart paper to the walls of your classroom--one poster per team of three students. On each piece of flip chart paper, write "Skew Left", "Skew Right", or "Symmetric", and then draw lines to divide the paper horizontally into three equal sections. Note that when the lesson is over, you will want to save students' chart papers for use during @lesson-link{box-plots}. + +|=== + + +== Interpreting Shape + +@objective{histogram-shape} + +=== Overview + +Students consider what different distributions tell us about a dataset, connecting different scenarios with different data visualizations. + +=== Launch + +We now have the terminology needed to describe histogram shape. But what can we actually conclude about a dataset based on a histogram's shape? + +@lesson-instruction{ + +Let's consider two different scenarios: + +- The average US woman gives birth around age 26, but some do even after 45! Children do not give birth. 
+ +- Most adults have close to a full set of 32 teeth, but a few hockey players might have a very small number of teeth. No one has more than 32 teeth. + +*Let's draw a rough sketch of what we think the histogram for each of these scenarios might look like.*} + +@teacher{ +Invite a student to the board to draw a rough sketch of each scenario. Invite the students to describe how they decided where to draw the clusters, peaks, and outliers.} + +@QandA{ +@Q{Which of the above scenarios describes a left skew? Explain.} +@A{The scenario about teeth describes a left skew. Most of the data is clustered around 32 with a few outliers further to the left.} + +@Q{Which of the above scenarios describes a right skew? Explain.} +@A{The scenario about women's ages describes a right skew. Most of the data is clustered around 26, but there is also data further on the right, all the way up to 45. There is no data around, say, age 7.} +} + +@lesson-instruction{ +- With a partner, complete @printable-exercise{use-shape-to-interpret.adoc}. +- Just like we did during the warm up, you will sketch rough histograms, make a decision about their shape, and then interpret the data. +} + +=== Investigate + +@teacher{ + +There is some setup required for the interactive activity that follows. + +- You will need flip chart paper, tape, and markers. +- Divide your class into teams of three. All around your classroom, tape flip chart paper to the wall--one poster per team of three students. +- On each piece of flip chart paper, write "Skew Left", "Skew Right", or "Symmetric", and then draw lines to divide the paper horizontally into three equal sections. +} + +@lesson-instruction{ +- Your teacher has instructed you and your group to stand in front of a blank poster that says "Symmetric", "Skew Left", or "Skew Right" at the top. +- The paper is divided into three horizontal sections. In the top section of your paper, draw a histogram that matches the shape assigned to you. 
+} + +@teacher{Once all students have drawn their histograms, direct them to rotate to the next poster with a different shape.} + +@lesson-instruction{ +- You are now standing in front of a poster that identifies a shape and also includes a histogram of that shape. +- In the second horizontal section of the poster, write a scenario or describe a dataset that would result in that distribution. +- Leave the third section of the poster blank. We will use it another day! +- Take a few minutes to walk around the classroom and read the scenarios your peers developed. Return to your seat when you have had a chance to see all of the posters. +} + +@teacher{ +Spend some time debriefing with students, using the prompts below. Responses will vary for each group of students. +} + +@QandA{ +@Q{Did all of the scenarios your classmates developed correctly represent the given shape?} + +@Q{Did you notice any especially creative scenarios? Any surprising scenarios?} + +@Q{Were there any scenarios that were represented more than once? Why do you think those scenarios were popular?} +} + +@teacher{ +The third section of the posters will remain blank today. Students will complete this section in the future when they learn about @lesson-link{box-plots}. +} + + +@lesson-instruction{ +Try it on your own! Complete @printable-exercise{reading-histograms-matching.adoc}, where you will match descriptions of video ratings to histograms that could fit the data. +} + +@teacher{Go over correct responses as a class.} + + +=== Synthesize + +@QandA{ +@Q{What strategies did your group use in brainstorming scenarios to match histogram shape?} +@Q{For which distributions was it easiest to come up with an example?} +@Q{For which distributions was it hardest to come up with an example?} +} + + +== Histograms and Measures of Center + +@objective{histograms-and-measures-of-center} + +=== Overview + +Students apply their combined knowledge of histograms, measures of center, and shape. 
+ +=== Launch + +Let's try computing and interpreting different measures of center from a histogram! + +@QandA{ +The histogram below shows the number of children per home in the Broadmoor neighborhood. + +@center{@image{images/number-children.png, 250}} + +@Q{What is the median number of children per home?} +@A{The median number of children per home is 1.} +@A{Strategy 1: List out the raw data (0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 7, 8) then locate the middle value.} +@A{Strategy 2: Determine the total number of homes represented by adding columns' heights (6 + 8 + 4 + 3 + 2 + 1 + 1 + 1 = 26), then locate the 13th and 14th values on the histogram.} +@A{Students may mistakenly attempt to find the midpoint of the values on the horizontal axis (4.5), indicating that they connect median with “middle”, but misunderstand _what_ middle value to find.} + +@Q{What is the mean number of children per home?} +@A{Approximately 2.04. Note that students may attempt to use the “add up and divide” algorithm with inappropriate data values from the display. Students may mistakenly compute the mean height of the bars, or the mean of values on the horizontal axis.} +} + +@teacher{Students often cannot compute measures of center from histograms because they _lack attention to the context of the data_. Cooper and Shore (2008) suggest that when students are confused, simply ask "What are the data?" to help reorient and redirect students. Similarly, urge students to discuss and defend their responses.} + + + +=== Investigate + +We were able to access all raw data by looking at the histogram displaying the number of children per home in the Broadmoor neighborhood. That is not always the case! + +@QandA{ +The histogram below shows the frequency of miles that Emma ran each month in 2023. + +@center{@image{images/emma-miles.png, 250}} + +@Q{Are there any months when Emma ran exactly 3 miles?} +@A{Trick question! It's impossible to tell from the display.
Remind students that we cannot see individual points on a histogram, and therefore we will need to make approximations (and think about the effect of outliers!) when thinking about measures of center.} + +@Q{What is the mode number of miles Emma ran per month?} +@A{We can't determine exactly what the mode(s) might be, or even if there is a mode in this dataset. We can see that during most months of 2023, Emma ran between 15-20 miles.} + +@Q{Approximate the median number of miles that Emma ran per month in 2023.} +@A{The 6th and 7th values fall in the 15-20 miles bin, so the median is a value between 15 and 20 miles.} + +@Q{Which is probably greater: the median or the mean?} +@A{Because there are outliers to the left, the mean is probably less than the median.} +} + +@lesson-instruction{ +- Work with a partner to complete @printable-exercise{histograms-moc-1.adoc}. +- Which problem was the most challenging? Why did you find it challenging? +} + +@teacher{ +Discuss solutions with students. + +- Choosing "sixth graders" for the first problem suggests that students understand the concept of mode, but looked at bar height (indicating frequency) instead of considering the value on the x-axis. + +- Ensure that when finding the median (problem 2), students do not simply locate the center of the x-axis; instead, they need to look at the data presented on the histogram. + +- When comparing the median and mean on a histogram, encourage students to approximate the location of the median and then consider the effect of outliers on the mean. + +Note: Both the problems on @printable-exercise{histograms-moc-1.adoc} and in the Launch are drawn from or inspired by research conducted by @citation{cooper-shore-2008, "Cooper and Shore (2008)"}. +} + + +=== Synthesize + +@QandA{ +@Q{How was interpreting mean, median, and mode from a histogram different than computing it from a raw dataset?} +@A{Responses will vary.
Students should explain that they needed to understand the meaning of the bar height and the values on the x-axis in order to arrive at correct measures of center.} + +@Q{Describe how the relationship between mean and median can help you draw a conclusion about the skewness of a histogram. (_For example: When the mean is greater than the median, I know that..._)} +@A{When the mean is greater than the median, outliers on the right cause the display to be skewed right. When the mean is less than the median, outliers on the left cause the histogram to be skewed left. } +} + + + + + +== Histograms and Variability + +=== Overview + +Students consider variability as deviation from the mean, and then assess the variability of histograms. + +=== Launch + +@QandA{ + +We've made lots of different data visualizations for the animals in the shelter. + +@Q{Can you predict what the histogram would look like if every animal in the shelter had approximately the same weight?} +@A{The histogram would have one bar that was very tall, which would include *all* of the animals.} + +@Q{Does the histogram you described represent a dataset of _high_ or _low_ variability?} +@A{The histogram has _low_ variability: the range is small, and all of the data points are similar to one another.} +} + +So far, we have defined variability in two ways: + +- how alike or unlike the data is (categorical data) +- range (quantitative data) + +In this lesson, we will consider another way of describing variability: *deviation from the mean*. + +- If all the animals have roughly the same weight, we can conclude that there will be extremely little variation from the mean. +- A large spread and the presence of outliers result in greater variation from the mean. + + +=== Investigate + +++++ + +++++ + +@QandA{ + +@Q{Which dataset below has the *least* variability from its mean? Explain.} +@A{Histogram A varies the least from its mean.
The mean of the data is also the mode, and outliers are evenly distributed on both sides.} +} + +[.lettering, cols="^.^1a,^.^1a"] +|=== + +| A + +@image{images/launch-histo-c.png} +| B + +@image{images/histogram-symm.png} + +|=== + +@teacher{ +Invite a variety of students to share and explain their responses. Students commonly believe that a flatter histogram equates to less variability in the data than a bumpy histogram (@citation{kaplan-et-al-2014}). In other words, when reporting on variability, students mistakenly focus on frequency (y-axis) rather than data values (x-axis). +} + +@slidebreak + +@lesson-instruction{ +- With a partner, complete @printable-exercise{histogram-variability.adoc}. +- Provide an explanation for each response that you give. Be prepared to share your answers with the class. +} + +@teacher{ +@printable-exercise{histogram-variability.adoc} includes three challenging questions. If students are struggling, encourage them to imagine the dot plot data visualization of each histogram. Oftentimes, thinking about individual datapoints can support students in strengthening their reasoning. +} + + +=== Synthesize + +@QandA{ +@Q{Many students equate variability with range. How can we determine which of two histograms shows greater variability if the two histograms have the same range?} + +@A{We can think of variability as deviation from the mean. Once we have located the mean of a histogram, we can consider if data points are more likely to fall near or far from the mean.} +} + + + +== Data Exploration Project (Visualizing Shape) + +=== Overview + +Students apply what they have learned about visualizing shape to the histograms they have created for their chosen dataset. They will add to their @starter-file{exploration-project} a more detailed interpretation of their histograms using new vocabulary. + +@teacher{Visit @lesson-link{project-data-exploration} to learn more about the sequence and scope.
Teachers with time and interest can build on the exploration by inviting students to take a deep dive into the questions they develop with our @lesson-link{project-research-paper}. +} + +=== Launch + +Let’s review what we have learned about visualizing the shape of data. + +@QandA{ +@Q{Describe a histogram that is _skewed right_. Are its outliers high or low?} +@A{Values are clumped around what's typical, with high outliers.} + +@Q{Describe a histogram that is _skewed left_. Are its outliers high or low?} +@A{Values are clumped around what's typical, with low outliers.} + +@Q{Describe a histogram that is symmetric.} +@A{It’s just as likely for the variable to take a value a certain distance below the middle as it is to take a value that same distance above the middle.} +} + +=== Investigate + +Let’s connect what we know about visualizing the shape of the data to the histograms we created for your chosen dataset. + +@lesson-instruction{ +- Open your chosen dataset starter file in @proglang. +- For this analysis, you'll want to look at the Data Cycle that you completed during the Histograms lesson. +- Recreate the histograms that you made before. Now, edit and expand your discussion so that it uses the new vocabulary that you've learned. +} + +@teacher{@opt{If your students need a fresh copy of the Data Cycle template, distribute @opt-printable-exercise{data-cycle-quantitative.adoc}.}} + +@slidebreak + +@lesson-instruction{ +*It’s time to add to your @starter-file{exploration-project}.* + +- For each of the histograms that you have added, edit and / or expand upon the interpretations you provided during the Histograms lesson. +- Be sure to integrate the new vocabulary we have learned, including: @vocab{shape}, @vocab{skewed left}, @vocab{skewed right}, and @vocab{symmetric}. +- Describe what this shape tells you about the quantitative column you chose.
+} + +=== Synthesize + +@teacher{Have students share their findings.} + +- What @vocab{shape}s did you notice in your histograms? +- Did you discover anything surprising or interesting about your dataset? +- Were there any surprises when you compared your findings with other students? + +@scrub{ +//// +== Additional Exercises + +- Project: @opt-printable-exercise{word-length.adoc} - A mini-project in which students use a histogram to plot the length of words in different texts. +//// +} diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/pages/data-cycle-animals-shape.adoc b/lessons/Data-Science/histograms-interpret/langs/en-us/pages/data-cycle-animals-shape.adoc similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/pages/data-cycle-animals-shape.adoc rename to lessons/Data-Science/histograms-interpret/langs/en-us/pages/data-cycle-animals-shape.adoc diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/pages/data-cycle-quantitative.adoc b/lessons/Data-Science/histograms-interpret/langs/en-us/pages/data-cycle-quantitative.adoc similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/pages/data-cycle-quantitative.adoc rename to lessons/Data-Science/histograms-interpret/langs/en-us/pages/data-cycle-quantitative.adoc diff --git a/lessons/Data-Science/histograms-interpret/langs/en-us/pages/histogram-variability.adoc b/lessons/Data-Science/histograms-interpret/langs/en-us/pages/histogram-variability.adoc new file mode 100644 index 00000000000..68440fd1749 --- /dev/null +++ b/lessons/Data-Science/histograms-interpret/langs/en-us/pages/histogram-variability.adoc @@ -0,0 +1,116 @@ += Histograms and Variability + +@n Students watched 2 videos, and rated them on a scale of 1 to 10. The average score for every video is the same (5.5). 
+ +++++ + +++++ + +[cols="3a,9a,12a", stripes="none", grid ="none"] +|=== + +| Movie A +| @image{../images/histogram-a.png, 200} +| + +Comparing the two graphs, we know that: + +@vspace{1ex} + +- [ ] The scores for Movie A have greater variability. + +@vspace{1ex} + +@ifnotsoln{ +- [ ] The scores for Movie B have greater variability. +} +@ifsoln{ +- [x] The scores for Movie B have greater variability. +} +@vspace{1ex} + +- [ ] The scores for Movie A and Movie B have equal variability. + +@vspace{1ex} + +- [ ] It is impossible to tell from the given information. + +| Movie B +| @image{../images/histogram-e.png, 200} +| + +Explain how you arrived at your answer: + +@fitb{}{@ifsoln{The histograms are symmetric and they have the same spread.}} + +@fitb{}{@ifsoln{They both have a mean of 5.5. Movie A has many scores clustered}} + +@fitb{}{@ifsoln{around the mean, while Movie B has a gap where the mean is.}} + +@fitb{}{@ifsoln{Movie B's scores show greater deviation from the mean.}} + +|=== + + +@n The following graphs show the distribution of quiz scores for two classes. + +[cols="3a,9a,12a", stripes="none", grid ="none"] +|=== + +| Class 1 +| @image{../images/class1quiz.png, 200} +| + +Comparing the two graphs, we know that: + +@vspace{1ex} + +- [ ] The quiz scores of Class 1 have greater variability. + +@vspace{1ex} + +@ifnotsoln{ +- [ ] The quiz scores of Class 2 have greater variability. +} +@ifsoln{ +- [x] The quiz scores of Class 2 have greater variability. +} + +@vspace{1ex} + +- [ ] The quiz scores of Class 1 and Class 2 have equal variability. + +@vspace{1ex} + +- [ ] It is impossible to tell from the given information.
+ +| Class 2 +| @image{../images/class2quiz.png, 200} +| + +Explain how you arrived at your answer: + +@fitb{}{@ifsoln{Although the bar heights for Class 1 show greater variability,}} + +@fitb{}{@ifsoln{the scores for Class 2 have greater variability.}} + +@fitb{}{@ifsoln{In class 1, more scores are close to the mean (between 8 and 9),}} + +@fitb{}{@ifsoln{meaning there is less deviation from the mean.}} + +|=== + + + +@n Caro says, "Flatter histograms always show less variability." Is she correct? Explain why you agree or disagree with Caro. + +@fitb{}{@ifsoln{Caro is incorrect. Varied bar heights do not necessarily indicate a varied dataset.}} + +@fitb{}{@ifsoln{Instead, Caro needs to ask if the datapoints that make up the histogram deviate from the mean.}} + +@fitb{}{} + diff --git a/lessons/Data-Science/histograms-interpret/langs/en-us/pages/histograms-moc-1.adoc b/lessons/Data-Science/histograms-interpret/langs/en-us/pages/histograms-moc-1.adoc new file mode 100644 index 00000000000..0280322ac36 --- /dev/null +++ b/lessons/Data-Science/histograms-interpret/langs/en-us/pages/histograms-moc-1.adoc @@ -0,0 +1,89 @@ += Histograms and Measures of Center + +@n The two histograms below represent the number of minutes spent traveling to school, one for a sample of sixth grade students and one for a sample of eighth grade students. + +[cols="^1a,^1a"] +|=== + +| *Distribution of Travel Times (Sixth Grade)* +@image{../images/travel-time-a.png, 200} +| *Distribution of Travel Times (Eighth Grade)* +@image{../images/travel-time-b.png, 200} + +|=== + + +Which group has the larger mode? @hspace{1em} sixth graders @hspace{2em} @ifsoln-choice{eighth graders} @hspace{2em} the modes are roughly the same + + +@n The histogram below shows the ages of the 19 children who signed up for rock climbing camp.
+ +[cols="^3a,<4a"] +|=== + +| *Distribution of Rock Climbers' Ages* +@image{../images/rock-climbing-ages.png, 210} +| The median age for these 19 climbers is: + +@vspace{1ex} + +☐ about 6 or 7 + +@vspace{1ex} + +@ifsoln-choice{☐ about 8 or 9} + +@vspace{1ex} + +☐ about 10 or 11 + +@vspace{1ex} + +☐ about 12 or 13 + +|=== + +Explain how you determined the median value: @fitb{}{@ifsoln{Possible response: I knew that the 10th value is the median, because there }} + +@fitb{}{@ifsoln{are 19 children represented. I located the 10th value on the histogram, and it was in the 8-10 interval.}} + +@fitb{}{} + +@fitb{}{} + + +@n Eleven students were asked to solve a logic puzzle. The minimum time was 5 minutes, and the maximum time was 35 minutes. Times were rounded to the nearest 5 minutes. The distribution of their times is shown on the histogram below. + + +[cols="^3a,<4a"] +|=== + +| *Distribution of Time Spent Solving a Logic Puzzle* + +@image{../images/logic-puzzle.png, 210} +| Which of the following statements is likely a correct comparison of the mean and median number of minutes spent working? + +@vspace{1ex} + +☐ The mean time is less than the median time. + +@vspace{1ex} + +☐ The mean time is equal to the median time. + +@vspace{1ex} + +@ifsoln-choice{☐ The mean time is greater than the median time.} + +@vspace{1ex} + +☐ It is impossible to determine which time is greater from the given graph. 
+|=== + +Explain how you arrived at your choice: @fitb{}{@ifsoln{The outliers on the right pull the mean to the right,}} + +@fitb{}{@ifsoln{resulting in a greater mean than median.}} + +@fitb{}{} + +@fitb{}{} diff --git a/lessons/Data-Science/histograms-interpret/langs/en-us/pages/histograms-moc.adoc b/lessons/Data-Science/histograms-interpret/langs/en-us/pages/histograms-moc.adoc new file mode 100644 index 00000000000..2bfefd5c3d2 --- /dev/null +++ b/lessons/Data-Science/histograms-interpret/langs/en-us/pages/histograms-moc.adoc @@ -0,0 +1,90 @@ += Histograms and Measures of Center + +@n The two histograms below represent the amount of money spent on a pair of jeans, one for a sample of sixth grade students and one for a sample of eighth grade students. + +[cols="^1a,^1a"] +|=== + +| *Distribution of Cost of Jeans (Sixth Grade)* +@image{../images/jeans-cost-6th.png, 200} +| *Distribution of Cost of Jeans (Eighth Grade)* +@image{../images/jeans-cost-8th.png, 200} + +|=== + + +Which group has the larger mode? @hspace{1em} @ifsoln-choice{sixth graders} @hspace{2em} eighth graders @hspace{2em} the modes are roughly the same + + +@n The histogram below shows the Verbal SAT scores for 205 students entering a local college in the fall of 2002. + +[cols="^3a,<4a"] +|=== + +| *Distribution of Verbal SAT Scores* +@image{../images/sat-verbal.png, 210} +| The median score for these 205 students is: + +@vspace{1ex} + +☐ about 40 or 41 + +@vspace{1ex} + +☐ between 19 and 26 + +@vspace{1ex} + +@ifsoln-choice{☐ between 500 and 600} + +@vspace{1ex} + +☐ between 400 and 500 + +|=== + +Explain how you determined the median value: @fitb{}{} + +@fitb{}{} + +@fitb{}{} + +@fitb{}{} + + +@n A study was conducted to examine the standard of living for typical families in Knoxville. The following graph displays the distribution of family income for those in the town of Knoxville. 
+ + +[cols="^3a,<4a"] +|=== + +| *Distribution of Incomes per Family in Knoxville* + +@image{../images/income.png, 210} +| Which of the following statements is likely a correct comparison of the mean and median family income in Knoxville? + +@vspace{1ex} + +☐ The mean income is less than the median income. + +@vspace{1ex} + +☐ The mean income is equal to the median income. + +@vspace{1ex} + +☐ The mean income is greater than the median income. + +@vspace{1ex} + +☐ It is impossible to determine which measure is larger from the given graph. +|=== + +Explain how you arrived at your choice: @fitb{}{} + +@fitb{}{} + +@fitb{}{} + +@fitb{}{} + diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/pages/notes-DUPLICATE-histograms.adoc b/lessons/Data-Science/histograms-interpret/langs/en-us/pages/notes-DUPLICATE-histograms.adoc similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/pages/notes-DUPLICATE-histograms.adoc rename to lessons/Data-Science/histograms-interpret/langs/en-us/pages/notes-DUPLICATE-histograms.adoc diff --git a/lessons/Data-Science/histograms/langs/en-us/pages/reading-histograms-matching.adoc b/lessons/Data-Science/histograms-interpret/langs/en-us/pages/reading-histograms-matching.adoc similarity index 100% rename from lessons/Data-Science/histograms/langs/en-us/pages/reading-histograms-matching.adoc rename to lessons/Data-Science/histograms-interpret/langs/en-us/pages/reading-histograms-matching.adoc diff --git a/lessons/Data-Science/histograms-interpret/langs/en-us/pages/use-shape-to-interpret.adoc b/lessons/Data-Science/histograms-interpret/langs/en-us/pages/use-shape-to-interpret.adoc new file mode 100644 index 00000000000..21ecd00a6cd --- /dev/null +++ b/lessons/Data-Science/histograms-interpret/langs/en-us/pages/use-shape-to-interpret.adoc @@ -0,0 +1,87 @@ += Using Shape to Interpret Data + +Read each scenario. 
Draw a *rough* histogram sketch (you do not need to label the axes), then decide if the histogram is skew left, skew right, or symmetric. Explain your interpretation. + + +@n In the United States, there are a few billionaires that have far greater incomes than the average (about $28,000). + + +[.FillVerticalSpace, cols="<.3a,.^4a"] +|=== + +| Rough histogram sketch: | *Circle one:* @hspace{2em} _skew left @hspace{4em} skew right @hspace{4em} symmetric_ + +Explain your choice: @fitb{}{} + +@fitb{}{} + +@fitb{}{} + +@fitb{}{} + +@fitb{}{} + +|=== + + + +@n A school cafeteria mostly buys canned goods in huge sizes (48-64 ounces), but also purchases a few ingredients in smaller sizes (4-8 ounces). + + +[.FillVerticalSpace, cols="<.3a,.^4a"] +|=== + +| Rough histogram sketch: | *Circle one:* @hspace{2em} _skew left @hspace{4em} skew right @hspace{4em} symmetric_ + +Explain your choice: @fitb{}{} + +@fitb{}{} + +@fitb{}{} + +@fitb{}{} + +@fitb{}{} + +|=== + + +@n It's just as likely for a newborn baby to be a certain number of ounces below the average weight (approximately 7.5 pounds) as it is to be that number of ounces above the average weight. + + +[.FillVerticalSpace, cols="<.3a,.^4a"] +|=== + +| Rough histogram sketch: | *Circle one:* @hspace{2em} _skew left @hspace{4em} skew right @hspace{4em} symmetric_ + +Explain your choice: @fitb{}{} + +@fitb{}{} + +@fitb{}{} + +@fitb{}{} + +@fitb{}{} + +|=== + +@n At many restaurants, the busiest dinner time is around 7pm, but there are always a few people who want to eat earlier or later. 
+ + +[.FillVerticalSpace, cols="<.3a,.^4a"] +|=== + +| Rough histogram sketch: | *Circle one:* @hspace{2em} _skew left @hspace{4em} skew right @hspace{4em} symmetric_ + +Explain your choice: @fitb{}{} + +@fitb{}{} + +@fitb{}{} + +@fitb{}{} + +@fitb{}{} + +|=== diff --git a/lessons/Data-Science/histograms-interpret/langs/en-us/pages/workbook-pages.txt b/lessons/Data-Science/histograms-interpret/langs/en-us/pages/workbook-pages.txt new file mode 100644 index 00000000000..a4fe084325b --- /dev/null +++ b/lessons/Data-Science/histograms-interpret/langs/en-us/pages/workbook-pages.txt @@ -0,0 +1,5 @@ +use-shape-to-interpret.adoc +reading-histograms-matching.adoc +histograms-moc-1.adoc +histogram-variability.adoc +data-cycle-animals-shape.adoc diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/proglang.txt b/lessons/Data-Science/histograms-interpret/langs/en-us/proglang.txt similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/proglang.txt rename to lessons/Data-Science/histograms-interpret/langs/en-us/proglang.txt diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/slides-codap.id b/lessons/Data-Science/histograms-interpret/langs/en-us/slides-codap.id similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/slides-codap.id rename to lessons/Data-Science/histograms-interpret/langs/en-us/slides-codap.id diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/slides-pyret.id b/lessons/Data-Science/histograms-interpret/langs/en-us/slides-pyret.id similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/slides-pyret.id rename to lessons/Data-Science/histograms-interpret/langs/en-us/slides-pyret.id diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/assessments.adoc b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/assessments.adoc new file mode 
100644 index 00000000000..1422d31ba1b --- /dev/null +++ b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/assessments.adoc @@ -0,0 +1,22 @@ += Assessments + +== Published Assessment + +- Desmos File: + +== Question Text + +@n question 1 + +- objective: + +@n question 2 + +- objective: + +== Links to Documents Used for Thinking and Generating Content + +- Desmos file? +- Pyret file? +- Google Doc? +- Google Sheet? \ No newline at end of file diff --git "a/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-17 at 4.40.34\342\200\257PM.png" "b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-17 at 4.40.34\342\200\257PM.png" new file mode 100644 index 00000000000..d908bd93d1b Binary files /dev/null and "b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-17 at 4.40.34\342\200\257PM.png" differ diff --git "a/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-22 at 4.28.02\342\200\257PM.png" "b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-22 at 4.28.02\342\200\257PM.png" new file mode 100644 index 00000000000..5fd7eb6f5c2 Binary files /dev/null and "b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-22 at 4.28.02\342\200\257PM.png" differ diff --git "a/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-22 at 4.40.38\342\200\257PM.png" "b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-22 at 4.40.38\342\200\257PM.png" new file mode 100644 index 00000000000..2c3ae41f6bd Binary files /dev/null and 
"b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-22 at 4.40.38\342\200\257PM.png" differ diff --git "a/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-22 at 4.40.49\342\200\257PM.png" "b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-22 at 4.40.49\342\200\257PM.png" new file mode 100644 index 00000000000..3657500cc27 Binary files /dev/null and "b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-22 at 4.40.49\342\200\257PM.png" differ diff --git "a/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-22 at 4.49.39\342\200\257PM.png" "b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-22 at 4.49.39\342\200\257PM.png" new file mode 100644 index 00000000000..05e06714621 Binary files /dev/null and "b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-22 at 4.49.39\342\200\257PM.png" differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/attack.png b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/attack.png new file mode 100644 index 00000000000..9d0f87d6773 Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/attack.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/defense.png b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/defense.png new file mode 100644 index 00000000000..13f7b559b1d Binary files /dev/null and 
b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/defense.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/pounds10.png b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/pounds10.png new file mode 100644 index 00000000000..b1b69015d0e Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/pounds10.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/pounds100.png b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/pounds100.png new file mode 100644 index 00000000000..38abd7bc6d5 Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/pounds100.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/pounds100b.png b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/pounds100b.png new file mode 100644 index 00000000000..e560c4eda07 Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/pounds100b.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/pounds10b.png b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/pounds10b.png new file mode 100644 index 00000000000..c571c19edfa Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/pounds10b.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/pounds25.png 
b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/pounds25.png new file mode 100644 index 00000000000..40f986ff0e5 Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/pounds25.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/pounds25b.png b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/pounds25b.png new file mode 100644 index 00000000000..28243462b56 Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/pounds25b.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/pounds5.png b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/pounds5.png new file mode 100644 index 00000000000..fe8bb522e19 Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/pounds5.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/pounds5a.png b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/pounds5a.png new file mode 100644 index 00000000000..5f357e1302d Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/pounds5a.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/pounds5b.png b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/pounds5b.png new file mode 100644 index 00000000000..0c45857322f Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/pounds5b.png differ diff --git 
a/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/weeks.png b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/weeks.png new file mode 100644 index 00000000000..0bb5f21df1b Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/assessments/images-used-in-assessments/weeks.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/images/cat-pounds.png b/lessons/Data-Science/histograms-visualize/langs/en-us/images/cat-pounds.png new file mode 100644 index 00000000000..db0a6c2f84c Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/images/cat-pounds.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/images/dog-pounds-small.png b/lessons/Data-Science/histograms-visualize/langs/en-us/images/dog-pounds-small.png new file mode 100644 index 00000000000..afb9c003a9f Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/images/dog-pounds-small.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/images/dog-pounds.png b/lessons/Data-Science/histograms-visualize/langs/en-us/images/dog-pounds.png new file mode 100644 index 00000000000..563b11bea7d Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/images/dog-pounds.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/images/dot-plot-weeks.png b/lessons/Data-Science/histograms-visualize/langs/en-us/images/dot-plot-weeks.png new file mode 100644 index 00000000000..d59a4e82d37 Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/images/dot-plot-weeks.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoA.png b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoA.png new file mode 100644 index 00000000000..9287523b4f6 Binary files /dev/null and 
b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoA.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoB.png b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoB.png new file mode 100644 index 00000000000..986d182804b Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoB.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoC.png b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoC.png new file mode 100644 index 00000000000..8b6673fad82 Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoC.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoD.png b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoD.png new file mode 100644 index 00000000000..6b78286c023 Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoD.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoE.png b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoE.png new file mode 100644 index 00000000000..674270b0d6c Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoE.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoF.png b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoF.png new file mode 100644 index 00000000000..3a737762fa5 Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoF.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoG.png b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoG.png new file mode 100644 index 00000000000..125e144ae15 Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoG.png differ diff 
--git a/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoH.png b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoH.png new file mode 100644 index 00000000000..42344751d48 Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoH.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoI.png b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoI.png new file mode 100644 index 00000000000..2f75a518631 Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoI.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoJ.png b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoJ.png new file mode 100644 index 00000000000..30db0f2a1bd Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoJ.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoK.png b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoK.png new file mode 100644 index 00000000000..5691952e7ac Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoK.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoL.png b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoL.png new file mode 100644 index 00000000000..edba1e0c7e7 Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoL.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoM.png b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoM.png new file mode 100644 index 00000000000..4d6c7bee4f6 Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoM.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoN.png 
b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoN.png new file mode 100644 index 00000000000..69cac432661 Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoN.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoO.png b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoO.png new file mode 100644 index 00000000000..d75451c1431 Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoO.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoP.png b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoP.png new file mode 100644 index 00000000000..3a64b347baa Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoP.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoQ.png b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoQ.png new file mode 100644 index 00000000000..58efc1ebdca Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoQ.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoR.png b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoR.png new file mode 100644 index 00000000000..f9d2c3409be Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histoR.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/images/histogram-a.png b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histogram-a.png new file mode 100644 index 00000000000..d1aac8c33a3 Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histogram-a.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/images/histogram-b.png 
b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histogram-b.png new file mode 100644 index 00000000000..3f5ce4e3ce9 Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histogram-b.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/images/histogram-c.png b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histogram-c.png new file mode 100644 index 00000000000..133927420fb Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histogram-c.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/images/histogram-d.png b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histogram-d.png new file mode 100644 index 00000000000..634137f75d4 Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histogram-d.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/images/histogram-e.png b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histogram-e.png new file mode 100644 index 00000000000..e98349e9185 Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/images/histogram-e.png differ diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/left-w-foot.png b/lessons/Data-Science/histograms-visualize/langs/en-us/images/left-w-foot.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/left-w-foot.png rename to lessons/Data-Science/histograms-visualize/langs/en-us/images/left-w-foot.png diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/images/lesson-images.json b/lessons/Data-Science/histograms-visualize/langs/en-us/images/lesson-images.json new file mode 100644 index 00000000000..5badcd1a2e1 --- /dev/null +++ b/lessons/Data-Science/histograms-visualize/langs/en-us/images/lesson-images.json @@ -0,0 +1,173 @@ +{ + "histogram-a.png": 
{ + "description": "Histogram with 6 columns whose respective heights from left to right are 1, 1, 3, 3, 1, 1", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "histogram-b.png": { + "description": "histogram with 10 bars equal in height based on contrived data", + "source" : "Created by the Bootstrap Team", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "histogram-c.png": { + "description": "Histogram with 9 bars whose heights from left to right are 0,1,3,3,1,0,0,1,1", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "histogram-d.png": { + "description": "Histogram with 9 bars whose heights from left to right are 0,1,1,0,0,1,3,3,1", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "histogram-e.png": { + "description": "Histogram with 11 bars whose heights from left to right are 0,2,2,1,0,0,0,0,1,2,2", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "pounds.png": { + "description": "Histogram for the pounds column of the animals dataset", + "source": "Created by the Bootstrap Team", + "license": "Creative Commons 4.0 - NC - SA" + }, + "weeks.png": { + "description": "Histogram for the weeks column of the animals dataset", + "source": "Created by the Bootstrap Team", + "license": "Creative Commons 4.0 - NC - SA" + }, + "teeth-histogram.png": { + "description": "histogram with bin width of five: 0 to 4, 5 to 9, etc. 
ready to be filled in", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "teeth-histogram-soln.png": { + "description": "Teeth histogram", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "symmetric.png": { + "description": "A hill-shaped histogram, with both sides sloping away from the peak equally", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "left-w-foot.png": { + "description": "A hill-shaped histogram, with a clump of taller bars on the right side, and smaller bars trailing off to the left side", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "right-w-foot.png": { + "description": "A hill-shaped histogram, with a clump of taller bars on the left side, and smaller bars trailing off to the right side", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "majors-bar-chart.png": { + "description": "A bar chart displaying student enrollment by major, with the bars arranged tallest to shortest", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "dog-pounds.png": { + "description": "A histogram displaying the weights of a group of dogs from a large shelter", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "dog-pounds-small.png": { + "description": "A histogram displaying the weights of dogs from a small shelter", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "cat-pounds.png": { + "description": "A histogram displaying the weights of cats", + "source" : "Created by the 
Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "histoA.png": { + "description": "A histogram ", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "histoB.png": { + "description": "A histogram ", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "histoC.png": { + "description": "A histogram ", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "histoD.png": { + "description": "A histogram ", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "histoE.png": { + "description": "A histogram ", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "histoF.png": { + "description": "A histogram ", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "histoG.png": { + "description": "A histogram ", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "histoH.png": { + "description": "A histogram ", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "histoI.png": { + "description": "A histogram ", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "histoJ.png": { + "description": "A histogram ", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "histoK.png": { + "description": "A histogram ", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + 
"histoL.png": { + "description": "A histogram ", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "histoM.png": { + "description": "A histogram ", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "histoN.png": { + "description": "A histogram ", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "histoO.png": { + "description": "A histogram ", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "histoP.png": { + "description": "A histogram ", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "histoQ.png": { + "description": "A histogram ", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "histoR.png": { + "description": "A histogram ", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + } +} + diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/images/majors-bar-chart.png b/lessons/Data-Science/histograms-visualize/langs/en-us/images/majors-bar-chart.png new file mode 100644 index 00000000000..cb1b656a392 Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/images/majors-bar-chart.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/images/pounds.png b/lessons/Data-Science/histograms-visualize/langs/en-us/images/pounds.png new file mode 100644 index 00000000000..1035a0f2363 Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/images/pounds.png differ diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/right-w-foot.png 
b/lessons/Data-Science/histograms-visualize/langs/en-us/images/right-w-foot.png similarity index 100% rename from lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/right-w-foot.png rename to lessons/Data-Science/histograms-visualize/langs/en-us/images/right-w-foot.png diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/images/symmetric.png b/lessons/Data-Science/histograms-visualize/langs/en-us/images/symmetric.png new file mode 100644 index 00000000000..22752a5500c Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/images/symmetric.png differ diff --git a/lessons/Data-Science/histograms/langs/en-us/images/teeth-histogram-soln.png b/lessons/Data-Science/histograms-visualize/langs/en-us/images/teeth-histogram-soln.png similarity index 100% rename from lessons/Data-Science/histograms/langs/en-us/images/teeth-histogram-soln.png rename to lessons/Data-Science/histograms-visualize/langs/en-us/images/teeth-histogram-soln.png diff --git a/lessons/Data-Science/histograms/langs/en-us/images/teeth-histogram.png b/lessons/Data-Science/histograms-visualize/langs/en-us/images/teeth-histogram.png similarity index 100% rename from lessons/Data-Science/histograms/langs/en-us/images/teeth-histogram.png rename to lessons/Data-Science/histograms-visualize/langs/en-us/images/teeth-histogram.png diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/images/weeks.png b/lessons/Data-Science/histograms-visualize/langs/en-us/images/weeks.png new file mode 100644 index 00000000000..30a4114f65c Binary files /dev/null and b/lessons/Data-Science/histograms-visualize/langs/en-us/images/weeks.png differ diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/index.adoc b/lessons/Data-Science/histograms-visualize/langs/en-us/index.adoc new file mode 100644 index 00000000000..448e798e3e0 --- /dev/null +++ b/lessons/Data-Science/histograms-visualize/langs/en-us/index.adoc @@ -0,0 +1,336 @@ += Visualizing the 
"Shape" of the Data + +@description{Students learn to create histograms by hand and in @proglang. They practice reading and describing histograms, using new vocabulary to describe histogram shape.} + +@ifproglang{pyret}{ +@lesson-prereqs{contracts-tables-visualizations, dot-plots} +} + +@ifproglang{codap}{ +@lesson-prereqs{codap-dot-plots-bar-charts} +} + +@keywords{histogram, bin, interval} + +@add-to-lang{histogram} + +[@lesson-intro-table] +|=== +| Lesson Goals +| Students will be able to... + +@objectives + +| Student-facing Lesson Goals +| + +- Let's create histograms for datasets and learn how to describe their shape. + +| Materials +|[.materials-links] + +@material-links + +| Preparation +| +@preparation{ +- For the @handout{histogram-cards.adoc} activity in this lesson you will need to print and cut one set of cards for each pair of students in your class. We recommend storing the cards in envelopes. +- There is an optional kinesthetic activity in this lesson that requires a ball of play-dough for each group of 3. +- There is an @starter-file{histograms, interactive Desmos activity} in the lesson, as well as Desmos formative assessments. + * Open the Desmos activity you plan to use and make a code or link to share with students. + * If you are using our Google Slides, add the appropriate link to the slide deck. + * _If you're a first-time Desmos user, fear not!_ @dist-link{pages/desmos-instructions.adoc, _Here's what you need to do._} +} + +| Assessments +| @assessments + +|=== + + +== Creating and Reading Histograms + +@objective{make-histograms} +@objective{histogram-center-spread} + +=== Overview + +Students create histograms by hand and in @proglang, and then use the histograms to respond to questions about the data. 
+ +=== Launch + +We have already discussed how histograms are similar to and different from dot plots: both display the frequency and distribution of quantitative data--but histograms give us a collective overview of the data, while dot plots allow us to see individual points. + +During this lesson, we will get comfortable making histograms by hand and in @proglang. + +@lesson-instruction{ +- Turn to the first section of @printable-exercise{making-histograms.adoc} and use the data provided to complete the frequency table and corresponding data visualization. +- When you're done, open @starter-file{tooth-data} and complete the second section of the page using @proglang. +} + + +=== Investigate + +Let's practice reading histograms. + +@teacher{If your students are new to histograms, you may want to review the first section before having students move on to the second. Some misconceptions that commonly emerge are highlighted @ifnotslide{below}@ifslide{on the next slide}.} + + +@lesson-instruction{ +- Complete @printable-exercise{reading-histograms.adoc}. +} + +=== Common Misconceptions + +- The tallest bar on a histogram does *not* necessarily represent the majority. A majority requires more than half of the data points. We need to ask ourselves: Is more than half of the data represented by the tallest bar? + +- Pay attention to what each axis measures! A tall histogram bar does not indicate a heavier dog. The height of the bar indicates frequency (how many dogs fall into a given 20-pound weight interval). + +- Unlike dot plots, histograms display *aggregate* data, meaning that it is impossible to identify single data points. The first histogram does *not* show us that one dog weighs exactly 140 pounds. There might be a dog with that weight, but we can't know for sure unless we look at the dataset (or use our mouse to interact with the display in @proglang). 
+ +@teacher{ +The common misconceptions cited above are from work by @citation{kaplan-et-al-2014, "Kaplan et al"} (2014), @citation{cooper-shore-2008, "Cooper and Shore"} (2008), and @citation{bakker-et-al-2005, "Bakker et al"} (2005). +} + +=== Synthesize + +@QandA{ +@Q{What does the height of a histogram bar indicate?} +@A{Histogram bar height tells us the frequency of data falling in a given bin.} + +@Q{Can we use a histogram to determine the exact range of a dataset?} +@A{No: A histogram's bars provide an aggregate view of the data.} +@A{We cannot identify single data points, so we can identify neither the minimum +nor the maximum.} +@A{The lowest data point could fall toward the low or high end of the first bin, or anywhere in between. And the highest data point could fall anywhere in the last bin.} + +@Q{Can we identify how *many* data points are in a dataset from a histogram?} +@A{Yes. Assuming that the y-axis is clearly labeled, we can add the bar heights to determine how many data points are in a dataset. This of course becomes more complicated when we have larger datasets.} + +} + + +== Describing Shape + +@objective{peaks-clusters-gaps} +@objective{histogram-skew-or-symmetric} +@objective{bar-chart-v-histogram} + +=== Overview + +This activity focuses on _describing shape_ from a @vocab{histogram}. Students explore shape by considering symmetry, peaks, clusters, and skew. + +=== Launch + +When we have a large dataset and we want a more collective overview of the data, histograms shine and dot plots become impractical. Histograms are valuable because they help us to see shape. + +@teacher{ +The @handout{histogram-cards.adoc} activity students are about to complete requires some teacher preparation. Make sure you've printed and cut out a set of cards for each pair of students before proceeding. 
+ +If that preparation is unrealistic for you, project the images for students to refer to as they work through this section and modify the directions accordingly. (Viewing all of the images at once, rather than as individual cards, requires a higher cognitive load for students, so we don't recommend it.) +} + + +@QandA{ +Your teacher will give you and your partner an envelope containing lettered histogram cards. Lay out the cards in front of you. + +@Q{What do you Notice about the histograms?} + +@Q{What do you Wonder about the histograms?} +} + +Let's sort some histograms! + +@lesson-instruction{ +- With your partner, sort the histogram cards into two piles: histograms that are roughly symmetrical, and histograms that are not roughly symmetrical. +- Be prepared to share your groupings with the class. +} + +@teacher{ +If students are not familiar with the concept of "symmetry", spend some time briefly explaining that it exists when the right and left sides of the data visualization are mirror images. Ensure that students agree on which histograms are symmetrical and which ones are not before proceeding with the activity. +} + + +=== Investigate + +@lesson-instruction{ +- With your partner, use the histogram cards to complete @printable-exercise{sorting-histograms.adoc}. +- We will pause for discussion after you complete the first section. +} + +@teacher{ +As you circulate, make note of the different groupings that students use; this will help with facilitation of class discussion and debrief. Encourage students to use appropriate vocabulary (e.g., peak, cluster, gap). +} + +Let's debrief. + +@QandA{ +@Q{How did you group your symmetrical histograms?} +@A{Responses will vary. Students may have grouped their histograms by the number of peaks, the number of gaps, or the location of the center. 
There are no wrong answers here.} + +@Q{How can you determine what's typical of a symmetrical histogram?} +} + +@lesson-instruction{ +- With your partner, complete the second section of @printable-exercise{sorting-histograms.adoc}. +} + +@QandA{ +@Q{How did you group your symmetrical histograms?} +@A{Possible groupings: no peaks, 1 peak, 2 peaks; 1 gap, no gaps; and center at X, center at Y.} + +@Q{How can you determine what's typical of a symmetrical histogram?} +@A{The center of a symmetrical histogram is the line of symmetry. A common misconception is arguing that the center of the axis is the center of the histogram; students must also consider the range of the histogram.} +} + +You probably noticed that some histograms trail off to the left, and others trail off to the right. Statisticians refer to this trailing as "skew". Let's compare skew-left, skew-right, and symmetric histogram shapes. + + + +[cols="^.^1a,^.^1a,^.^1a", options="header"] +|=== + +| Symmetric | Skew-left | Skew-right + +| @image{images/symmetric.png, 150 }| @image{images/left-w-foot.png, 150} | @image{images/right-w-foot.png, 150} + +| Values are balanced on either side of the center. + +| Values are clumped around what's typical, but trail off to the left. + +| Values are clumped around what's typical, but trail off to the right. + +|=== + +Skew-left distributions look like the toes on your left foot, and skew-right distributions look like the toes on your right foot! + +@vocab{Shape} is useful because it enables us to quickly identify and describe trends in data. When we recognize a particular shape, we can immediately draw conclusions about a dataset! We'll dig deeper into that idea in our lesson on @lesson-link{histograms-interpret}. + +*Shape is one of a few key features that set histograms apart from bar charts!* + +@lesson-instruction{ +Complete @printable-exercise{bar-chart-v-histogram.adoc}. 
+} + +@teacher{ +@printable-exercise{bar-chart-v-histogram.adoc} surfaces two common student misconceptions about bar graphs that @citation{whittaker-jacobbe-2017, "Whittaker and Jacobbe (2017)"} cite in their research. + +@centered-image{images/majors-bar-chart.png, 350} + +- First, students commonly fail to understand that skew is associated with *distributions of quantitative variables*. This is why a bar graph with its bars arranged in increasing or decreasing order does not display a skewed distribution. +- Students also commonly believe that the category with the greatest value represents a majority of the responses. In this dataset, that is not the case: the bar representing the college of science does not represent more than half of the students. +} + +Because a bar chart displays categorical data, we can order the bars any way that we wish. That is not the case with histograms: since quantitative data must follow a natural order, a histogram's bars cannot be reordered. + +@QandA{ +@Q{What are some ways that bar charts and histograms are alike?} +@A{Both bar charts and histograms display frequency. Both displays have bars!} + +@Q{What are some ways that bar charts and histograms are different?} +@A{Bar charts display categorical data, while histograms display quantitative data. A histogram's bars touch, and a bar chart's bars do not. Histograms have shape, so their bars cannot be reordered. Bar chart bars can be reordered.} +} + +=== Synthesize + +@QandA{ + +@Q{Envision a skew-left histogram. Where do you think its outliers are? Explain how you know.} +@A{A skew-left histogram trails off to the left. The histogram trails because there are very few data points (the outliers) to the left, rather than many (causing tall bars).} + +@Q{Why do histograms have shape but bar charts do not?} +@A{Because histograms are quantitative, their bars must appear in numerical order. 
Bar charts show categorical data, meaning that the bars can be arranged in any way.} + +} + +@teacher{ + +Want to check student mastery of the content you've just taught? Administer @assessment{histograms-check1-desmos} to get a snapshot of your students' current level of mastery. Make sure you have created a link or code for your class to the assessment. + +Alternatively, we offer a compilation of both Checkpoints in @assessment{histograms-cumulative-desmos}. +} + + +== Choosing the Right Bin Size + +@objective{choosing-bin-size} + +=== Overview +Students learn to make histograms from the animals-dataset in @proglang and explore the importance of choosing the right bin size in order for a histogram to show us the shape of the data. + +=== Launch + +@QandA{ +@Q{What is a bin on a histogram? And what do we already know about them?} +@A{The goal here is to hear what students have internalized so far. This is not the moment to teach about bins as we are about to delve into a deep exploration.} +@A{We would expect that students have some sense that bins are the intervals that data gets grouped into.} +} + +Bins that are too small will hide the shape of the data by breaking it into too many short bars. Bins that are too large will hide the shape by squeezing the data into just a few tall bars. So far, the bins were provided for you. But how do you choose a good bin-size? + +@teacher{Make sure you have created a link or code for your class to @starter-file{histograms}.} + +@lesson-instruction{ +- Open the *Desmos* link I've shared with you. (The file should be called *Histogram Bin Size Exploration*.) +- Use the Bin Size slider to explore how changing the bin size impacts the shape of the histogram and what we can learn about the distribution of the data. +- Record your notices and wonders in the space provided on Slide 1. +- Before moving on to Slide 2, be sure to click the "New Dataset" button and see if you notice and wonder anything new. 
+- When you're done exploring Slide 1, move on to Slide 2 and answer the questions. +} + +=== Investigate +Suppose we want to know how long it takes for animals from the shelter to be adopted. + +@lesson-instruction{ +- Log into @starter-file{program-list}, open your saved Animals Starter File (or @starter-file{animals, make a new copy}), and click "Run". +- Complete @printable-exercise{choosing-bin-size.adoc}. +} + +@slidebreak + +@QandA{ +@Q{What did you Notice?} +@A{We see most of the histogram's area under the two bars between 0 and 10 weeks, so we can say it was most common for an animal to be adopted in 10 weeks or less.} +@A{We see a small amount of the histogram's area trailing out to unusually high values, so we can say that a couple of animals took an unusually long time to be adopted: one took even more than 30 weeks.} +@A{More than half of the animals (17 out of 31) took just 5 weeks or less to be adopted. But the few unusually long adoption times pulled the average up to 5.8 weeks.} + +@Q{What was a typical adoption time?} +@A{Almost all of the animals were adopted in 10 weeks or less, but a couple of animals took an unusually long time to be adopted -- even more than 20 or 30 weeks!} +@A{Be sure to draw attention to the fact that it would have been hard to give this summary by reading through the table, but the histogram makes it easy to see!} + +@Q{What bin sizes worked best for analyzing `adoption`?} +@A{Have students talk about the bin sizes they tried. Encourage open discussion as much as possible here, so that students can make their own meaning about bin sizes before moving on to the next point.} +} + +@slidebreak + +@lesson-point{ +Rule of thumb: a histogram should have between 5–10 bins. +} + +Histograms are a powerful way to display a dataset and assess its @vocab{shape}. Choosing the right bin size for a column has a lot to do with how data is distributed between the smallest and largest values in that column! 
With the right bin size, we can see the _shape_ of a quantitative column. + +@teacher{ +But how do we talk about or describe that shape, and what does the shape actually tell us? + +Our @lesson-link{histograms-interpret} lesson addresses these questions and explores how histogram shape affects the mean (average). +} + +=== Synthesize + +@QandA{ + +@Q{What would the histogram look like if most of the animals took more than 20 weeks to be adopted, but a couple of them were adopted in fewer than 5 weeks?} +@A{The histogram would be skewed left, with a peak on the right.} + + +@Q{What would the histogram look like if every animal was adopted in roughly the same length of time?} +@A{All of the animals would be stacked into one very tall bar.} +} + +@teacher{ + +Want to check student mastery of the content you've just taught? Administer @assessment{histograms-check2-desmos} to get a snapshot of your students' current level of mastery. Make sure you have created a link or code for your class to the assessment. + +Alternatively, we offer a compilation of both Checkpoints in @assessment{histograms-cumulative-desmos}. +} diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/pages/bar-chart-v-histogram.adoc b/lessons/Data-Science/histograms-visualize/langs/en-us/pages/bar-chart-v-histogram.adoc new file mode 100644 index 00000000000..6c02b957c79 --- /dev/null +++ b/lessons/Data-Science/histograms-visualize/langs/en-us/pages/bar-chart-v-histogram.adoc @@ -0,0 +1,66 @@ += Bar Charts Versus Histograms + + + +A university consists of six colleges. Each student in the university has chosen to enroll in one of these colleges. The *bar chart* below shows the distribution of college choice. + +@centered-image{../images/majors-bar-chart.png, 310} + + + + +== Differences and Similarities + +@vspace{1ex} + +Respond to the prompts to complete the table below. 
If it helps you to look at a histogram as you complete the table, you can refer to @printable-exercise{making-histograms.adoc}. + +[cols=".^2a,.^1a,.^1a",options="header"] +|=== + +| | Bar Chart | Histogram +| Displays frequency: yes or no? | @ifsoln{yes} | @ifsoln{yes} +| Type of data: categorical or quantitative? | @ifsoln{categorical}| @ifsoln{quantitative} +| Bars touch: yes or no? | @ifsoln{no} | @ifsoln{yes} +| Bars can be reordered: yes or no? | @ifsoln{yes} | @ifsoln{no} +| The shape of the data matters: yes or no? | @ifsoln{no} | @ifsoln{yes} +|=== + +@n What are some of the ways that bar charts and histograms are *alike*? _Summarize your conclusions from the table._ @fitb{}{} + +@fitb{}{@ifsoln{Both display frequency. Both show bars.}} + +@fitb{}{} + + +@n What are some of the ways that bar charts and histograms are *different*? _Summarize your conclusions from the table._ @fitb{}{} + +@fitb{}{@ifsoln{Bar charts display categorical data, while histograms display quantitative data. A histogram's bars touch, and a bar chart's bars do not.}} + +@fitb{}{@ifsoln{Histograms have shape, so their bars cannot be reordered. Bar chart bars can be reordered.}} + + + +== Distribution of College Choice +@vspace{1ex} + +Four different students share their conclusions about the display above. Only *one* of those conclusions is correct. Respond whether you agree or not, and then explain your stance. + +Student A: "The distribution is skewed to the left." @fitb{}{@ifsoln{This is incorrect. There cannot be a skew because the display is a bar chart,}} + +@fitb{}{@ifsoln{which shows categorical data.}} + + +Student B: "The distribution is skewed to the right." @fitb{}{@ifsoln{This is incorrect. There cannot be a skew because the display is a bar chart, }} + +@fitb{}{@ifsoln{which shows categorical data.}} + +Student C: "The majority of students are enrolled in the college of science." @fitb{}{@ifsoln{This is incorrect. 
The tallest bar does not}} + +@fitb{}{@ifsoln{necessarily represent the majority. In this case, fewer than half of students are in the college of science.}} + + +Student D: "After science and education, there is a large drop in enrollments for the other colleges." @fitb{}{} + +@fitb{}{@ifsoln{This is correct. Science and education have far more enrollments than the other colleges. }} + diff --git a/lessons/Data-Science/histograms/langs/en-us/pages/choosing-bin-size.adoc b/lessons/Data-Science/histograms-visualize/langs/en-us/pages/choosing-bin-size.adoc similarity index 100% rename from lessons/Data-Science/histograms/langs/en-us/pages/choosing-bin-size.adoc rename to lessons/Data-Science/histograms-visualize/langs/en-us/pages/choosing-bin-size.adoc diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/pages/histogram-cards.adoc b/lessons/Data-Science/histograms-visualize/langs/en-us/pages/histogram-cards.adoc new file mode 100644 index 00000000000..91570c182f2 --- /dev/null +++ b/lessons/Data-Science/histograms-visualize/langs/en-us/pages/histogram-cards.adoc @@ -0,0 +1,78 @@ +== Histograms Card Sort + +Cut out one set of cards for every pair of students. Instructions for this sorting activity can be found on @printable-exercise{sorting-histograms.adoc}. 
+ +@vspace{1ex} + +++++ + +++++ + +[.lettering, cols="^.^1a,^.^1a,^.^1a", header="none"] +|=== +| A + +@image{../images/histoA.png, 170} +| B + +@image{../images/histoB.png, 170} +| C + +@image{../images/histoC.png, 170} +| D + +@image{../images/histoD.png, 170} + +| E + +@image{../images/histoE.png, 170} +| F + +@image{../images/histoF.png, 170} +| G + +@image{../images/histoG.png, 170} +| H + +@image{../images/histoH.png, 170} + +| I + +@image{../images/histoI.png, 170} +| J + +@image{../images/histoJ.png, 170} +| K + +@image{../images/histoK.png, 170} +| L + +@image{../images/histoL.png, 170} + +| M + +@image{../images/histoM.png, 170} +| N + +@image{../images/histoN.png, 170} +| O + +@image{../images/histoO.png, 170} +| P + +@image{../images/histoP.png, 170} + +| Q + +@image{../images/histoQ.png, 170} +| R + +@image{../images/histoR.png, 170} + +|=== + + diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/pages/making-histograms.adoc b/lessons/Data-Science/histograms-visualize/langs/en-us/pages/making-histograms.adoc new file mode 100644 index 00000000000..cf9e7f0e569 --- /dev/null +++ b/lessons/Data-Science/histograms-visualize/langs/en-us/pages/making-histograms.adoc @@ -0,0 +1,60 @@ += Making Histograms + +== By Hand + +Suppose we have a dataset for a group of 50 adults, showing the number of teeth each person has... + +@vspace{1ex} + +@indented{ +@big{ +0, 0, 0, 0, 0, 22, 26, 27, 28, 28, 28, 28, 29, 29, 29, 30, 30, 30, 30, 30, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32 +} +} + +@n Use the data to complete the frequency table below. (One row has been completed for you.) 
+ +@vspace{1ex} + +[cols="2a,^1a,^1a,^1a,^1a,^1a,^1a,^1a", options= "header", stripes ="none"] +|=== +| number of teeth | 0-4 | 5-9 | 10-14 | 15-19 | 20-24 | 25-29 |30-34 +| frequency | @ifsoln{5}| @ifsoln{0}| @ifsoln{0}| @ifsoln{0}| @ifsoln{1}| @ifsoln{9}| 35 +|=== + +@n Use the frequency table to draw a histogram below, filling in each interval so that its height is equal to the frequency. + +@vspace{1ex} + +@ifnotsoln{ @center{@image{../images/teeth-histogram.png, 400}}} +@ifsoln{ @center{@image{../images/teeth-histogram-soln.png, 400}}} + +== In @proglang + +[.linkInstructions] +Open the @starter-file{tooth-data} copy@ifproglang{pyret}{, and click "Run".} + +@n Type `tooth-table` in the Interactions window. Press enter. What do you see? @fitb{}{@ifsoln{The tooth data from above, with names!}} + +@n Type `count(tooth-table, "num-teeth")` in the Interactions window and press enter. How is the frequency table created in Pyret different from the one that you created, above? @fitb{}{@ifsoln{Different intervals were used.}} + +@fitb{}{} + +@n What bin size was used (above) for the Tooth Data frequency table and the histogram? @fitb{5em}{@ifsoln{5}} + +@n @ifproglang{pyret}{Build `tooth-table`.}@ifproglang{codap}{Review the tooth table.} Does this data appear to be the same or different from the tooth data that appeared in the first section? @fitb{5em}{@ifsoln{Same.}} + + +@n @ifproglang{pyret}{Use the contract below to build a histogram of the distribution of teeth in @proglang. + +@show{(contract 'histogram '((table-name Table) (labels String) (column-name String) (bin-size Number)) "Image")} +} + +@ifproglang{codap}{Drag `Num Teeth` to the x-axis and select Group into Bins from the Configuration menu. Fuse dots into bars, then enter the desired bin width.} + +@n How does the histogram you created in @proglang look _similar_ to the one that you drew? Are there any ways in which the histogram you created in @proglang is _different_ than the one you created by hand? 
@fitb{}{@ifsoln{On the histogram I created, intervals are labeled with the minimum and maximum value.}} + +@fitb{}{@ifsoln{In @proglang, there are tick marks on the x-axis every 5 units.}} + +@fitb{}{} + diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/pages/reading-histograms.adoc b/lessons/Data-Science/histograms-visualize/langs/en-us/pages/reading-histograms.adoc new file mode 100644 index 00000000000..2fff64c3d15 --- /dev/null +++ b/lessons/Data-Science/histograms-visualize/langs/en-us/pages/reading-histograms.adoc @@ -0,0 +1,124 @@ += Reading Histograms + +== Small Local Animal Shelter + +@vspace{1ex} + +Using the histogram below, respond to the questions about the distribution of dogs' weights at a small local animal shelter. + +[cols="^.^4a,5a"] + +|=== + + +| @image{../images/dog-pounds-small.png} + +| +@n) How many dogs are represented on the histogram? @fitb{5em}{@ifsoln{15}} + +@n) How many dogs weigh less than 100 pounds? @fitb{5em}{@ifsoln{11}} + +@vspace{1ex} + +@n) True or False: The majority of dogs weigh between 40 and 60 pounds. @ifsoln{False} + + +@vspace{1ex} + + +@n) True or False: The dogs weigh between 20 and 180 pounds. @ifsoln{True} + + +@vspace{1ex} + +@n) True or False: The heaviest dog weighs between 40 and 60 pounds.@ifsoln{False} + + +@vspace{1ex} + +@n) True or False: The histogram shows us that one dog weighs exactly 140 pounds. @ifsoln{False} + +|=== + + +== Larger Animal Shelter + +@vspace{1ex} +Using the histogram below, respond to the questions about *dogs*' weights at a different (much larger) animal shelter. + + +[cols="^.^4a,5a"] + +|=== + +| +@image{../images/dog-pounds.png} + +| + +@n) True or False: There are two dogs that weigh at least 160 pounds. @ifsoln{True} + + +@vspace{1ex} + +@n) True or False: The majority of the dogs weigh between 40 and 60 pounds. @ifsoln{False} + + +@vspace{1ex} + + +@n) True or False: The lightest dog weighs zero pounds. 
@ifsoln{False} + + +@vspace{1ex} + +@n) True or False: Most commonly, dogs at this shelter weigh 40-60 pounds.@ifsoln{True} + + +@vspace{1ex} + +@n) True or False: There are 180 dogs at this animal shelter. @ifsoln{False} + + +@vspace{1ex} + +@n) True or False: There are more than 150 dogs at this animal shelter. @ifsoln{True} + + +|=== + +@vspace{1ex} + + + +Using the histogram below, write three statements about the *cats*' weights and their distribution at the large animal shelter. + +[cols="^.^3a,3a"] + +|=== + +| +@image{../images/cat-pounds.png} + +| + +@n) @fitb{}{} + +@fitb{}{} + +@fitb{}{} + +@n) @fitb{}{} + +@fitb{}{} + +@fitb{}{} + +@n) @fitb{}{} + +@fitb{}{} + +@fitb{}{} + + +|=== diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/pages/sorting-histograms.adoc b/lessons/Data-Science/histograms-visualize/langs/en-us/pages/sorting-histograms.adoc new file mode 100644 index 00000000000..b25b98de696 --- /dev/null +++ b/lessons/Data-Science/histograms-visualize/langs/en-us/pages/sorting-histograms.adoc @@ -0,0 +1,64 @@ += Sorting Histograms + +With your partner, sort the histograms into two piles: approximately symmetrical and definitely not symmetrical. Then, follow the prompts and respond to the questions below. + +@vspace{1ex} + +== Symmetrical Histograms + +_Put your asymmetrical cards aside (or back in their envelope)._ + +@n List out the letters of the histograms that were symmetrical: @fitb{}{@ifsoln{A, B, G, I, K, M, O, P, Q}} + +@n Sort the symmetrical cards into two or three logical groups. _Hint: It may be useful to think about peaks, gaps, clusters, center, and spread!_ What do the cards in your first group have in common? @fitb{}{} + +@fitb{}{} + +@fitb{}{} + +@n What do the cards in your second group have in common? (Describe your third group as well, if you have a third group.) @fitb{}{} + +@fitb{}{} + +@fitb{}{} + +@n Can you think of a different way of grouping these histograms? Explain. 
@fitb{}{} + +@fitb{}{} + +@fitb{}{} + +@n Describe how you can determine what's typical of a symmetrical histogram. @fitb{}{} + +@fitb{}{} + +== Asymmetrical Histograms + +_Put the symmetrical histograms away, and take out the asymmetrical histograms._ + +@n List out the letters of the histograms that were asymmetrical: @fitb{}{@ifsoln{C, D, E, F, H, J, L, N, R}} + + +@n Sort the asymmetrical histograms into two or three logical groups. What do the cards in your first group have in common? @fitb{}{} + +@fitb{}{} + +@fitb{}{} + +@n What do the cards in your second group have in common? (Describe your third group as well, if you have a third group.) @fitb{}{} + +@fitb{}{} + +@fitb{}{} + +@n Can you think of a different way of grouping these histograms? @fitb{}{} + +@fitb{}{} + +@fitb{}{} + +@n Describe how you can determine where the outliers are on an asymmetrical histogram. @fitb{}{} + +@fitb{}{} + +@fitb{}{} diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/pages/workbook-pages.txt b/lessons/Data-Science/histograms-visualize/langs/en-us/pages/workbook-pages.txt new file mode 100644 index 00000000000..ccf0042cdf4 --- /dev/null +++ b/lessons/Data-Science/histograms-visualize/langs/en-us/pages/workbook-pages.txt @@ -0,0 +1,5 @@ +making-histograms.adoc +reading-histograms.adoc +sorting-histograms.adoc +bar-chart-v-histogram.adoc +choosing-bin-size.adoc diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/proglang.txt b/lessons/Data-Science/histograms-visualize/langs/en-us/proglang.txt new file mode 100644 index 00000000000..b09e8215f92 --- /dev/null +++ b/lessons/Data-Science/histograms-visualize/langs/en-us/proglang.txt @@ -0,0 +1,2 @@ +pyret +codap \ No newline at end of file diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/slides-codap.id b/lessons/Data-Science/histograms-visualize/langs/en-us/slides-codap.id new file mode 100644 index 00000000000..081fdab7d5a --- /dev/null +++ 
b/lessons/Data-Science/histograms-visualize/langs/en-us/slides-codap.id @@ -0,0 +1 @@ +1K97Xfu1mxcjCVis3_v-kyCH9x8AakiTdl73uWkIcMZo diff --git a/lessons/Data-Science/histograms-visualize/langs/en-us/slides-pyret.id b/lessons/Data-Science/histograms-visualize/langs/en-us/slides-pyret.id new file mode 100644 index 00000000000..d1a74831991 --- /dev/null +++ b/lessons/Data-Science/histograms-visualize/langs/en-us/slides-pyret.id @@ -0,0 +1 @@ +1U-hMS-iXKL13NFD4szyOWVuoXdTWPk79GilEROa8cSw diff --git "a/lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-22 at 4.49.39\342\200\257PM.png" "b/lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-22 at 4.49.39\342\200\257PM.png" deleted file mode 100644 index 46f3b338eb3..00000000000 Binary files "a/lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/Screenshot 2024-08-22 at 4.49.39\342\200\257PM.png" and /dev/null differ diff --git a/lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/pounds10.png b/lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/pounds10.png deleted file mode 100644 index aebff4d3c3b..00000000000 Binary files a/lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/pounds10.png and /dev/null differ diff --git a/lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/pounds100.png b/lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/pounds100.png deleted file mode 100644 index 475313a627c..00000000000 Binary files a/lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/pounds100.png and /dev/null differ diff --git a/lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/pounds25.png 
b/lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/pounds25.png deleted file mode 100644 index 2c92ec121fd..00000000000 Binary files a/lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/pounds25.png and /dev/null differ diff --git a/lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/pounds5.png b/lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/pounds5.png deleted file mode 100644 index d93fe8bfc2c..00000000000 Binary files a/lessons/Data-Science/histograms/langs/en-us/assessments/images-used-in-assessments/pounds5.png and /dev/null differ diff --git a/lessons/Data-Science/histograms/langs/en-us/images/lesson-images.json b/lessons/Data-Science/histograms/langs/en-us/images/lesson-images.json deleted file mode 100644 index 32b264e8424..00000000000 --- a/lessons/Data-Science/histograms/langs/en-us/images/lesson-images.json +++ /dev/null @@ -1,62 +0,0 @@ -{ - "pet-numbers.png": { - "description": "bar chart with 3 columns: cat (3), dog (3), rabbit (1). There is a space between each of the bars.", - "source" : "Created by the Bootstrap Team based on contrived data", - "license" : "Creative Commons 4.0 - NC - SA" - }, - "InterpretData.png" : { - "description" : "Interpret Data", - "source" : "Created by the Bootstrap Team based on work from @link{http://introdatascience.org/, Mobilizing IDS project} and @link{https://www.amstat.org/asa/files/pdfs/GAISE/GAISEPreK12_Intro.pdf, GAISE}", - "license" : "Creative Commons 4.0 - NC - SA" - }, - "pet-weights.png": { - "description": "bar chart with 3 columns: cat (3), dog (3), rabbit (1). 
There is a space between each of the bars.", - "source" : "Created by the Bootstrap Team based on contrived data", - "license" : "Creative Commons 4.0 - NC - SA" - }, - "histogram-a.png": { - "description": "Histogram with 6 columns whose respective heights from left to right are 1, 1, 3, 3, 1, 1", - "source" : "Created by the Bootstrap Team based on contrived data", - "license" : "Creative Commons 4.0 - NC - SA" - }, - "histogram-b.png": { - "description": "histogram with 10 bars equal in height based on contrived data", - "source" : "Created by the Bootstrap Team", - "license" : "Creative Commons 4.0 - NC - SA" - }, - "histogram-c.png": { - "description": "Histogram with 9 bars whose heights from left to right are 0,1,3,3,1,0,0,1,1", - "source" : "Created by the Bootstrap Team based on contrived data", - "license" : "Creative Commons 4.0 - NC - SA" - }, - "histogram-d.png": { - "description": "Histogram with 9 bars whose heights from left to right are 0,1,1,0,0,1,3,3,1", - "source" : "Created by the Bootstrap Team based on contrived data", - "license" : "Creative Commons 4.0 - NC - SA" - }, - "histogram-e.png": { - "description": "Histogram with 11 bars whose heights from left to right are 0,2,2,1,0,0,0,0,1,2,2", - "source" : "Created by the Bootstrap Team based on contrived data", - "license" : "Creative Commons 4.0 - NC - SA" - }, - "left-w-foot.png": { - "description": "A hill-shaped histogram, with a clump of taller bars on the right side, and smaller bars trailing off to the left=", - "source" : "Created by the Bootstrap Team based on contrived data", - "license" : "Creative Commons 4.0 - NC - SA" - }, - "right-w-foot.png": { - "description": "A hill-shaped histogram, with a clump of taller bars on the left side, and smaller bars trailing off to the right side", - "source" : "Created by the Bootstrap Team based on contrived data", - "license" : "Creative Commons 4.0 - NC - SA" - }, - "teeth-histogram.png": { - "description": "histogram with bin width of 
five: 0 to 4, 5 to 9, etc. ready to be filled in", - "source" : "Created by the Bootstrap Team based on contrived data", - "license" : "Creative Commons 4.0 - NC - SA" - }, - "teeth-histogram-soln.png": { - "description": "Teeth histogram", - "source" : "Created by the Bootstrap Team based on contrived data", - "license" : "Creative Commons 4.0 - NC - SA" - } -} diff --git a/lessons/Data-Science/histograms/langs/en-us/images/pet-numbers.png b/lessons/Data-Science/histograms/langs/en-us/images/pet-numbers.png deleted file mode 100644 index 3b5434573ce..00000000000 Binary files a/lessons/Data-Science/histograms/langs/en-us/images/pet-numbers.png and /dev/null differ diff --git a/lessons/Data-Science/histograms/langs/en-us/images/pet-weights.png b/lessons/Data-Science/histograms/langs/en-us/images/pet-weights.png deleted file mode 100644 index 4146692c12c..00000000000 Binary files a/lessons/Data-Science/histograms/langs/en-us/images/pet-weights.png and /dev/null differ diff --git a/lessons/Data-Science/histograms/langs/en-us/index.adoc b/lessons/Data-Science/histograms/langs/en-us/index.adoc deleted file mode 100644 index 1081f3dfed3..00000000000 --- a/lessons/Data-Science/histograms/langs/en-us/index.adoc +++ /dev/null @@ -1,310 +0,0 @@ -= Histograms - -@description{Students are introduced to Histograms by comparing them to bar charts, learning to construct them by hand and in the programming environment.} - -@ifproglang{pyret}{ -@lesson-prereqs{contracts-visualizations, ds-intro} -} - -@ifproglang{codap}{ -@lesson-prereqs{codap-dot-plots-bar-charts} -} - -@keywords{histogram, bin, interval} - -@add-to-lang{histogram} - -[@lesson-intro-table] -|=== -| Lesson Goals -| Students will be able to... - -@objectives -@objective{histograms-pyret} -@objective{Make charts displaying frequency in Pyret and interpret them.} - -| Student-facing Lesson Goals -| - -- Let's create histograms for datasets and learn how to interpret them. 
- -| Materials -|[.materials-links] - -@material-links - - -| Preparation -| -@preparation{ -- There is an interactive Desmos activity in the lesson, as well as Desmos formative assessments. - * Open the Desmos activity you plan to use (@starter-file{histograms}, @assessment{histograms-check1-desmos}, or @assessment{histograms-check2-desmos}) and make a code or link to share with students. - * If you are using our Google Slides, add the appropriate link to the slide deck. - * _If you're a first-time Desmos user, fear not!_ @dist-link{pages/desmos-instructions.adoc, _Here's what you need to do._} -- There is an optional kinesthetic activity in this lesson that requires a ball of play-dough for each group of 3. -} - -| Assessments -| [.assessment-links] -@assessments - -|=== - -== Introducing Histograms - -=== Overview -Students look at a bar chart and a histogram, compare/contrast them, and make observations about what they have in common and how they are different. Then they learn a more formal explanation of histograms and practice sketching one by hand. - -=== Launch - -@lesson-instruction{Turn to @printable-exercise{summarizing-columns.adoc}, answer the questions about the chart, and identify some ways in which bar charts and histograms are similar and different. -} - -@slidebreak - -@right{@image{images/pet-numbers.png, 250}} -The display on the left side of that page is a @vocab{Bar chart}. - -- The x-axis lists the values of a @vocab{categorical} variable (`species`). -- The y-axis shows the @vocab{frequency} of categorical values in the dataset. -- This chart happens to show the categorical values in alphabetical order from left to right, but it would be fine to re-order them anyway we wish. The bar for “dogs” could have been drawn before the one for “cats”, without changing the meaning of the display. - -@vspace{2ex} - -@slidebreak - -@right{@image{images/pet-weights.png, 250}} -The display on the right side is called a @vocab{histogram}. 
- -- Histograms show the distribution of @vocab{quantitative} data. -- Since quantitative data must follow a natural order, these bars _cannot_ be re-ordered. -- Histograms allow us to see the shape of a dataset. - -@clear - -@strategy{Optional: Kinesthetic Activity}{ - - -Divide the class into groups of three, and give each group a ball of play-dough. Have the groups roll the dough into a thick cylinder, then divide that cylinder in half. Then, have them take one of the halves and cut that in half _again_, then cut one of the resulting pieces in half once more. This will form *four* chunks of play-dough, with a ratio of *1:1:2:4* - -The play-dough represents a @vocab{sample}, with values falling into four intervals. The largest cylinder represents double the number of "data points" (amounts of dough) as the next largest, which in turn has double the data points of the two small ones. - -Histograms pile the data points into equally-sized intervals, just as the cylinders of dough are all of the same width. _More dough means longer cylinders_, since the "interval width" (cylinder thickness) stays fixed. - -Have students line up the cylinders from smallest-to-largest, laying them on a sheet of graph paper. Have them come up with labels for what the x- and y-axis might represent! -} - -=== Investigate -To build a histogram: - -- Start by sorting all of the numbers in a column from smallest to largest. -- Mark the x-axis from the smallest value (or a bit below) to the largest value (or a bit above). -- Divide the x-axis into equally-sized @vocab{bins} (also known as @vocab{intervals}). - * If our values ranged from 3 to 53 we might mark our x-axis from 0 to 60 and divide it into bins of width 10. - * If they range from 22 to 41 we might mark our x-axis from 20 to 45 and divide it into bins of width 5. -- Put each value in our dataset into the bin where it belongs, and then count how many values fall in each bin. 
-- The number of values in each bin determines the height of the bars on our y-axis. - -@slidebreak - -@lesson-instruction{ -Turn to @printable-exercise{making-histograms.adoc}, and try drawing a histogram from the dataset. -} - -@slidebreak - -In the histogram we just made, - -- We see that the data is clustered at the right-hand side of the histogram: most people in this sample have close to a full set of teeth, with some people missing a few more than others. -- Surprisingly, five people have almost no teeth at all! These people are very unusual, and they show up as a small bar far to the left of the main cluster. - -@lesson-point{ -Extreme values - which sit far above or below the others - are called _outliers_ -} - -- Note that intervals on this display include the left endpoint but not the right. If we included the right endpoint and someone had 0 teeth, we’d have to add on a bar from -5 to 0, which would be awfully strange! - -@slidebreak - -@lesson-instruction{ -Turn to @printable-exercise{reading-histograms-matching.adoc} and practice matching descriptions of video ratings to histograms that could fit the data. -} - -=== Synthesize -How are histograms and bar charts different? - -@teacher{ - -Want to check student mastery of the content you've just taught? Administer @assessment{histograms-check1-desmos} to get a snapshot of your students' current level of mastery. Make sure you have created a link or code for your class to the assessment. - -If you'd prefer to wait until your students have completed the __entire__ lesson to check mastery, we also offer a cumulative assessment at the end of @link{https://www.bootstrapworld.org/materials/latest/en-us/lessons/histograms/index.shtml?pathway=data-science#_choosing_the_right_bin_size_duration30_minutes -, "Choosing the Right Bin Size"}, below. 
- -} - -== Choosing the Right Bin Size - -=== Overview -Students learn to make histograms from the animals-dataset in Pyret and explore the importance of choosing the right bin size in order for a histogram to show us the shape of the data. - -=== Launch -Bins that are too small will hide the shape of the data by breaking it into too many short bars. Bins that are too large will hide the shape by squeezing the data into just a few tall bars. So far, the bins were provided for you. But how do you choose a good bin-size? - -@teacher{Make sure you have created a link or code for your class to @starter-file{histograms}.} - -@lesson-instruction{ -- Open the *Desmos* link I've shared with you. (The file should be called *Histogram Bin Size Exploration*.) -- Use the Bin Size slider to explore how changing the bin size impacts the shape of the histogram and what we can learn about the distribution of the data. -- Record your notices and wonders in the space provided on Slide 1. -- Before moving on to Slide 2, be sure to click the "New Dataset" button and see if you notice and wonder anything new. -- When you're done exploring Slide 1, move on to Slide 2 and answer the questions. -} - -=== Investigate -Suppose we want to know how long it takes for animals from the shelter to be adopted. - -@lesson-instruction{ -- Log into @starter-file{program-list}, open your saved Animals Starter File, and click "Run". -- Complete @printable-exercise{choosing-bin-size.adoc}. 
-} - -@teacher{Students who haven't saved this file yet can @starter-file{animals, make a new copy}.} - -@slidebreak - -@QandA{ -@Q{What did you Notice?} -@A{We see most of the histogram’s area under the two bars between 0 and 10 weeks, so we can say it was most common for an animal to be adopted in 10 weeks or less.} -@A{We see a small amount of the histogram’s area trailing out to unusually high values, so we can say that a couple of animals took an unusually long time to be adopted: one took even more than 30 weeks.} -@A{More than half of the animals (17 out of 31) took just 5 weeks or less to be adopted. But the few unusually long adoption times pulled the average up to 5.8 weeks.} - -@Q{What was a typical adoption time?} -@A{Almost all of the animals were adopted in 10 weeks or less, but a couple of animals took an unusually long time to be adopted -- even more than 20 or 30 weeks!} -@A{Be sure to draw attention to the fact that it would have been hard to give this summary by reading through the table, but the histogram makes it easy to see!} - -@Q{What bin sizes worked best for analyzing `adoption`?} -@A{Have students talk about the bin sizes they tried. Encourage open discussion as much as possible here, so that students can make their own meaning about bin sizes before moving on to the next point.} -} - -@slidebreak - -@lesson-point{ -Rule of thumb: a histogram should have between 5–10 bins. -} - -Histograms are a powerful way to display a dataset and assess its @vocab{shape}. Choosing the right bin size for a column has a lot to do with how data is distributed between the smallest and largest values in that column! With the right bin size, we can see the _shape_ of a quantitative column. - -@teacher{ -But how do we talk about or describe that shape, and what does the shape actually tell us? - -Our @lesson-link{visualizing-the-shape-of-data} lesson addresses these questions... 
and our @lesson-link{measures-of-center} lesson explores the effect of the shape of a histogram on the mean (average). -} - -@slidebreak - -@lesson-instruction{ -Apply what you've learned by completing @printable-exercise{data-cycle-histograms-animals.adoc}. -} - -=== Synthesize -- What would the histogram look like if most of the animals took more than 20 weeks to be adopted, but a couple of them were adopted in fewer than 5 weeks? -- What would the histogram look like if every animal was adopted in roughly the same length of time? - - -@teacher{ - -Want to check student mastery of the content you've just taught? Administer @assessment{histograms-check2-desmos} to get a snapshot of your students' current level of mastery. Make sure you have created a link or code for your class to the assessment. - -Alternatively, we offer a compilation of both Checkpoints in @assessment{histograms-cumulative-desmos}. -} - - - -@pd-slide{ -Shape is Critical! - -The axes are not labeled intentionally! We want you to get good at identifying shape without leaning on numbers, because numbers can be very misleading in statistics. - -K-12 mathematics doesn't talk about shape enough... and when we do talk about shape, we often give kids the misconception that all datasets should have a normal distribution - a hump in the middle of a bell curve. A robust focus on _shape_ helps address this misconception, while also helping to develop students' visual sense for statistics and distribution. -} - -== Data Exploration Project (Histograms) - -=== Overview - -Students apply what they have learned about histograms to their chosen dataset. They will add two items to their @starter-file{exploration-project}: (1) at least two histograms and (2) any interesting questions that emerge. - -@teacher{Visit @lesson-link{project-data-exploration} to learn more about the sequence and scope. 
Teachers with time and interest can build on the exploration by inviting students to take a deep dive into the questions they develop with our @lesson-link{project-research-paper}. -} - -=== Launch - -Before we shift our focus to your chosen datasets, let’s quickly review what we have learned about making and interpreting histograms. - -@QandA{ -@Q{Does a histogram display categorical or quantitative data? How many columns of data does a histogram display?} -@A{Histograms display a single column of quantitative data.} - -@Q{How is a histogram different from a bar chart?} -@A{Because a bar chart displays categorical data, we can rearrange the bars in any order we wish. Because the quantitative data of a histogram must follow a natural order, bars cannot be rearranged.} - -@Q{What do histograms show us about a dataset?} -@A{Histograms allow us to see the shape of one column of dataset.} - -@Q{How can you decide an appropriate bin size for your histogram?} -@A{A histogram should have 5-10 bins. We want to choose a bin size that lets us the shape of a quantitative column.} -} - - -=== Investigate - -Let’s connect what we know about histograms to your chosen dataset. - -@teacher{Students have the opportunity to choose a dataset that interests them from our @lesson-link{choosing-your-dataset/pages/datasets-and-starter-files.adoc, "List of Datasets"} in the @lesson-link{choosing-your-dataset} lesson. If you'd prefer to focus your class on a single dataset, we recommend the @starter-file{food}. -} - - -@lesson-instruction{ -- Open your chosen dataset starter file in @ifproglang{pyret}{Pyret.} @ifproglang{codap}{CODAP.} -- Choose one quantitative column from your data set that you will represent with a histogram. -- Create the histogram. 
-} - -@QandA{ -@Q{What question does your display answer?} -@A{Possible response: What is the shape of a particular quantitative column of my dataset?} -} - -@lesson-instruction{ -- Now, write down that question in the top section of @printable-exercise{data-cycle-histograms.adoc}. -- Then, complete the rest of the data cycle, recording how you considered, analyzed and interpreted the question. -- Repeat this process for at least one more quantitative column. -} - -@teacher{ -Confirm that all students have created and understand how to interpret their histograms.} - -@slidebreak - -@lesson-instruction{ -*It’s time to add to your @starter-file{exploration-project}.* - -- Copy/paste at least two histograms. Be sure to also add any interesting questions that you developed while making and thinking about histograms. -} - -@teacher{ - -You may need to help students locate the “Histogram” slide in the "Making Data Visualizations" section. They will need to duplicate the slide to add their second display. The “My Questions” section is at the end of the slide deck._ - -Note: During the next lesson, @lesson-link{visualizing-the-shape-of-data}, students will learn additional vocabulary to help them describe what they see in their histogram. They can add to their histogram interpretations at that point. -} - -=== Synthesize - -@teacher{Have students share their findings.} - -- Did you discover anything surprising or interesting about your dataset? - -- What questions did the bar and pie charts inspire raise? - -- Did other students make any discoveries that were surprising or interesting to you? (For instance: Did everyone find outliers? Was there more or less similarity than expected?) 
diff --git a/lessons/Data-Science/histograms/langs/en-us/pages/data-cycle-histograms-animals.adoc b/lessons/Data-Science/histograms/langs/en-us/pages/data-cycle-histograms-animals.adoc deleted file mode 100644 index 2b54e340116..00000000000 --- a/lessons/Data-Science/histograms/langs/en-us/pages/data-cycle-histograms-animals.adoc +++ /dev/null @@ -1,90 +0,0 @@ -= Data Cycle: Shape of the Animals Dataset -++++ - -++++ - -Use the Data Cycle to explore the distribution of one or more quantitative columns in @starter-file{animals} using *histograms*. - -@data-cycle{ "" - #:question "What is the shape of the age column of the Animals dataset?" - #:show-question? #t - #:question-type "statistical" - #:show-question-type? #f - #:rows "All the rows" - #:show-rows? #f - #:cols "age" - #:show-cols? #f - #:filter-fn "" - #:show-filter? #f - #:build-fn "" - #:show-build? #f - #:expression (if (string=? *proglang* "pyret") '(histogram animals-table "name" "age" 3) "Histogram. Select Group into Bins from Configuration menu. Fuse dots into bars, enter bin width") - #:show-expression? #f - #:finding "" - #:show-finding? #f - #:new-question "" - #:show-new-question? #f -} - -[.template, cols="^.^2a,13a"] -|=== -|@centered-image{../images/InterpretData.png} -| The histogram I created is for @fitbruby{16em}{@ifsoln{age}}{x-variable in context} from @fitbruby{16em}{@ifsoln{the animals dataset}}{dataset or subset}. - -The bin size I chose is @fitbruby{5em}{@ifsoln{3}}{bin size}, which resulted in a histogram with @fitbruby{5em}{@ifsoln{10}}{how many?} bins. I chose this bin size because @fitb{}{@ifsoln{a bin size of 5 did not capture the shape completely}} - -I would describe the shape of this histogram as @fitb{}{@ifsoln{Answers vary. More data on the left, then it trails out to the right.}} - -I notice that @fitbruby{}{@ifsoln{Answers vary. Felix is an outlier.}}{Consider statements like: Most of the histogram's area is... / A small amount of the histograms area trails out... 
/ etc} - -I wonder @fitb{}{@ifsoln{Why are there are so many 2-3 year old animals? Answers vary.}} - -|=== - - -@span{.sectionbreak}{} - -@data-cycle{ "" - #:question "" - #:show-question? #f - #:question-type "statistical" - #:show-question-type? #f - #:rows "" - #:show-rows? #f - #:cols "" - #:show-cols? #f - #:filter-fn "" - #:show-filter? #f - #:build-fn "" - #:show-build? #f - #:expression "" - #:show-expression? #f - #:finding "" - #:show-finding? #f - #:new-question "" - #:show-new-question? #f -} - -[.template, cols="^.^2a,13a"] -|=== -|@centered-image{../images/InterpretData.png} -| The histogram I created is for @fitbruby{16em}{}{x-variable in context} from @fitbruby{16em}{}{dataset or subset}. - -The bin size I chose is @fitbruby{5em}{}{bin size}, which resulted in a histogram with @fitbruby{5em}{}{how many?} bins. I chose this bin size because @fitb{}{} - -I would describe the shape of this histogram as @fitb{}{} - -I notice that @fitbruby{}{}{Consider statements like: Most of the histogram's area is... / A small amount of the histograms area trails out... / etc} - -I wonder @fitb{}{} - -|=== diff --git a/lessons/Data-Science/histograms/langs/en-us/pages/data-cycle-histograms.adoc b/lessons/Data-Science/histograms/langs/en-us/pages/data-cycle-histograms.adoc deleted file mode 100644 index 4c3dacfc69c..00000000000 --- a/lessons/Data-Science/histograms/langs/en-us/pages/data-cycle-histograms.adoc +++ /dev/null @@ -1,48 +0,0 @@ -= Data Cycle: Shape of My Dataset - -Use the Data Cycle to explore the distribution of one or more quantitative columns from @lesson-link{choosing-your-dataset/pages/datasets-and-starter-files.adoc, "your chosen dataset"} using *histograms*, and write down your findings. - -@data-cycle{ "" - #:question "" - #:show-question? #f - #:question-type "statistical" - #:show-question-type? #f - #:rows "" - #:show-rows? #f - #:cols "" - #:show-cols? #f - #:filter-fn "" - #:show-filter? #f - #:build-fn "" - #:show-build? 
#f - #:expression "" - #:show-expression? #f - #:finding "" - #:show-finding? #f - #:new-question "" - #:show-new-question? #f -} - - -@span{.sectionbreak}{} - -@data-cycle{ "" - #:question "" - #:show-question? #f - #:question-type "statistical" - #:show-question-type? #f - #:rows "" - #:show-rows? #f - #:cols "" - #:show-cols? #f - #:filter-fn "" - #:show-filter? #f - #:build-fn "" - #:show-build? #f - #:expression "" - #:show-expression? #f - #:finding "" - #:show-finding? #f - #:new-question "" - #:show-new-question? #f -} diff --git a/lessons/Data-Science/histograms/langs/en-us/pages/making-histograms.adoc b/lessons/Data-Science/histograms/langs/en-us/pages/making-histograms.adoc deleted file mode 100644 index 1e1b9f798bd..00000000000 --- a/lessons/Data-Science/histograms/langs/en-us/pages/making-histograms.adoc +++ /dev/null @@ -1,28 +0,0 @@ -= Making Histograms - -Suppose we have a dataset for a group of 50 adults, showing the number of teeth each person has: - -[cols="^3a,^2a",options="header"] -|=== -| Number of teeth | Frequency -| 0 | 5 -| 22 | 1 -| 26 | 1 -| 27 | 1 -| 28 | 4 -| 29 | 3 -| 30 | 5 -| 31 | 3 -| 32 | 27 - -|=== - -@vspace{2ex} - -*Draw a histogram for the table in the space below.* For each row, find which interval -(or “bin”) on the x-axis represents the right number of teeth. Then fill in the box so that -its height is equal to the _sum of the counts_ that fit into that interval. One of -the intervals has been completed for you. 
- -@ifnotsoln{ @centered-image{../images/teeth-histogram.png, 800} } -@ifsoln{ @centered-image{../images/teeth-histogram-soln.png, 800} } diff --git a/lessons/Data-Science/histograms/langs/en-us/pages/notes-histograms.adoc b/lessons/Data-Science/histograms/langs/en-us/pages/notes-histograms.adoc deleted file mode 100644 index 4b60206be30..00000000000 --- a/lessons/Data-Science/histograms/langs/en-us/pages/notes-histograms.adoc +++ /dev/null @@ -1,55 +0,0 @@ -= Histograms in a Nutshell -++++ - -++++ -To best understand histograms, it's helpful to contrast them first with bar charts. - -@vspace{1ex} - -*Bar charts* show the number of rows belonging to a given category. The more rows in each category, the taller the bar. - -- Bar charts provide a visual representation of the frequency of values in a *categorical* column. - -- There’s no strict numerical way to order these bars. - - * The frequency of red, yellow and blue balloons in the sample would make sense no matter what order they get presented in. - * But *sometimes there’s one order that makes more sense than the others*. For example, it would be logical to show the number of t-shirt sizes in order of smallest to largest shirt. - -@vspace{2ex} - -*Histograms* show the number of rows that fall within certain intervals, or “bins”, on a horizontal axis. The more rows that fall within a particular “bin”, the taller the bar. - - - _Histograms provide a visual representation of the frequencies (or relative frequencies) of values in a *quantitative* column._ - - - Quantitative data *can always be ordered*, so the bars of a histogram always progress from smallest (on the left) to largest (on the right). - - - When dealing with histograms, it’s important to select a good *bin size*. If the bins are too small or too large, it is difficult to see the shape of the dataset. Choosing a good bin size can take some trial and error! - -@vspace{2ex} - -The *shape* of a dataset tells us which values are more or less common. 
-[.last-list] - - - @right{@image{../images/histogram-a.png, 100}}In a *symmetric* dataset, values are just as likely to occur a certain distance above the mean as below the mean. Each side of a symmetric distribution looks almost like a mirror-image of the other. - - - Some extreme values may be far greater or far lower than the other values in a dataset. These extreme values are called *outliers*. - - - @right{@image{../images/histogram-d.png, 100}}A dataset that is *skewed left* has a few values that are unusually low. The histogram for a skewed left dataset has a few data points that are stretched out to the left (lower) end of the x-axis. - - - @right{@image{../images/histogram-c.png, 100}}A dataset that is *skewed right* has a few values that are unusually high. The histogram for a skewed right dataset has a few data points that are stretched out to the right (higher) end of the x-axis. - - - One way to visualize the difference between a histogram of data that is *skewed left* or *skewed right* is to think about the lengths of our toes on our left and right feet. - -@indented{ -[cols="1a,1a", stripes="none", frame="none", grid="none"] -|=== -|Much like the bar lengths of a histogram that is "skewed left", our left feet have smaller toes on the left and a bigger toe on the right. -|Our right feet have the big toe on the left and smaller toes on the right, more closely resembling the shape of a histogram of "skewed right" data. 
-|@centered-image{../images/left-w-foot.png, 100} -|@centered-image{../images/right-w-foot.png, 100} -|=== -} diff --git a/lessons/Data-Science/histograms/langs/en-us/pages/summarizing-columns.adoc b/lessons/Data-Science/histograms/langs/en-us/pages/summarizing-columns.adoc deleted file mode 100644 index 5e6c0c59234..00000000000 --- a/lessons/Data-Science/histograms/langs/en-us/pages/summarizing-columns.adoc +++ /dev/null @@ -1,67 +0,0 @@ -= Summarizing Columns with Bar Charts & Histograms - -++++ - -++++ - -[cols="3a,3a,2a,2a",options="header"] -|=== -^| name ^| species ^| age ^| pounds -| `"Sasha"` | `"cat"` | 1 | 6.5 -| `"Boo-boo"` | `"dog"` | 11 | 12.3 -| `"Felix"` | `"cat"` | 16 | 9.2 -| `"Nori"` | `"dog"` | 6 | 35.3 -| `"Wade"` | `"cat"` | 1 | 3.2 -| `"Nibblet"` | `"rabbit"`| 6 | 4.3 -| `"Maple"` | `"dog"` | 3 | 51.6 -|=== - -@vspace{3ex} - -[.FillVerticalSpace, cols=".^1a,.^19a,.^5a"] -|=== -|1| How many cats are there in the table above? -| @ifsoln{3} - -|2| How many dogs are there? -| @ifsoln{3} - -|3| How many animals weigh between 0 and 20 pounds? -| @ifsoln{5} - -|4| How many animals weigh between 20 and 40 pounds? -| @ifsoln{1} - -|5| Are there more animals weighing 40-60 pounds than 60-140 pounds? -| @ifsoln{yes} - -// need empty line here so the closing table block isn't -// swallowed -|=== - -@vspace{3ex} - -The two visualizations below both summarize this table. The display on the left is a *Bar Chart*, while the one on the right is a *Histogram*. What is similar about them? What is different? - -@left{@image{../images/pet-numbers.png, 275}} @right{@image{../images/pet-weights.png, 260}} - -[.FillVerticalSpace, cols=".^1a,.^1a",options="header"] -|=== - -^| Similarities ^| Differences -| @ifsoln{(Student responses vary.)} -| @ifsoln{(Student responses vary.)} - -| @ifsoln{Both involve vertical bars.} -| @ifsoln{The bars are different.} - -| @ifsoln{Both show quantity/frequency.} -| @ifsoln{The left uses categorical data. 
The right uses quantitative data.} - -// need empty line here so the closing table block isn't -// swallowed -|=== - diff --git a/lessons/Data-Science/histograms/langs/en-us/pages/workbook-pages.txt b/lessons/Data-Science/histograms/langs/en-us/pages/workbook-pages.txt deleted file mode 100644 index 5b1a02d3ca4..00000000000 --- a/lessons/Data-Science/histograms/langs/en-us/pages/workbook-pages.txt +++ /dev/null @@ -1,7 +0,0 @@ -notes-histograms.adoc -summarizing-columns.adoc -making-histograms.adoc -reading-histograms-matching.adoc -choosing-bin-size.adoc -data-cycle-histograms-animals.adoc -data-cycle-histograms.adoc \ No newline at end of file diff --git a/lessons/Data-Science/measures-of-center/langs/en-us/images/buildings-by-height.png b/lessons/Data-Science/measures-of-center/langs/en-us/images/buildings-by-height.png new file mode 100644 index 00000000000..6e8e4697170 Binary files /dev/null and b/lessons/Data-Science/measures-of-center/langs/en-us/images/buildings-by-height.png differ diff --git a/lessons/Data-Science/measures-of-center/langs/en-us/images/lesson-images.json b/lessons/Data-Science/measures-of-center/langs/en-us/images/lesson-images.json index 47e6d49dfbb..a6985f881f4 100644 --- a/lessons/Data-Science/measures-of-center/langs/en-us/images/lesson-images.json +++ b/lessons/Data-Science/measures-of-center/langs/en-us/images/lesson-images.json @@ -38,5 +38,10 @@ "description": "number line with dots above the line representing the weight of each of the animals. 
There is a big cluster between 0 and 17, a small cluster in the 30s and another small cluster in the 40s and individual dots spread out along the number line til a little above 120.", "source" : "Created by the Bootstrap Team in CODAP based on contrived data", "license" : "Creative Commons 4.0 - NC - SA" + }, + "buildings-by-height.png": { + "description": "An infographic of the 6 tallest buildings (1908-1974) arranged by height, with the median value circled", + "source" : "Wikimedia Commons", + "license" : "Creative Commons 4.0 - NC - SA" } } diff --git a/lessons/Data-Science/measures-of-center/langs/en-us/images/number-children.png b/lessons/Data-Science/measures-of-center/langs/en-us/images/number-children.png new file mode 100644 index 00000000000..2884bb90c1d Binary files /dev/null and b/lessons/Data-Science/measures-of-center/langs/en-us/images/number-children.png differ diff --git a/lessons/Data-Science/measures-of-center/langs/en-us/images/sat-verbal.png b/lessons/Data-Science/measures-of-center/langs/en-us/images/sat-verbal.png new file mode 100644 index 00000000000..5bf94a63b6a Binary files /dev/null and b/lessons/Data-Science/measures-of-center/langs/en-us/images/sat-verbal.png differ diff --git a/lessons/Data-Science/measures-of-center/langs/en-us/index.adoc b/lessons/Data-Science/measures-of-center/langs/en-us/index.adoc index 9fb8fc11fa1..d03c0b1d55d 100644 --- a/lessons/Data-Science/measures-of-center/langs/en-us/index.adoc +++ b/lessons/Data-Science/measures-of-center/langs/en-us/index.adoc @@ -2,7 +2,7 @@ @description{Students are introduced to mean, median and mode(s) and consider which of these measures of center best describes various quantitative data.} -@lesson-prereqs{visualizing-the-shape-of-data} +@lesson-prereqs{dot-plots} @keywords{mean, median, mode, modes} @@ -15,13 +15,11 @@ | Students will be able to... 
@objectives -@objective{measures-of-center-pyret} -@objective{Recognize that the Measures of Center we can compute are not all necessarily typical of the distribution.} | Student-facing Lesson Goals | -- Let's use mean, median, and modes to describe our data. +- Let's use mean, median, and mode(s) to describe our data. | Materials |[.materials-links] @@ -35,150 +33,127 @@ | Preparation | @preparation{ -- The kinesthetic activity in this lesson requires a ruler and 4-8 pennies for each group of 3. +- During the kinesthetic activity in this lesson, you will split your students into groups of 3. Each group will need a ruler, a marker, tape, and 5 pennies. - There is an optional activity in this lesson that would require you to print an extra data cycle for your students. -- In each of the first 3 Investigate sections of this lesson, students will calculate a Measure of Center from a list. If you would like to be able to direct students to printed versions with space for them to write, make copies of @opt-printable-exercise{mean-median-modes.adoc}. -- If you are using our Google Slides, adjust them based on which of these activities you will be doing with your students. +- The first section of the lesson is an introduction to measures of center. If you are confident that your students have a solid understanding of mean, median, and mode, you can jump to the second section of the lesson, where students will use @proglang to compute measures of center, and then consider which measure of center is the most appropriate. Looking for a do now to get the juices flowing before skipping ahead? Try @printable-exercise{mean-median-modes.adoc}. +- If you are using our Google Slides, adjust the slides based on which portions of the lesson you will be doing with your students. } |=== -== Mean +== Mean, Median, and Mode(s) + +@objective{measures-of-center-proglang} === Overview -Students learn about @vocab{mean} (or "average") as one way (among others!) 
to summarize a @vocab{quantitative} column, and how to compute it using @proglang. + +Students learn how to compute three different measures of center. === Launch -One of the ways that Data Scientists summarize quantitative data is by talking about its _center_ - literally asking "what is a typical value in this sample?", in the hopes of inferring something about a larger population. +@teacher{If your students are confident with computing mean, median, and mode, you may want to skip to the next section of the lesson, where students will (1) use @proglang to compute various measures of center and (2) begin considering the usefulness of each measure of center.} -But there are many different ways to define "center", and each method has strengths and weaknesses. The shape of the data can play a huge role in whether or not one kind of summary is appropriate! -@slidebreak +When working with dot plots, we often wondered, "What is a typical value in this sample?" -@lesson-instruction{ -Let's take a moment to consider what values might be typical for the weight of our animals by completing @printable-exercise{typical.adoc}. -} +\... and we often discovered that there were a variety of correct responses! -@slidebreak +When thinking about typicality, there are a few approaches: -@center{@image{images/pounds.png}} +- identify the value that is most common +- identify the midpoint of the sample +- identify a "balance point" among the data points -@QandA{ -@Q{Do you think there is a midpoint of this sample?} -@A{There are 32 animals - meaning that there is not one point in the middle.} - -@Q{Is there a value that shows up most often in this sample?} -@A{Since we see that dots are stacked up, it seems likely that there is some repetition in the animals' weights.} +It turns out that these different ways of thinking about typicality are all different ways of "measuring the center". -@Q{What value did you decide was typical? Why?} -@A{There isn't one right answer here!
The point is for students to hear each other's thinking, recognize that it's hard to summarize the data with a single number, and understand that there are different logical frameworks for doing so.} +Another way of asking _"What value is typical?"_ would be _"Is there a value that all the others tend to cluster around?"_ -} +Statisticians have a more formal, mathematically-defined term for this: @vocab{central tendency}. Central tendency is a "summary" measure, that attempts to describe a whole set of data with a single value that represents the middle or "center" of its distribution. In other words, _a value that all the others tend to cluster around_. -Each of these are different ways of “measuring center”. +There are several different measures of central tendency. Data Scientists know what each measure is, and when to use it. And in the next few lessons, you'll learn that too. -=== Investigate +@teacher{If students struggle to arrive at correct responses for the prompts below, proceed with this lesson as written, at whatever speed and level of depth you deem appropriate for your students. If your students are successful at computing mean, median, and mode, you can skip to the second section of the lesson. You may also consider using @printable-exercise{mean-median-modes.adoc} as a warm up/review. If you know that your students do not know how to compute mean, median, and mode, you can skip the first question, below.} -The Animal Shelter Bureau used a method of summary, called the @vocab{mean}, or "average" to report about the typical weight of pets, claiming that a typical animal weighs 40 pounds. 
@QandA{ -@Q{What do you already know about averages?} -@A{Sample Answer: To find the @vocab{mean} of a dataset we add all of the values and then divide their sum by the number of values in the dataset.} -@A{The @vocab{mean} is the number that "balances" all the other numbers in the sample.} -} - -@slidebreak - -@opt{The Mean section of @opt-printable-exercise{mean-median-modes.adoc} includes a printed version of the upcoming list.} +@Q{Compute the mean, median, and mode for these datasets: +** Dataset 1: 17, 23, 24, 23, 22 +** Dataset 2: 5, 5, 9, 9, 3, 29 +** Dataset 3: 8, 34, 15, 4, 76} -@QandA{ -@Q{We are going to learn to let @proglang compute the mean for us, but let's first make sure we understand what we're asking @proglang to do! How would we find the mean weight of five animals who weigh 17, 25, 23, 23 and 22 pounds?} -@A{First add @math{17 + 25 + 23 + 23 + 22 = 110} and then divide @math{110 \div 5 = 27}} -} +@A{Solutions. *Dataset 1* - mean: 21.8; median: 23; mode: 23. *Dataset 2* - mean: 10; median: 7; modes: 5, 9. *Dataset 3* - mean: 27.4; median: 15; mode: none} -@teacher{If you have time, we recommend deepening your students' understanding by engaging them with the kinesthetic activity: Finding the Value of the Balancing Point! +@Q{Tell me everything you know about measures of center.} +@A{The goal of this question is to gauge students' general level of comfort with mean, median, and mode. Take notes on the board, which you can add to as students make additional discoveries about measures of center during the instruction.} } -@strategy{Kinesthetic Activity - Finding the Value of the Balancing Point}{ - - -The arithmetic mean is the number that "balances" all the other numbers in the sample. So let's do some real balancing! -_Divide the class into groups of three. Supply each group with a ruler and 4-8 pennies. Make sure every group has at least one pen or pencil._ +=== Investigate -1.
The ruler represents a number line with values (weight) distributed equally across the line. If there's values at every inch from 0 to 12, where should the pencil be placed in order to balance the ruler on top of it? -2. Place a penny at 1 and 11. Where must the pencil be placed to balance those two values? What is the mean of the values [1, 11]? -3. Place pennies at 1, 9 and 11. Where must the pencil be placed to balance those two values? What is the mean of of the values [1, 9, 11]? -4. Suppose you were to place two pennies at 2, and a third penny at 8. Can you _predict_ where the pencil should be placed? -} +==== Mean -@slidebreak - -@ifproglang{pyret}{ -Pyret has a function that will compute the mean -- or average -- of any quantitative column in a Table. +We can think of the @vocab{mean} as the balancing point of a dataset, or the value where the "weight" of all data points on one side is equal to the "weight" of all data points on the other side. Let's explore this idea. -@hspace{2em} @show{ (contract "mean" '("Table" "String") "Number" )} -} - -@ifproglang{codap}{ -To compute the mean in CODAP, create a graph of randomly distributed points, then drag a quantitative column to the x-axis. From the `Measure` menu, select Mean. *If this information is not on your Data Visualizations Organizer, add it now!* +@teacher{ +Divide the class into groups of three. Supply each group with a marker, a ruler, tape, and 5 pennies. } -Let's test it out! - - @lesson-instruction{ -- Log into @starter-file{program-list}, open your saved "Animals Starter File" and click "Run". - * _Any student who doesn't have a copy of the @starter-file{animals} can open a new one._ -- Turn to @printable-exercise{summarizing-columns-moc.adoc} @ifproglang{pyret}{and use the provided code} to compute and record the @vocab{mean} weight. +- We are going to build a seesaw (also known as a teeter-totter) with a marker and a ruler. +- Tape the marker flat onto your desk using two pieces of tape. 
+- Set the ruler on top of the marker; try to balance it so that it hovers parallel to the surface of the desk. } -@slidebreak @QandA{ -@Q{How did your calculation compare to the Animal Shelter Bureau's claim that the average pet weighs nearly 40 pounds?} -@A{39.715625 is very close to 40!} +@Q{To balance the ruler over the marker, where must the marker be positioned? Why?} +@A{The balance point is at 6 inches. There is an equal amount of weight on each side of the marker when it is placed at 6 inches.} + +@Q{Now, tape one penny at 1 inch and another penny at 11 inches. Did you need to change the location of the marker to keep the ruler balanced?} +@A{To balance the ruler, both pennies must be the _same_ distance from the marker (or fulcrum). Both 11 and 1 are 5 units away from 6.} -@Q{When might it be useful to know the average weight of the animals? _Answers will vary._} -@A{If we were transporting them to a different shelter, knowing the average weight might help us confirm that a truck, boat or plane could support their collective weight.} +@Q{Keep the two pennies at 1 and 11, but tape a third penny at 9 inches. Adjust your ruler so that it hovers parallel to the desk. Did you need to change the location of the marker to keep the ruler balanced? Explain.} +@A{The ruler is balanced when the marker is at 7 inches. To balance the ruler, the total distance of the pennies _above_ the balance point must add up to the distance of the penny below the balance point.} -@Q{When might it be risky to describe the weight of these animals using the average? _Answers will vary._} -@A{If one of them were sick and we wanted to give it medicine, basing the dosage on the average would likely be way too little medicine for a big animal or a dangerously large amount of medicine for a little animal.} +@Q{Tape the last penny onto a position of your choice. Find the balance point.
Be prepared to share your results with the class.} +@A{We recommend drawing some sketches on the board of students' balanced rulers, adding and labeling arrows to represent the distance from each penny to the balance point.} +@A{The key idea is that *the total distance from the mean to the lower data points must equal the total distance from the mean to the upper data points.*} } -=== Possible Misconceptions -Just because a column contains numbers doesn't mean the data is quantitative. We could sum and divide a collection of zip codes, for example, but the output wouldn’t correspond to some “center” zip code. -=== Synthesize -If you heard that the mean age of students in a kindergarten class was 21, would you be surprised? Why or why not? +@strategy{Strategy: Building a Conceptual Understanding of Mean}{ -== Median +Very commonly, students develop a computational understanding of mean, but not a conceptual one (@citation{bakker-et-al-2005}, @citation{pollatsek-et-al-1981}). Without a strong conceptual understanding of mean, students will struggle to determine _which_ measure of center is most appropriate in a given situation. We use activities recommended in the research (such as the kinesthetic one described above) to combat this misconception. -=== Overview -Students learn the algorithm @ifproglang{pyret}{and code} for a second measure of center: the @vocab{median} and consider situations where taking the median is more appropriate than the mean. +Interpreting the mean as the "fair share" can also help students think conceptually about mean. In other words: _What amount will each member of a group get if everything is distributed equally?_ Sometimes, a thought experiment is most appropriate for conveying the "fair share" interpretation, e.g. if there are five dogs of differing weights, how would we redistribute the weight so that each of the five dogs is an equal weight?
If you have students who struggle to think about mean as a balancing point, you might consider sharing this alternative interpretation. +} -=== Launch -You computed the mean of that column to be almost exactly 40 pounds. That IS the average... -...but if we scan the dataset we'll quickly see that most of the animals weigh less than 40 pounds. In fact, more than half of the animals weigh less than just 15 pounds. +To compute the mean of any dataset, we add up all of the values, and then divide by the number of values in the dataset. This algorithm reveals to us our balance point--and we don't even need the pennies, the ruler, or the trial and error! -Why is the average so high? @slidebreak _Kujo and Mr. Peanutbutter!_ +@lesson-instruction{ +- Turn to @printable-exercise{mean-median-modes.adoc} and complete the first section of the page. +- When you are finished, compare your answers with a partner's answers and correct any mistakes. +} -*The mean is being thrown off by a few extreme data points*, called @vocab{outliers} because they fall far outside of the rest of the dataset. The mean may also be thrown off by the presence of @vocab{skewness}: a lopsided shape due to values trailing off to the left or right. -@slidebreak +==== Median -*There is another measure of center we can use* called the @vocab{median}. Instead of averaging the data points, it identifies the “middle” value, which half of the values are smaller than and the other half are larger than. +*There is another measure of center we can use* called the @vocab{median}. Instead of averaging the data points, it identifies the “middle” value, dividing the data into two groups. Half of the values are less than the median, and the other half are greater than the median. In the image below, 40 Wall Street represents the median height of the dataset; three buildings are shorter, and three buildings are taller.
+ +@right{@image{images/buildings-by-height.png, 250}} The algorithm for finding the median of a quantitative column is: -1. Sort the numbers -2. Cross out the highest and lowest number -3. Repeat until there is only one number left... -4. When there are an even number of numbers in the list, as in the example below, there will be two numbers left at the end. Take the _mean_ of those two numbers. +1. Sort the numbers. +2. Cross out the highest and lowest number. +3. Repeat until there is only one number left. +4. When there are an even number of numbers in the list, as in the example @ifnotslide{below} @ifslide{on the next slide}, there will be two numbers left at the end. Take the _mean_ of those two numbers. + +@teacher{ +Address the common misconception that the median is just a cut point in the data. Yes, the median is the middle value, but it is also a *measure of center*, meaning that it offers a characterization of the *entire* group of datapoints. Measures of center always summarize the values of a dataset with a single number.} @slidebreak @@ -199,69 +174,23 @@ There is no middle number.
So the median of this list will be the mean of the tw @math{7 + 9 = 16 and 16 \div 2 = 8} } -@opt{The Median section of @opt-printable-exercise{mean-median-modes.adoc} includes a printed version of the upcoming list.} - @slidebreak -@QandA{ -Find the @vocab{median} value of each of these two lists: -@Q{The median of 11, 3, 7 ,4, 5 is...} -@A{5 because it's the middle value of 3, 4, 5, 7, 11.} - -@Q{The median of 11, 3, 7, 4 is...} -@A{5.5 because it's the mean of 4 and 7, which are the middle values in the ordered list 3, 4, 7, 11} -} - - -=== Investigate -@lesson-instruction{Turn back to @printable-exercise{summarizing-columns-moc.adoc} @ifproglang{pyret}{and use the provided code} to compute and record the median for the `pounds` column in the Animals Dataset.} - -@slidebreak{InvestigateC} - -@QandA{ -@Q{How do the mean and median compare?} -@A{The median (11.3) is very different from the mean (39.7)!} - -@Q{Here we see the median (red) and mean (blue). @image{images/num-line-pounds2.png} Which do you think better represents the data?} -@A{The median, because over half of the data is clustered quite close to it and the rest of the data is dispersed across a huge range. Very few animals have a weight close to 39.7.} - -@Q{If the median were much higher than the mean, what would we expect to be true about the distribution of the dataset?} -@A{The dataset is skewed left or has some very low outliers.} +@lesson-instruction{ +- Complete the Median section of @printable-exercise{mean-median-modes.adoc}. +- Compare your answers with a partner. } @slidebreak -@lesson-point{ -The @vocab{mean} is a useful calculation when all of the points are fairly balanced on either side of the middle, but it distorts things for datasets with imbalance and extreme outliers. + -For skewed datasets, the @vocab{median} is a better summary.} - -=== Synthesize - -Mean is generally the best measure of center, because it includes information from every single point. 
But it's misleading for highly-skewed datasets, so statisticians fall back to the median. - -@QandA{ -@Q{Why would looking at the histogram for a dataset help us to decide whether *mean* or *median* would be a better measure of center?} -@A{Median is less sensitive to skew than mean, so seeing the shape will determine whether there's a need for median over mean.} -@Q{When there's a strong _left_ skew, will the mean be less than or greater than the median?} -@A{Less: the left skew pulls the mean to lower values.} -} - -== Mode(s) +==== Mode(s) -=== Overview -Students learn about the mode(s) of a dataset, how to compute them, and when it is appropriate to use them as a measure of center. - -@ifproglang{pyret}{ -@teacher{Note: Mode(s) are often used to describe categorical data. Since Pyret can currently only calculate mode(s) from quantitative columns, we won't be discussing that in this lesson... keep your ears peeled for news of an update next year!} -} - -=== Launch The third measure of center is called the @vocab{mode(s)} of a dataset. The @vocab{mode(s)} of a dataset are the values that appear _most often_. -Median and Mean always produce one number and many datasets are what we call “unimodal”, having just one mode. But sometimes there are exceptions! +Median and mean always produce one number and many datasets are what we call “unimodal”, having just one mode. But sometimes there are exceptions! -- If two or more values are equally common, there can be more than one mode. - If all values are equally common, then there is no mode at all! +- If two or more values are equally common, there can be more than one mode. @slidebreak @@ -276,68 +205,54 @@ Consider the following three datasets: - The mode of the second dataset is 2, since 2 appears more than any other number. - The modes (plural!) of the last dataset are 1 and 4, because 1 and 4 both appear more often than any other element, and because they appear equally often. 
+ @slidebreak -@opt{The Modes section of @opt-printable-exercise{mean-median-modes.adoc} includes a printed version of the upcoming list.} +Can you find the mode(s) of this dataset? -@lesson-instruction{ -Take a minute to identify the mode(s) for each of the following datasets: +``` +red, green, red, yellow, blue, red, purple, purple +``` -- 11, 3, 7, 4, 5 -- 5, 7, 11, 11, 7, 7 -- 2, 3, 5, 4, 3, 7, 4 +@teacher{The mode here is red, which appears three times on the list. Highlight for students that *yes*, we can find the mode of a categorical dataset!} -} @slidebreak -@ifproglang{pyret}{ -Pyret has a function that will compute the modes of any quantitative column in a Table. +@lesson-instruction{ +- Complete the Modes section of @printable-exercise{mean-median-modes.adoc}. +- Compare your answers with a partner's. Correct any mistakes. +} -@hspace{2em} @show{ (contract "modes" '("Table" "String") "List" )} -_Note: `List` is a new data type!_ -Let's test it out! -} +=== Synthesize -@ifproglang{codap}{ -The easiest way to determine modes in CODAP is to sort a column. Do this by clicking on the column name and then selecting from the drop-down menu either Sort option. Scan the column to see which values are the most common. -} +@QandA{ -=== Investigate -@lesson-instruction{ -- Turn to @printable-exercise{summarizing-columns-moc.adoc} @ifproglang{pyret}{and use the code provided} to compute and record the `modes` of the `pounds` column. -- Then complete the remaining questions in the *Summarizing the `Pounds` Column* section. -} +@Q{If you heard that the mean age of students in a kindergarten class was 21, would you be surprised? Why or why not?} +@A{Sample response: yes, that would be surprising. Usually students in kindergarten are 4 or 5 years old!} -@slidebreak +@Q{Is the median always one of the values in the dataset? If not, when is it not?} +@A{No, the median is not always one of the values in the dataset. 
Sometimes, when there are an even number of datapoints, we need to average the two middle values to find the median.} -@QandA{ -@Q{What did you learn from calculating the mode(s)?} -@A{The most common animal weights are 0.1 and 6.5! That’s well below our mean and even our median, which is further evidence of outliers or skewness.} +@Q{How come we can find the mode of a categorical dataset, but not the median or the mean?} +@A{Finding the mode does not require us to perform any arithmetic computations. Computing the median or the mean does require us to perform some arithmetic, therefore we can only use quantitative data.} -@Q{Can we find the mean, median and mode(s) for any column?} -@A{No! We can only calculate Measures of Center for @vocab{quantitative} columns.} -@A{Note: Not all columns that contain numbers are quantitative! Taking the average of a list of zip codes doesn’t tell us anything at all!} } -=== Synthesize -- What must be true about a dataset for the mode(s) to do a good job of describing what is typical? -- What can we learn from the modes of a dataset? +== Choosing the Right Measure of Center -== The Risk of Summarizing Data with a Single Number +@objective{best-measure-of-center} === Overview -Students consider the complexity of summarizing with a single number and learn how to decide which measure of center to use when. They then choose a column, compute all of its measures of center in @proglang, and interpret the results. Finally, they practice computing measures of center for a small dataset by hand and use their findings to critique misleading statements. +Students use @proglang to compute measures of center, and then consider which measure of center is most appropriate in a given situation. === Launch -Summarizing a big dataset means that some information gets lost, so it’s important to pick an appropriate summary. Picking the wrong summary can have serious implications!
- -@slidebreak +Summarizing a big dataset means that some information gets lost, so it's important to pick an appropriate summary. Here are just a few examples of summary data being used for important things: @@ -346,61 +261,89 @@ Here are just a few examples of summary data being used for important things: - Adults are often summarized by a single number -- like their credit score -- which determines their ability to get a job or a home loan. - When buying uniforms for a sports team, a coach might look for the most common size that the players wear. -@lesson-instruction{ -What other examples can you think of where a number or two are used to summarize something complex? -} +@slidebreak + +Picking the wrong summary value (mean, median, or mode) can have serious implications! + +Let's learn how to use @proglang to quickly, easily compute the three different measures of center so that we can spend our energy thoughtfully deciding *which* measure of center is the most appropriate in a given situation, rather than number crunching. === Investigate -You now have three different ways to measure center in a dataset. Every kind of summary has situations in which it does a good job of reporting what’s typical, and others where it doesn’t really do justice to the data. -But how do you know which one to use? Depending on the shape of the dataset, a measure could be really useful or totally misleading! +@ifproglang{pyret}{ + +Pyret has functions that will compute mean, median, and mode. -@slidebreak +@hspace{2em} @show{ (contract "mean" '("Table" "String") "Number" )} + +@hspace{2em} @show{ (contract "median" '("Table" "String") "Number" )} + +@hspace{2em} @show{ (contract "modes" '("Table" "String") "List" )} + +@teacher{ +Note: `List` is a new data type! +} @QandA{ -@Q{"In 2003, the average American family earned $43,000 a year -- well above the poverty line! Therefore, very few Americans were living in poverty."} -@Q{Do you trust this statement?
Why or why not?} -@A{Sample response: The mean is sensitive to outliers, and billionaires like Elon Musk, Jeff Bezos, etc. pull the mean heavily to the right. This makes it appear that the "average" American family earns far more than they actually do. That's why the conclusion "very few Americans were living in poverty" cannot be drawn based on the mean.} +@Q{Why do you think `modes` returns a List?} +@A{If `modes` only returned a Number, there would be no way to indicate if there are multiple modes.} +} + +} + + +@ifproglang{codap}{ +To compute the mean and median in CODAP, create a graph of randomly distributed points, then drag a quantitative column to the x-axis. From the `Measure` menu, select Mean or Median. *If this information is not on your Data Visualizations Organizer, add it now!* } + +@lesson-instruction{ +- Open the @starter-file{animals} in @proglang. +- Complete @printable-exercise{summarizing-columns-moc.adoc}, using @proglang to compute and record all three measures of center for the `pounds` column. Write your responses on the table in question 1. +- Respond to the remaining questions using the information you have recorded on the table. +} + +@teacher{Question 3 requires students to _apply_ their knowledge of mean *and* median, which can be quite difficult. Commonly, students' understanding of center does not extend beyond algorithms. Invite students to think back to what they know about histograms and histogram shape. Challenge them to think deeply about how a histogram's shape relates to its measures of center. We will continue to consider this topic in the next lesson section.} + + @slidebreak + +Let's summarize some of the key ideas we encountered while thinking about the best measure of center to summarize the pounds column of the animals dataset. 
+ @QandA{ -@Q{Given the extreme income inequality in the United States, what measure of center would best represent a typical family income?} -@A{The median} + +@Q{When is mean probably the best measure of center to use?} +@A{The @vocab{mean} is a useful summary number when all of the points in a dataset are fairly balanced on either side of the middle.} + + +@Q{Although mean is generally the best measure of center, statisticians sometimes fall back to the median. When is median the best measure of center to use?} +@A{For skewed datasets, the @vocab{median} is a better summary value because it is less sensitive to skew. Mean is misleading for datasets with imbalance and extreme outliers.} + +@Q{In what situations is mode the best measure of center?} +@A{The @vocab{mode} is a useful measure of center when we have a dataset with a small number of values. Mode is also our only measure of center that can be used with categorical data.} } @slidebreak -Consider how many policies or laws are informed by statistics like this! Knowing about measures of center helps us see through misleading statements. +Consider how many policies or laws are informed by statistics! Knowing about measures of center helps us see through and critique misleading statements. -*Here are some guidelines for when to use which measure of center:* - -- If the data doesn’t show much skewness or have outliers, @vocab{mean} is the best summary because it incorporates information from every value. -- If the data has noticeable outliers or skewness, @vocab{median} gives a better summary of center than the mean. -- If there are very few possible values, such as AP Scores (1–5), @vocab{mode(s)} could be a useful way to summarize the dataset. @slidebreak @lesson-instruction{ -- Choose a column from the Animals dataset and complete the second half of @printable-exercise{summarizing-columns-moc.adoc}. As you work, think about what the measures of center tell you about the shape of the dataset. 
-- Then complete @printable-exercise{critiquing-findings.adoc}. (You will be computing these measures of center without @proglang.) -- Practice the Data Cycle with measures of center, using @printable-exercise{data-cycle-practice.adoc}. +- Use @proglang to complete @printable-exercise{critiquing-findings.adoc}. +- Practice the Data Cycle with measures of center using @printable-exercise{data-cycle-practice.adoc}. } === Synthesize -- What did you learn? -- What questions surfaced? -- How did you know whether the questions on @printable-exercise{data-cycle-practice.adoc} were Arithmetic or Statistical? - -@pd-slide{ -A lot of math books talk about mean, median, and mode before they talk about box plots or histograms. At Bootstrap, we believe that is a mistake. - -_Kids need to see the shape of the data first_, otherwise mean, median and mode are just formulas to memorize. +@QandA{ +@Q{Do you trust this statement?: _In 2003, the average American family earned $43,000 a year -- well above the poverty line! Therefore, very few Americans were living in poverty._ Why or why not?} +@A{Sample response: The mean is sensitive to outliers, and billionaires like Elon Musk, Jeff Bezos, etc. pull the mean heavily to the right. This makes it appear that the "average" American family earns far more than they actually do. That's why the conclusion "very few Americans were living in poverty" cannot be drawn based on the mean.} -Looking at the shape of the data puts ground under your feet. Otherwise, you're too focused on numbers, and measures of center don't actually mean anything. 
+@Q{Given the extreme income inequality in the United States, what measure of center would best represent a typical family income?} +@A{The median} } == Data Exploration Project (Measures of Center) @@ -412,10 +355,10 @@ Students apply what they have learned about measures of center to their chosen d } === Launch -Let’s review what we have learned about computing and interpreting three measures of center - mean, median, and modes. +Let’s review what we have learned about computing and interpreting three measures of center - mean, median, and mode(s). @QandA{ -@Q{Describe how to compute mean, median, and modes.} +@Q{Describe how to compute mean, median, and mode(s).} @Q{When does @vocab{mean} provide the best summary?} @A{It includes information from every single point, so it is useful when the data doesn't show much skewness or have outliers.} @Q{When does @vocab{median} provide the best summary?} @@ -443,7 +386,7 @@ Complete @printable-exercise{data-cycle-practice-2.adoc, "two Data Cycles"} that *It’s time to add to your @starter-file{exploration-project}.* - Locate the "Measures of Center and Spread" section of your Exploration Project and, in the slide following the example, replace `Column A` with the title of the column you just investigated. -- Then type in the mean, median and modes that you just identified. Leave the other rows blank. We will come back to them another day. +- Then type in the mean, median and mode(s) that you just identified. Leave the other rows blank. We will come back to them another day. - On the next slide, repeat with `Column B` using the second column you're interested in. 
} @@ -468,3 +411,6 @@ Complete @printable-exercise{data-cycle-practice-2.adoc, "two Data Cycles"} that == Additional Exercises - @opt-starter-file{matching-modes} + + + diff --git a/lessons/Data-Science/measures-of-center/langs/en-us/pages/critiquing-findings.adoc b/lessons/Data-Science/measures-of-center/langs/en-us/pages/critiquing-findings.adoc index 4370477d9f2..73232c4a04c 100644 --- a/lessons/Data-Science/measures-of-center/langs/en-us/pages/critiquing-findings.adoc +++ b/lessons/Data-Science/measures-of-center/langs/en-us/pages/critiquing-findings.adoc @@ -3,12 +3,12 @@ Consider the following dataset, representing the heaviest bench press (in lbs) for ten powerlifters: +@vspace{1ex} + ---- 135, 95, 230, 135, 203, 55, 1075, 135, 110, 185 ---- -@vspace{3ex} - @n In the space below, rewrite this dataset in sorted order. @ifsoln{55, 95, 110, 135, 135, 135, 185, 203, 230, 1075} @@ -20,7 +20,7 @@ lbs) for ten powerlifters: [cols="^1a,^1a,^1a", options="header"] |=== | Mean (Average) | Median | Mode(s) -| @ifsoln{235.8} | @ifsoln{135} | @ifsoln{135} @vspace{4ex} +| @ifsoln{235.8} | @ifsoln{135} | @ifsoln{135} @vspace{6ex} |=== @vspace{3ex} diff --git a/lessons/Data-Science/measures-of-center/langs/en-us/pages/data-cycle-practice.adoc b/lessons/Data-Science/measures-of-center/langs/en-us/pages/data-cycle-practice.adoc index 9600fe667f5..e327d0907b9 100644 --- a/lessons/Data-Science/measures-of-center/langs/en-us/pages/data-cycle-practice.adoc +++ b/lessons/Data-Science/measures-of-center/langs/en-us/pages/data-cycle-practice.adoc @@ -16,7 +16,7 @@ #:show-build? #f #:expression '(mean animals-table "age") #:show-expression? #f - #:finding "The mean age of animals is 4.18" + #:finding "The mean age of animals is roughly 4.36" #:show-finding? #f #:new-question "" #:show-new-question? 
#t diff --git a/lessons/Data-Science/measures-of-center/langs/en-us/pages/summarizing-columns-moc.adoc b/lessons/Data-Science/measures-of-center/langs/en-us/pages/summarizing-columns-moc.adoc index 26d0f9d7582..e481a70e715 100644 --- a/lessons/Data-Science/measures-of-center/langs/en-us/pages/summarizing-columns-moc.adoc +++ b/lessons/Data-Science/measures-of-center/langs/en-us/pages/summarizing-columns-moc.adoc @@ -1,4 +1,4 @@ -= Summarizing Columns with Measures of Center += Choosing the Best Measure of Center ++++ ++++ -== Summarizing the `Pounds` Column - -Find the measures of center to summarize the @fitb{20ex}{pounds} column of the @starter-file{animals}. +@link-instructions{Find the measures of center to summarize the `pounds` column of the @starter-file{animals}, then respond to the prompts.} @n The three measures of center for this column are: @@ -29,45 +27,45 @@ Find the measures of center to summarize the @fitb{20ex}{pounds} column of the @ | @ifsoln{approx. 39.7} | @ifsoln{11.3} | @ifsoln{[list: 0.1, 6.5]} |=== -@n To take the average of a column, we add all the numbers in that column and divide by the number of rows. Will that work for every column? -@fitb{}{@ifsoln{No! We can only find the average of @vocab{quantitative} data.For example, the mean of a list of Presidents doesn’t make sense.}} +@n If we scan the dataset, we can quickly see that *most* of the animals weigh less than the mean weight. Why is the average so high? @fitb{}{@ifsoln{Kujo and Mr. Peanutbutter! Their weights skew the average.}} -@fitb{}{@ifsoln{Same thing for a list of zip codes: even though we can divide a sum of zip codes, the output doesn’t correspond to some “center” zip code.}} +@fitb{}{} -@n The mean is @fitbruby{25ex}{@ifsoln{higher than}}{higher than/lower than/about equal to} the median, which suggests the shape is @fitbruby{45ex}{@ifsoln{skewed right}}{skewed right (high outliers) / skewed left (low outliers) / symmetric}. 
+@n Referring to the pounds column of the Animals dataset, fill in the blanks: -@n Which do you think is the most useful measure for this column of data? Why? @fitb{}{@ifsoln{The median is}} +- Outliers on the right pull the mean to the right, causing the mean to be @fitbruby{25ex}{@ifsoln{greater than}}{greater than / less than} the median. When the mean is greater than the median, the shape of the data is @fitbruby{25ex}{@ifsoln{skewed right}}{skewed right / skewed left }. -@fitb{}{@ifsoln{because there are likely high outliers pulling the mean to be higher than what's typical}} +- Outliers on the left pull the mean to the left, causing the mean to be @fitbruby{25ex}{@ifsoln{less than}}{greater than / less than} the median. When the mean is less than the median, the shape of the data is @fitbruby{25ex}{@ifsoln{skewed left}}{skewed right / skewed left }. -@star For which column(s) in the animals table do you think the modes might be a good measure of center? Why? -@fitb{}{@ifsoln{Legs! Because there aren't very many values to choose from.}} -== Summarizing the @fitb{20ex}{} Column +@n In the dot plot below, identify which line is the median and which is the mean. (You can refer to the table at the top of the page.) Label the lines. -Find the measures of center to summarize the @fitbruby{20ex}{}{a column of your choosing!} column of the @starter-file{animals}. +@center{@image{../images/num-line-pounds2.png, 400}} -The three measures of center for this column are: +- Which has more data clustered quite close to it, the median or the mean? @fitb{}{@ifsoln{median}} +- Which do you think better represents the data, the median or the mean? Why?
@fitb{}{@ifsoln{The median is more appropriate;}} -[cols="^1a,^1a,^1a",options="header"] -|=== -| Mean (Average) | Median | Mode(s) -| | | -|=== +@fitb{}{@ifsoln{high outliers are pulling the mean to be higher than what is typical.}} -The mean is @fitbruby{25ex}{}{higher than/lower than/about equal to} the median, which suggests the shape is @fitbruby{45ex}{}{skewed right (high outliers) / skewed left (low outliers) / symmetric}. +@n What did you learn from calculating the mode? @fitb{}{@ifsoln{The most common animal weights are 0.1 and 6.5!}} -@star Four animals weighing 5, 5, 10, and 100 pounds will have an average mean of 30 pounds. + -@hspace{3em}(_because_ @math{5 + 5 + 10 + 100 = 120} _and_ @math{120 \div 4 = 30}) +@fitb{}{@ifsoln{That's well below our mean and even our median, which is further evidence of outliers or skewness.}} -@vspace{1ex} +@ifproglang{pyret}{ +@n In the Interactions area of the @starter-file{animals}, type `modes(animals-table, "species")`. What does Pyret return? @fitb{}{@ifsoln{[list: "dog"]}} +} -Can you think of another set of four animals that would have the same average? How many sets can you come up with? +@n Are there any measures of center that we can use for categorical data? @fitb{}{@ifsoln{Yes, mode!}} -@fitb{}{} +@n For which quantitative column(s) in the animals table do you think the modes might be a good measure of center? Why? @fitb{}{} -@fitb{}{} +@fitb{}{@ifsoln{Legs, because there aren't very many values to choose from. We can also find the mode of categorical columns, such as species.}} -@fitb{}{} + +@n To take the average of a column, we add all the numbers in that column and divide by the number of rows. Will that work for every column? + +@fitb{}{@ifsoln{No! We can only find the average of @vocab{quantitative} data. 
For example, the mean of a list of Presidents doesn't make sense.}} + +@fitb{}{@ifsoln{Same thing for a list of zip codes: even though we can divide a sum of zip codes, the output doesn't correspond to some “center” zip code.}} diff --git a/lessons/Data-Science/measures-of-center/langs/en-us/pages/workbook-pages.txt b/lessons/Data-Science/measures-of-center/langs/en-us/pages/workbook-pages.txt index 2b90dfa81ca..259eef6887c 100644 --- a/lessons/Data-Science/measures-of-center/langs/en-us/pages/workbook-pages.txt +++ b/lessons/Data-Science/measures-of-center/langs/en-us/pages/workbook-pages.txt @@ -1,5 +1,5 @@ notes-measures-of-center.adoc -typical.adoc +mean-median-modes.adoc summarizing-columns-moc.adoc critiquing-findings.adoc data-cycle-practice.adoc diff --git a/lessons/Data-Science/scatter-plots/langs/en-us/index.adoc b/lessons/Data-Science/scatter-plots/langs/en-us/index.adoc index a6eebb08edc..d258804294b 100644 --- a/lessons/Data-Science/scatter-plots/langs/en-us/index.adoc +++ b/lessons/Data-Science/scatter-plots/langs/en-us/index.adoc @@ -21,7 +21,7 @@ @objectives @objective{explanatory-response} @objective{scatter-plots-by-hand} -@objective{scatter-plots-pyret} +@objective{scatter-plots-proglang} @objective{trends-in-scatter-plots} @objective{when-subsets-make-sense} diff --git a/lessons/Data-Science/variability/langs/en-us/assessments/assessments.adoc b/lessons/Data-Science/variability/langs/en-us/assessments/assessments.adoc new file mode 100644 index 00000000000..1422d31ba1b --- /dev/null +++ b/lessons/Data-Science/variability/langs/en-us/assessments/assessments.adoc @@ -0,0 +1,22 @@ += Assessments + +== Published Assessment + +- Desmos File: + +== Question Text + +@n question 1 + +- objective: + +@n question 2 + +- objective: + +== Links to Documents Used for Thinking and Generating Content + +- Desmos file? +- Pyret file? +- Google Doc? +- Google Sheet? 
\ No newline at end of file diff --git a/lessons/Data-Science/variability/langs/en-us/images/brush-teeth-min.png b/lessons/Data-Science/variability/langs/en-us/images/brush-teeth-min.png new file mode 100644 index 00000000000..fc02088c966 Binary files /dev/null and b/lessons/Data-Science/variability/langs/en-us/images/brush-teeth-min.png differ diff --git a/lessons/Data-Science/variability/langs/en-us/images/cat-dot-plot.png b/lessons/Data-Science/variability/langs/en-us/images/cat-dot-plot.png new file mode 100644 index 00000000000..3d4310a7580 Binary files /dev/null and b/lessons/Data-Science/variability/langs/en-us/images/cat-dot-plot.png differ diff --git a/lessons/Data-Science/variability/langs/en-us/images/dog-dot-plot.png b/lessons/Data-Science/variability/langs/en-us/images/dog-dot-plot.png new file mode 100644 index 00000000000..1b4e37cd121 Binary files /dev/null and b/lessons/Data-Science/variability/langs/en-us/images/dog-dot-plot.png differ diff --git a/lessons/Data-Science/variability/langs/en-us/images/get-ready-min.png b/lessons/Data-Science/variability/langs/en-us/images/get-ready-min.png new file mode 100644 index 00000000000..be02ba53faa Binary files /dev/null and b/lessons/Data-Science/variability/langs/en-us/images/get-ready-min.png differ diff --git a/lessons/Data-Science/variability/langs/en-us/images/lesson-images.json b/lessons/Data-Science/variability/langs/en-us/images/lesson-images.json new file mode 100644 index 00000000000..17af088d799 --- /dev/null +++ b/lessons/Data-Science/variability/langs/en-us/images/lesson-images.json @@ -0,0 +1,62 @@ +{ + "cat-dot-plot.png": { + "description": "A dot plot showing the distribution of cats' weights", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "rabbit-dot-plot.png": { + "description": "A dot plot showing the distribution of rabbits' weights", + "source" : "Created by the Bootstrap Team based on contrived data", +
"license" : "Creative Commons 4.0 - NC - SA" + }, + "tarantula-dot-plot.png": { + "description": "A dot plot showing the distribution of tarantulas' weights", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "brush-teeth-min.png": { + "description": "A dot plot showing the distribution of minutes spent brushing teeth", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "dog-dot-plot.png": { + "description": "A dot plot showing the distribution of dogs' weights", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "get-ready-min.png": { + "description": "A dot plot showing the distribution of minutes", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "school-sleep.png": { + "description": "A dot plot showing the distribution of hours spent sleeping", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "sixth-grade-ages.png": { + "description": "A dot plot showing the distribution of ages", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "tvs-per-house.png": { + "description": "A dot plot showing the distribution of tvs", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "tvs-per-room.png": { + "description": "A dot plot showing the distribution of tvs", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + }, + "weekend-sleep.png": { + "description": "A dot plot showing the distribution of hours spent sleeping", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative 
Commons
4.0 - NC - SA" + }, + "whole-school-ages.png": { + "description": "A dot plot showing the distribution of ages", + "source" : "Created by the Bootstrap Team based on contrived data", + "license" : "Creative Commons 4.0 - NC - SA" + } +} diff --git a/lessons/Data-Science/variability/langs/en-us/images/rabbit-dot-plot.png b/lessons/Data-Science/variability/langs/en-us/images/rabbit-dot-plot.png new file mode 100644 index 00000000000..b9d4e8dd4ee Binary files /dev/null and b/lessons/Data-Science/variability/langs/en-us/images/rabbit-dot-plot.png differ diff --git a/lessons/Data-Science/variability/langs/en-us/images/school-sleep.png b/lessons/Data-Science/variability/langs/en-us/images/school-sleep.png new file mode 100644 index 00000000000..30b6ed7bd50 Binary files /dev/null and b/lessons/Data-Science/variability/langs/en-us/images/school-sleep.png differ diff --git a/lessons/Data-Science/variability/langs/en-us/images/sixth-grade-ages.png b/lessons/Data-Science/variability/langs/en-us/images/sixth-grade-ages.png new file mode 100644 index 00000000000..1465a7dcc08 Binary files /dev/null and b/lessons/Data-Science/variability/langs/en-us/images/sixth-grade-ages.png differ diff --git a/lessons/Data-Science/variability/langs/en-us/images/tarantula-dot-plot.png b/lessons/Data-Science/variability/langs/en-us/images/tarantula-dot-plot.png new file mode 100644 index 00000000000..cd65a9b9078 Binary files /dev/null and b/lessons/Data-Science/variability/langs/en-us/images/tarantula-dot-plot.png differ diff --git a/lessons/Data-Science/variability/langs/en-us/images/tvs-per-house.png b/lessons/Data-Science/variability/langs/en-us/images/tvs-per-house.png new file mode 100644 index 00000000000..23e8bd1e1c7 Binary files /dev/null and b/lessons/Data-Science/variability/langs/en-us/images/tvs-per-house.png differ diff --git a/lessons/Data-Science/variability/langs/en-us/images/tvs-per-room.png b/lessons/Data-Science/variability/langs/en-us/images/tvs-per-room.png new file mode 
100644 index 00000000000..add4de14e00 Binary files /dev/null and b/lessons/Data-Science/variability/langs/en-us/images/tvs-per-room.png differ diff --git a/lessons/Data-Science/variability/langs/en-us/images/weekend-sleep.png b/lessons/Data-Science/variability/langs/en-us/images/weekend-sleep.png new file mode 100644 index 00000000000..07308ab71bd Binary files /dev/null and b/lessons/Data-Science/variability/langs/en-us/images/weekend-sleep.png differ diff --git a/lessons/Data-Science/variability/langs/en-us/images/whole-school-ages.png b/lessons/Data-Science/variability/langs/en-us/images/whole-school-ages.png new file mode 100644 index 00000000000..835a7774cb6 Binary files /dev/null and b/lessons/Data-Science/variability/langs/en-us/images/whole-school-ages.png differ diff --git a/lessons/Data-Science/variability/langs/en-us/index.adoc b/lessons/Data-Science/variability/langs/en-us/index.adoc new file mode 100644 index 00000000000..70b2f3231fb --- /dev/null +++ b/lessons/Data-Science/variability/langs/en-us/index.adoc @@ -0,0 +1,218 @@ += Variability + +@description{Students define variability multiple ways, and then describe different levels of variability on dot plots.} + +@ifproglang{pyret}{ +@lesson-prereqs{contracts-tables-visualizations, ds-intro, dot-plots} +} + +@ifproglang{codap}{ +@lesson-prereqs{codap-dot-plots-bar-charts} +} + +@keywords{variability} + +@add-to-lang{histogram} + +[@lesson-intro-table] +|=== +| Lesson Goals +| Students will be able to... + +@objectives + + +| Student-facing Lesson Goals +| + +- Let's think about variability of categorical and quantitative data. + +| Materials +|[.materials-links] + +@material-links + + + +|=== + + + +== Variability Two Ways + +@objective{variability-define} + +=== Overview + +Students define variability two ways, and then apply that understanding to describe the variability of categorical and quantitative data. 
+ +=== Launch + +In our discussion of @lesson-link{dot-plots}, we learned to describe the distribution of a dataset in terms of outliers, clusters, peaks, and gaps. We also considered what's typical -- or expected -- in the data. This lesson focuses on another way to describe a dataset, its @vocab{variability}. + +@vocab{Statistical questions} are questions that anticipate @vocab{variability}. + +@teacher{Students should already be familiar with statistical questions from @lesson-link{data-cycle}.} + +@QandA{ + +@Q{Which question anticipates variability: (A) _How many minutes are in an hour?_ or (B) _How many minutes does it take to get to school?_ Explain your response.} +@A{Question B anticipates variability. The time it takes to get to school will vary based on who you ask, where they live, mode of transportation, time of day, road conditions, traffic, etc.} + +@A{The answer to Question A will always be 60.} +} + +@strategy{There are Many Ways to Think about Variability!}{ + +Research indicates that students often have an oversimplified and underdeveloped view of variability (@citation{cooper-2018}; @citation{cooper-shore-2008}). + +In this lesson, we intentionally begin our conversation by developing intuitive ideas about variability, for instance: + +- Variability requires us to consider the data as an entity, rather than as individual points. +- We can try to understand why things vary and try to identify reasons for variability. +- Some things vary a little, and some vary a lot. +- We see variability in both quantitative and categorical datasets. + +This last idea is an important one: research indicates that it is more natural to understand how like or unlike categorical data is than it is to understand variation about the mean (@citation{kade-perry-2007}); therefore, reasoning about variability in categorical datasets can act as a natural starting point.
+ +That said, we urge you to explicitly emphasize that how alike or different the data points are is just one of many ways to think about variability. Fixating on this definition of variability can result in students developing the common misconception that levelness of histogram bars indicates low variability (@citation{cooper-shore-2008}). +} + + +=== Investigate + +In a categorical dataset, we can judge variability based on how different or alike the data points are. +Let's think about the variability of some categorical datasets. + +@QandA{ + +Complete the first section of questions on @printable-exercise{variability-two-ways.adoc}. Then we'll pause to discuss them as a class. + +@Q{In Sana's grocery bag, she has 12 apples and 1 banana. +In Juliette's grocery bag, she has 4 peaches, 4 kiwis, 4 oranges, and 1 lime. +Which dataset - Sana's groceries or Juliette's groceries - has greater variability? +} + +@A{Sample response: Juliette's grocery bag has greater variability, as the items in her bag are more different from one another than the items in Sana's bag. If students are inclined to consider the amount of each item, remind them that this is a categorical dataset. +} + +@Q{You ask a group of sixth grade students to respond to two different statements with either "true" or "false." Statement A is _I am in sixth grade_, and statement B is _I am wearing blue today._ Which statement do you predict will produce greater variability? +} + +@A{Sample response: Given that the students you are sampling are in sixth grade, there will not be any variability in their responses to statement A. Everyone will choose "true". For statement B, however, we expect variability, because it is likely that some students will be wearing blue and some will not. +} +} + +@slidebreak + +@lesson-instruction{ +Complete @printable-exercise{variability-two-ways.adoc}.
+} + +@slidebreak + +@QandA{ + +@Q{Do you agree or disagree that students in our class generally have the same number of letters in their first names? +} +@A{Sample response: I disagree. The data spreads out from 3 letters to 14 letters. If all students generally had the same number of letters in their names, most or all of the name lengths would be equivalent. +} + +@Q{Which dataset do you predict will have greater variability for a group of ninth graders who attend the same school - wake-up times on Wednesday or Saturday? +} +@A{Sample response: Saturday wake-up times probably have greater variability. On a school day, everyone needs to wake up in time to get to school, but on Saturday, some students may choose to sleep in later. +} +} + +@teacher{ + +Students often believe that variability can be judged based solely on the *range* of a dataset (@citation{cooper-shore-2008}). Although we will focus on range for the remainder of this lesson, acknowledge to students that there are *many* other ways to quantify variability. The dialogue about variability that begins in this lesson will continue (and gain nuance) during our lessons on @lesson-link{histograms-visualize}, @lesson-link{box-plots}, and @lesson-link{standard-deviation}. +} + +=== Synthesize + +@teacher{Before facilitating a whole class discussion, you might want to have students exchange the datasets they made on the third section of @printable-exercise{variability-two-ways.adoc} with a partner and discuss their strategies for determining the variability of each dataset.} + +@QandA{ + +@Q{How did your strategies for assessing variability change, if at all, when you looked at a categorical dataset versus a quantitative dataset?} + +@Q{If two datasets have the same range, how can we decide which one has greater variability?} +@A{Although students will probably *not* be able to answer this question concretely (e.g.
use interquartile range, mean absolute deviation, or standard deviation), it is a good opportunity to see if they are developing intuition about variability as deviation from the center. You can invite students to share, and then reveal that they will uncover the answers to this question later!} + +} + +== Visualizing Variability with Dot Plots + +@objective{variability-describe} + + +=== Overview + +Students connect dot plots to different scenarios based on the variability. They learn how to create dot plots in @proglang to investigate the distribution of data in dot plots. + + +=== Launch + +Let's investigate how different levels of variability appear on dot plots. + +@lesson-instruction{ +- The person who created the dot plots on @printable-exercise{variability-of-dot-plots.adoc} forgot to label them. +- To complete the page: Fill in the blanks in the first column with either "A" (if the description matches dot plot A) or "B" (if the description matches dot plot B), then explain your choice in the last column. +} + +@QandA{ + +@Q{What strategies did you use to match labels with dot plots?} +@A{Possible responses: I considered the range of the data; I asked myself which scenario would produce data with greater variability; I envisioned in my head what the dot plot would look like, etc.} + +@Q{Can you think of any similar pairs of datasets that would produce dot plots with differing levels of variability?} +@A{Possible responses: minutes 9 year-olds spend talking on the phone versus minutes 18 year-olds spend talking on the phone; time to run a mile for professional athletes versus a group of high school students; etc.} +} + + +=== Investigate + +The folks at the animal shelter want to approximate the amount of food they need to purchase for the coming month. They know there is a relationship between an animal's weight and how much it eats, so they are discussing the distribution of animals' weights. 
+ +@lesson-instruction{ +- With a partner, complete the first section of @printable-exercise{animal-weight-variability.adoc}. +} + +@teacher{ +Review students' responses, first ensuring that students are able to estimate what's typical in a dataset (question 1).} + +@QandA{ +@Q{How did you decide what species has the greatest and least variability?} +@A{Responses will vary. Ideally, students are thinking about the possible weight range for each animal, recognizing that there are some extremely large breeds of dogs, but that most tarantulas are generally the same size.} + +@Q{How did you describe the distribution of dogs' weights?} +@A{Responses will vary. Students should acknowledge that a peak exists at approximately 55 pounds, and that there is a gap between the cluster of light- to mid-weight dogs and the few very heavy outliers.} +} + +It's time to make dot plots in @proglang! + +@ifproglang{pyret}{ +Here is the contract for creating a dot plot in Pyret: + +@show{(contract 'dot-plot '((table-name Table) (labels String) (column-name String)) "Image")} +} + + +@lesson-instruction{ +- Open the @starter-file{expanded-animals-sub-pops} and click "Run". +- Use it to complete the second section of @printable-exercise{animal-weight-variability.adoc}, making dot plots for each species in @proglang and responding to the prompts on the table. +} + +@teacher{ +We've defined some helper functions in rows 15-18 of the @starter-file{expanded-animals-sub-pops}. Interested students can learn more about helper functions during @lesson-link{filtering-and-building}. Students need not develop a strong understanding of helper functions to complete the activities in this lesson. +} + +=== Synthesize + +@QandA{ +@Q{You've been asked to estimate what's typical of a dataset several times. 
How do you think the variability of a dataset affects typicality?} + +@A{When a dataset is highly variable, the spread is wide and there is a greater likelihood that there are outliers; both of these affect typicality. For instance, a high outlier on the right increases what's typical. If there is low variability, it is generally easier to predict what is typical. If there is *no* variability, we know what is typical because the dataset contains only a single value.} + +} diff --git a/lessons/Data-Science/variability/langs/en-us/pages/animal-weight-variability.adoc b/lessons/Data-Science/variability/langs/en-us/pages/animal-weight-variability.adoc new file mode 100644 index 00000000000..1a3102ab4d4 --- /dev/null +++ b/lessons/Data-Science/variability/langs/en-us/pages/animal-weight-variability.adoc @@ -0,0 +1,84 @@ += Variability of Animals' Weights + +== Make Your Predictions + +@link-instructions{ +The folks at the shelter want to approximate the amount of food they need to purchase for the coming month. They know there is a relationship between an animal's weight and how much it eats, so they are discussing the distribution of animals' weights. Help them out by responding to the prompts below. For question 7, you'll need to use the @starter-file{expanded-animals-sub-pops}. +} + +@n Imagine a _typical_ animal from each of these four species. Rank the animals from lightest (1) to heaviest (4). 
+ +@hspace{2em} dog: @fitb{7em}{@ifsoln{4}} @hspace{2em} rabbit: @fitb{7em}{@ifsoln{2}} @hspace{2em} cat: @fitb{7em}{@ifsoln{3}} @hspace{2em} tarantula: @fitb{7em}{@ifsoln{1}} + +@n Circle the species you expect to have the _greatest_ variability in weight: @hspace{2em} @ifsoln-choice{dog @hspace{5em}} rabbit @hspace{5em} cat @hspace{5em} tarantula + +@n Circle the species you expect to have the _least_ variability in weight: @hspace{3em} dog @hspace{5em} rabbit @hspace{5em} cat @hspace{5em} @ifsoln-choice{tarantula} + +@n The dot plots below display the weight distributions of cats, rabbits, and tarantulas. Identify the species of each plot. + +[cols="<.>1a, <.>1a, 1a", grid="none", frame="none", stripes="none"] +|=== + +| species: @fitb{}{@ifsoln{rabbit}} + +| species: @fitb{}{@ifsoln{tarantula}} + +| species: @fitb{}{@ifsoln{cat}} + +| @image{../images/rabbit-dot-plot.png, 350} + +| @image{../images/tarantula-dot-plot.png, 350} + +| @image{../images/cat-dot-plot.png, 300} + +|=== + +@n Explain how you made your decisions. @fitb{}{} + +@fitb{}{@ifsoln{Of the 3 species, cats weigh the most and their weights have the most variability, while tarantulas weigh the least and also have the least variability.}} + +@fitb{}{} + +@fitb{}{} + + +== Test Your Predictions Using Pyret + +@n Using the @starter-file{expanded-animals-sub-pops}, build a dot plot for each species. In your code, use the tables defined on lines 22-25. Use information from your dot plots to fill in the cells. You can hover your mouse over specific points on the dot plot for additional information on an individual animal. Some cells have been completed for you. + + +[cols="1,2,2,2,2", options="header"] +|=== + +| +| dogs +| cats +| rabbits +| tarantula + +| Range/variability | 3-172 lbs | @ifsoln{0.8-13.4 lbs} | @ifsoln{1.5-5 lbs} | @ifsoln{0.03 - 0.375 lbs} + +| Gaps | 123-161 lbs | @ifsoln{11-13.4 lbs} | _No significant gaps_ | _No significant gaps_ + +| Outliers +| Kujo (172 lbs) + +Mr. 
PB (161 lbs) +| @ifsoln{Hercules (13.4 lbs)} +| _No significant outliers_ +| _No significant outliers_ + + +| Peak(s) | 72 pounds | @ifsoln{8.5 pounds} | @ifsoln{1.5, 3.3, 3.5, 4.2 pounds} | @ifsoln{0.2 pounds} + +| Typical weight | about 55 lbs | @ifsoln{about 8 lbs} | @ifsoln{about 3.5 lbs} | @ifsoln{about 0.2 lbs} + + +|=== + + +@n Purchasing pet food would be simpler if staff at the shelter could plan to feed each individual pet of a species the _exact same quantity of food_. Do you think this is possible for any of the four species (without over- or underfeeding any of the animals)? Explain. If not, can you make any recommendations about the quantity of food to purchase? @fitb{}{@ifsoln{Feeding each dog the same amount of food is definitely not a good idea. It's probably not a good idea to feed the cats, rabbits, or tarantulas }} + +@fitb{}{@ifsoln{ the same amount of food, either. (Students may want to engage in some debate over the smaller three species.)}} + +@fitb{}{} + +@fitb{}{} diff --git a/lessons/Data-Science/variability/langs/en-us/pages/variability-of-dot-plots.adoc b/lessons/Data-Science/variability/langs/en-us/pages/variability-of-dot-plots.adoc new file mode 100644 index 00000000000..dd99d43dc64 --- /dev/null +++ b/lessons/Data-Science/variability/langs/en-us/pages/variability-of-dot-plots.adoc @@ -0,0 +1,101 @@ +[.landscape] += Variability of Dot Plots + +++++ + +++++ + +The person who created the dot plots below forgot to label them. For each row, decide which description matches which dot plot. Then explain your choice. + +[.FillVerticalSpace, cols="1, <.11a, ^.>9a, ^.>9a, <.9a", options="header"] + +|=== + +| | Which dot plot corresponds, A or B?
| Dot Plot A | Dot Plot B | Explain your choice + +| @n + +| Students' hours of sleep: + +- on Monday night: @fitb{1em}{@ifsoln{B}} + +- on Saturday night: @fitb{1em}{@ifsoln{A}} + +.>| @image{../images/weekend-sleep.png} +.>| @image{../images/school-sleep.png} +| @fitb{}{} + +@fitb{}{} + +@fitb{}{} + +| @n + +| Ages: + +- of all sixth graders at a K-12 school: @fitb{1em}{@ifsoln{A}} + +- of all students at a K-12 school: @fitb{1em}{@ifsoln{B}} + +| @image{../images/sixth-grade-ages.png} +| @image{../images/whole-school-ages.png} +| @fitb{}{} + +@fitb{}{} + +@fitb{}{} + +| @n +| +Weights: + +- of cats in the shelter: @fitb{1em}{@ifsoln{B}} + +- of dogs in a shelter: @fitb{1em}{@ifsoln{A}} + + +| @image{../images/dog-dot-plot.png} +| @image{../images/cat-dot-plot.png} +| @fitb{}{} + +@fitb{}{} + +@fitb{}{} + +| @n +| +Number of minutes: + +- spent brushing teeth in a day: @fitb{1em}{@ifsoln{B}} + +- spent getting ready for school: @fitb{1em}{@ifsoln{A}} + +| @image{../images/get-ready-min.png} +| @image{../images/brush-teeth-min.png} +| @fitb{}{} + +@fitb{}{} + +@fitb{}{} + +| @n +| +Number of televisions: + +- per household: @fitb{1em}{@ifsoln{B}} + +- per bedroom: @fitb{1em}{@ifsoln{A}} + + +| @image{../images/tvs-per-room.png} +| @image{../images/tvs-per-house.png} +| @fitb{}{} + +@fitb{}{} + +@fitb{}{} + +|=== + diff --git a/lessons/Data-Science/variability/langs/en-us/pages/variability-two-ways.adoc b/lessons/Data-Science/variability/langs/en-us/pages/variability-two-ways.adoc new file mode 100644 index 00000000000..4ff99fe950e --- /dev/null +++ b/lessons/Data-Science/variability/langs/en-us/pages/variability-two-ways.adoc @@ -0,0 +1,78 @@ += Two Ways of Thinking about Variability + +== Variability of Categorical Data + +@vspace{1ex} + +@indented{ +[cols="^1a,^1a",options="header"] +|=== +| Sana's Groceries | Juliette's Groceries +| 12 apples and 1 banana | 4 peaches, 4 kiwis, 4 oranges, and 1 lime +|=== +} +@n Which dataset has greater variability - Sana's 
groceries or Juliette's groceries? Explain. @fitb{}{} + +@fitb{}{@ifsoln{Sample response: Juliette’s grocery bag has greater variability, as the items in her bag are more different from one another than the items in Sana’s bag.}} + +@fitb{}{} + +@n You ask a group of sixth grade students to respond to two different statements with either "true" or "false." + +- Statement A: _I am in sixth grade._ +- Statement B: _I am wearing blue today._ + +Which statement do you predict will produce greater variability? Explain. @fitb{}{} + +@fitb{}{} + +@fitb{}{} + +== Variability of Quantitative Data + +@n Someone looks at your class roster and says, _"In general, students in our class have the same number of letters in their first names."_ + +Do you agree or disagree? Explain your reasoning. @fitb{}{} + +@fitb{}{@ifsoln{Sample response: I disagree. In reality, the data spreads out from 3 letters to 14 letters.}} + +@fitb{}{@ifsoln{If all students had the same number of letters in their names, most or all of the name lengths would be equivalent.}} + +@n Which dataset do you predict will have greater variability for a group of ninth graders who attend the same school - wake-up times on Wednesday or Saturday? Explain. @fitb{}{} + +@fitb{}{@ifsoln{Sample response: Saturday wake-up times probably has greater variability. On a school day, everyone needs to wake up in time to get to school, but on Saturday, some students may choose to sleep in later.}} + +@n Below are the students' responses for their wake-up times on Wednesday versus Saturday. Was your prediction correct? Explain. + +- Wednesday: 6:30, 6:15, 6, 6:45, 6:30, 5:45, 6:45, 6:30, 6:30, 6:15 + +- Saturday: 7:00, 8:00, 8:30, 6:30, 9:45, 10:30, 6:00, 5:45, 10:15, 9:30 + +@fitb{}{} + +@fitb{}{} + +== Designing Datasets with High and Low Variability + +@n Make up two *categorical* datasets with 5 items, each. 
+ +@vspace{1ex} + +@indented{ +[cols="^1a,^1a",options="header"] +|=== +| Dataset with Low Variability | Dataset with High Variability +| @vspace{3ex} | +|=== +} + +@n Make up two *quantitative* datasets with ten quantities, each. + +@vspace{1ex} + +@indented{ +[cols="^1a,^1a",options="header"] +|=== +| Dataset with Low Variability | Dataset with High Variability +| @vspace{1ex} | +|=== +} diff --git a/lessons/Data-Science/variability/langs/en-us/pages/workbook-pages.txt b/lessons/Data-Science/variability/langs/en-us/pages/workbook-pages.txt new file mode 100644 index 00000000000..241eee79233 --- /dev/null +++ b/lessons/Data-Science/variability/langs/en-us/pages/workbook-pages.txt @@ -0,0 +1,3 @@ +variability-two-ways.adoc +variability-of-dot-plots.adoc +animal-weight-variability.adoc \ No newline at end of file diff --git a/lessons/Data-Science/variability/langs/en-us/proglang.txt b/lessons/Data-Science/variability/langs/en-us/proglang.txt new file mode 100644 index 00000000000..b09e8215f92 --- /dev/null +++ b/lessons/Data-Science/variability/langs/en-us/proglang.txt @@ -0,0 +1,2 @@ +pyret +codap \ No newline at end of file diff --git a/lessons/Data-Science/variability/langs/en-us/slides-codap.id b/lessons/Data-Science/variability/langs/en-us/slides-codap.id new file mode 100644 index 00000000000..081fdab7d5a --- /dev/null +++ b/lessons/Data-Science/variability/langs/en-us/slides-codap.id @@ -0,0 +1 @@ +1K97Xfu1mxcjCVis3_v-kyCH9x8AakiTdl73uWkIcMZo diff --git a/lessons/Data-Science/variability/langs/en-us/slides-pyret.id b/lessons/Data-Science/variability/langs/en-us/slides-pyret.id new file mode 100644 index 00000000000..d1a74831991 --- /dev/null +++ b/lessons/Data-Science/variability/langs/en-us/slides-pyret.id @@ -0,0 +1 @@ +1U-hMS-iXKL13NFD4szyOWVuoXdTWPk79GilEROa8cSw diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/AnalyzeData.png b/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/AnalyzeData.png deleted 
file mode 100644 index 00b78018eac..00000000000 Binary files a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/AnalyzeData.png and /dev/null differ diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/AskQuestions.png b/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/AskQuestions.png deleted file mode 100644 index 9ed9eb0d63b..00000000000 Binary files a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/AskQuestions.png and /dev/null differ diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/ConsiderData.png b/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/ConsiderData.png deleted file mode 100644 index 4024e510867..00000000000 Binary files a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/ConsiderData.png and /dev/null differ diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/InterpretData.png b/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/InterpretData.png deleted file mode 100644 index c8126012480..00000000000 Binary files a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/InterpretData.png and /dev/null differ diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/data-cycle.png b/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/data-cycle.png deleted file mode 100644 index ef0bd0dd60d..00000000000 Binary files a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/data-cycle.png and /dev/null differ diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/lesson-images.json b/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/lesson-images.json deleted file mode 100644 index 66dcb11c531..00000000000 --- a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/images/lesson-images.json +++ /dev/null @@ 
-1,52 +0,0 @@ -{ - "histogram-a.png": { - "description": "Histogram with 6 columns whose respective heights from left to right are 1, 1, 3, 3, 1, 1", - "source" : "Created by the Bootstrap Team based on contrived data", - "license" : "Creative Commons 4.0 - NC - SA" - }, - "histogram-b.png": { - "description": "histogram with 10 bars equal in height", - "source" : "Created by the Bootstrap Team based on contrived data", - "license" : "Creative Commons 4.0 - NC - SA" - }, - "histogram-c.png": { - "description": "Histogram with 9 bars whose heights from left to right are 0,1,3,3,1,0,0,1,1", - "source" : "Created by the Bootstrap Team based on contrived data", - "license" : "Creative Commons 4.0 - NC - SA" - }, - "histogram-d.png": { - "description": "Histogram with 9 bars whose heights from left to right are 0,1,1,0,0,1,3,3,1", - "source" : "Created by the Bootstrap Team based on contrived data", - "license" : "Creative Commons 4.0 - NC - SA" - }, - "histogram-e.png": { - "description": "Histogram with 11 bars whose heights from left to right are 0,2,2,1,0,0,0,0,1,2,2", - "source" : "Created by the Bootstrap Team based on contrived data", - "license" : "Creative Commons 4.0 - NC - SA" - }, - "symmetric.png": { - "description": "A hill-shaped histogram, with both sides sloping away from the peak equally", - "source" : "Created by the Bootstrap Team based on contrived data", - "license" : "Creative Commons 4.0 - NC - SA" - }, - "left-w-foot.png": { - "description": "A hill-shaped histogram, with a clump of taller bars on the right side, and smaller bars trailing off to the left=", - "source" : "Created by the Bootstrap Team based on contrived data", - "license" : "Creative Commons 4.0 - NC - SA" - }, - "right-w-foot.png": { - "description": "A hill-shaped histogram, with a clump of taller bars on the left side, and smaller bars trailing off to the right side", - "source" : "Created by the Bootstrap Team based on contrived data", - "license" : "Creative Commons 4.0 - NC 
- SA" - }, - "InterpretData.png" : { - "description" : "Interpret Data", - "source" : "Created by the Bootstrap Team based on work from @link{http://introdatascience.org/, Mobilizing IDS project} and @link{https://www.amstat.org/asa/files/pdfs/GAISE/GAISEPreK12_Intro.pdf, GAISE}", - "license" : "Creative Commons 4.0 - NC - SA" - }, - "skew-right.png": { - "description": "A hill-shaped histogram, with a clump of taller bars on the left side, and smaller bars trailing off to the right", - "source" : "Created by the Bootstrap Team based on contrived data", - "license" : "Creative Commons 4.0 - NC - SA" - } -} diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/index.adoc b/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/index.adoc deleted file mode 100644 index 10092e32ca4..00000000000 --- a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/index.adoc +++ /dev/null @@ -1,200 +0,0 @@ -= Visualizing the "Shape" of Data - -@description{Students explore the concept of "shape", using histograms to determine whether a dataset has skewness, and what the direction of the skewness means. They apply this knowledge to the Animals Dataset, and then to their own.} - -@lesson-prereqs{histograms} - -@keywords{histogram, bin, interval} - -[@lesson-intro-table] -|=== -| Lesson Goals -| Students will be able to... - -@objectives -@objective{histograms-pyret} -@objective{Describe the distribution of quantitative columns of the Animals Dataset, using proper terminology.} - -| Student-facing Lesson Goals -| - -- Let's investigate what the shape of a histogram can tell us about the data. - -| Materials -|[.materials-links] - -@material-links - -|=== - - -== Describing Shape - -=== Overview -This activity focuses on _describing shape_ based on a @vocab{histogram}. Students learn about "left skewed", "right skewed", and "symmetric" data, and what those descriptions tell us about a dataset. 
- -=== Launch - -@vocab{Shape} is one way to quickly describe what values are more or less common in a dataset. Some might occur very frequently, while others are rare. That information can be gathered from a _distribution_ of data: any representation of the data that shows the frequency of each value (like a table, list, or chart!). - -Distributions can show where data points are clustered together or spread thin. Data Scientists spend a _lot_ of time looking at data visualizations to examine their shape, because the numbers don't tell the whole story! - -In fact, you lose a lot of insight into your dataset if you don't look at the shape. @link{https://www.research.autodesk.com/publications/same-stats-different-graphs-generating-datasets-with-varied-appearance-and-identical-statistics-through-simulated-annealing/, The Datasaurus Dozen} are a wonderful collection of dissimilar graphics whose summary statistics are identical. -@slidebreak - -@lesson-point{ -Histograms create _fixed-size_ bins, which contain varying numbers of data points. -} - -@right{@image{images/skew-right.png, 300}}We can think of the data being "stacked" in these fixed bins, like jeans in a store separated by size: one stack for Small, another for Medium, and so on. - -_The height of a histogram bar tells us how much data falls within that interval._ Taller stacks have more data points than short ones. - -Look at the image on the right: most of the data is clustered on the left side, and there are a few unusually high values way off to the right. *But how do we describe this shape, and what does it mean?* - -@slidebreak - -Let's look at some real-world examples of the most common shapes: - -*1. Skewed right, or high outliers* - -@right{@image{images/right-w-foot.png, 300}}Most points are clumped around what’s typical, but they trail off to the right with a few unusually high values (or outliers). We see this shape often in the real world. 
- -- The average US woman gives birth around age 26, but some do even after 45! No one is giving birth at age 7 to balance this out, so the outliers are all on the right. -- Personal income almost always shows right skewness or high outliers. There are usually a few billionaires that are far above average, and aren't balanced out by any earners that are equally far below average. - -A skew-right distribution looks like the toes on your right foot! - -@slidebreak - -*2. Skewed left, or low outliers* - -@right{@image{images/left-w-foot.png, 300}}Values are clumped around what’s typical, but they trail off to the left with a few unusually low values (or outliers). - -- Most adults have close to a full set of 32 teeth, but a few hockey players might have a very small number of teeth. Since no one has _10 extra teeth_ to balance this out, the only outliers are on the left. -- A school cafeteria mostly buys canned goods in huge sizes, but might have a few ingredients in smaller sizes. If we looked at the ounces per can we’d see a shape that has left skewness and/or low outliers. - -A skew-left distribution will look like the toes on your left foot! - -@slidebreak - -*3. Symmetric: values are balanced on either side of the middle.* - -@right{@image{images/symmetric.png, 300}}In a @vocab{symmetric} distribution, it’s just as likely for the variable to take a value a certain distance below the middle as it is to take a value that same distance above the middle. Examples: - -- It’s just as likely for a newborn baby to be a certain number of ounces below @vocab{average} weight as it is to be that number of ounces above average weight. -- At many restaurants, the busiest dinner time is around 7pm. But there are always a few people who want to eat earlier or later. - -@teacher{For those in an AP Stats class or full-year Data Science class, you may wish to include a discussion of other kinds of distributions (e.g. 
- normal/gaussian, unimodal, bimodal, etc..)} - -=== Investigate - -@QandA{ -@Q{Make a histogram for the pounds column in the animals table, sorting the animals into 20-pound bins.} - -@ifproglang{pyret}{ -@A{Students should enter the code: @show{(code '(histogram animals-table "name" "pounds" 20))}} - -@ifproglang{codap}{ -@A{Students should select the "Graph" button on the upper left. Drag `Pounds` to the x-axis. From the Configuration menu, select "Group into Bins" and "Fuse Dots into Bars." From the same menu, confirm that the bin width is 20.}} - -@Q{Would you describe the shape of your histogram as being @vocab{skewed left}, @vocab{skewed right}, or @vocab{symmetric}?}} -@A{The histogram is skewed left.} - -@Q{Which one of these statements is justified by the histogram’s shape: (1) A few of the animals were unusually light, (2) A few of the animals were unusually heavy, or (3) It was just as likely for an animal to be a certain amount below or above average weight. -} -@A{The 2nd statement "a few of the animals were unusually heavy" is the only one that applies, given the histogram's shape.} - -@Q{Try bins of 1-pound intervals, then 100-pound intervals. Which of these three histograms best satisfies our rule of thumb?} -@A{Our rule of thumb is that a histogram should have between 5–10 bins. The first histogram we made - with 20-pound bins - had a total of ten bins, so it best satisfies our rule.} -} - -@slidebreak - -@lesson-instruction{ -- On @printable-exercise{identifying-shape-histograms.adoc}, describe the shape of the histograms you see there. -- On @printable-exercise{data-cycle-animals-shape.adoc}, describe the pounds histogram and another one you make yourself. When writing down what you notice, try to use the language Data Scientists use, discussing both skew and outliers. 
-} - -@slidebreak - - -@clear - -@strategy{What Shape Makes Sense?}{ -If time allows, here's a great way to get students walking around and thinking more deeply about distributions! - -Using flip-chart paper or whiteboard space, designate poster-sized regions around the classroom titled "Symmetric", "Skew Left", and "Skew Right". You may want to have 2-3 of each, depending on the number of students and size of the classroom. Divide the class into teams, such that each group takes a region of the room. - -Each team looks at the region they're in front of, and must (a) draw a histogram with that shape and (b) _brainstorm a sample that would likely result in that distribution_. Once each team has completed the task, the teams rotate to the next poster and brainstorm another sample. They complete this until every team has come up with at least one unique example for @vocab{symmetric}, @vocab{skew left}, and @vocab{skew right} distributions. -} - -=== Synthesize -- For which distributions was it easiest to come up with an example? -- For which distributions was it hardest to come up with an example? - -@slidebreak - -Histograms are a powerful way to visualize a dataset and see its @vocab{shape}. But shape is just one of three key aspects that tell us what’s going on with a @vocab{quantitative} column of a dataset. We will also want to learn about center and spread! - -== Data Exploration Project (Visualizing Shape) - -=== Overview - -Students apply what they have learned about visualizing shape to the histograms they have created for their chosen dataset. They will add to their @starter-file{exploration-project} a more detailed interpretation of their histograms using new vocabulary. - -@teacher{Visit @lesson-link{project-data-exploration} to learn more about the sequence and scope. Teachers with time and interest can build on the exploration by inviting students to take a deep dive into the questions they develop with our @lesson-link{project-research-paper}. 
-} - -=== Launch - -Let’s review what we have learned about visualizing the shape of data. - -@QandA{ -@Q{Describe a histogram that is _skewed right_. Are its outliers high or low?} -@A{Values are clumped around what's typical, with low outliers.} - -@Q{Describe a histogram that is _skewed left_. Are its outliers high or low?} -@A{Values are clumped around what's typical, with high outliers.} - -@Q{Describe a histogram that is symmetric.} -@A{It’s just as likely for the variable to take a value a certain distance below the middle as it is to take a value that same distance above the middle.} -} - -=== Investigate - -Let’s connect what we know about visualizing the shape of the data to the histograms we created for your chosen dataset. - -@lesson-instruction{ -- Open your chosen dataset starter file in @proglang. -- For this analysis, you'll want to look at the Data Cycle that you completed during the Histograms lesson. -- Recreate the histograms that you made before. Now, edit and expand your discussion so that it uses the new vocabulary that you've used. -} - -@teacher{@opt{If your students who need a fresh copy of the Data Cycle template, distribute @opt-printable-exercise{data-cycle-quantitative.adoc}.}} - -@slidebreak - -@lesson-instruction{ -*It’s time to add to your @starter-file{exploration-project}.* - -- For each of the histograms that you have added, edit and / or expand upon the interpretations you provided during the Histograms lesson. -- Be sure to integrate the new vocabulary we have learned, including: @vocab{shape}, @vocab{skewed left}, @vocab{skewed right}, and @vocab{symmetric}. -- Describe what this shape tells you about the quantitative column you chose. -} - -=== Synthesize - -@teacher{Have students share their findings.} - -- What @vocab{shape}s did you notice in your histograms? -- Did you discover anything surprising or interesting about your dataset? -- Were there any surprises when you compared your findings with other students? 
- -@scrub{ -//// -== Additional Exercises - -- Project: @opt-printable-exercise{word-length.adoc} - A mini-project in which students use a histogram to plot the length of words in different texts. -//// -} diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/pages/identifying-shape-histograms.adoc b/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/pages/identifying-shape-histograms.adoc deleted file mode 100644 index 952b2f62c43..00000000000 --- a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/pages/identifying-shape-histograms.adoc +++ /dev/null @@ -1,13 +0,0 @@ -= Identifying Shape - Histograms - -Describe the shape of the histograms on the left. Do your best to incorporate the vocabulary you've been introduced to. - -[.FillVerticalSpace, cols="^.^2a,^.^10a, 25a", stripes="none", frame="none"] -|=== -| 1 | @centered-image{../images/histogram-a.png, 200} | @ifsoln{Symmetric} -| 2 | @centered-image{../images/histogram-b.png, 200} | @ifsoln{Symmetric} -| 3 | @centered-image{../images/histogram-c.png, 200} | @ifsoln{Skew right} -| 4 | @centered-image{../images/histogram-d.png, 200} | @ifsoln{Skew left} -| 5 | @centered-image{../images/histogram-e.png, 200} | @ifsoln{_This is a little bit of a trick question...it's technically symmetric, but it's really bimodal._} - -|=== diff --git a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/pages/workbook-pages.txt b/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/pages/workbook-pages.txt deleted file mode 100644 index 77338319946..00000000000 --- a/lessons/Data-Science/visualizing-the-shape-of-data/langs/en-us/pages/workbook-pages.txt +++ /dev/null @@ -1,2 +0,0 @@ -identifying-shape-histograms.adoc -data-cycle-animals-shape.adoc diff --git a/lessons/Hour-of-Code/hoc-data/langs/en-us/index.adoc b/lessons/Hour-of-Code/hoc-data/langs/en-us/index.adoc index bbc08fb7782..4ead344abba 100644 --- a/lessons/Hour-of-Code/hoc-data/langs/en-us/index.adoc 
+++ b/lessons/Hour-of-Code/hoc-data/langs/en-us/index.adoc @@ -16,7 +16,7 @@ @description{A self-guided Desmos activity for exploring a real dataset, and using it to make sense of a real-world problem.} -In this stand-alone Bootstrap:Data Science lesson students will explore data visualization, introductory programming using The Pyret Programming Language, and the application of Data Science to solve real problems. Students will investigate a data set of vehicle-wildlife collisions in Vermont and make recommendations about where to build wildlife overpasses. +In this stand-alone Bootstrap:Data Science lesson students will explore data visualization, introductory programming using The Pyret Programming Language, and the application of Data Science to solve real problems. Students will investigate a dataset of vehicle-wildlife collisions in Vermont and make recommendations about where to build wildlife overpasses. @left@image{images/moose.png, 380}, @right{@image{images/Logo.png, 250}} diff --git a/lessons/Projects/project-data-exploration/langs/en-us/index.adoc b/lessons/Projects/project-data-exploration/langs/en-us/index.adoc index 35c0b9144d2..727afe68518 100644 --- a/lessons/Projects/project-data-exploration/langs/en-us/index.adoc +++ b/lessons/Projects/project-data-exploration/langs/en-us/index.adoc @@ -11,8 +11,8 @@ | Students will be able to... 
@objectives -@objective{measures-of-center-pyret} -@objective{measures-of-spread-pyret} +@objective{measures-of-center-proglang} +@objective{measures-of-spread-proglang} - create a variety of visualizations from a dataset of their choosing | Student-facing Lesson Goals @@ -78,8 +78,8 @@ Once students have a real world dataset to focus on, they will apply what they l - @lesson-link{choosing-your-dataset} (since bar @ifproglang{pyret}{and pie} charts @ifproglang{codap}{and dot plots} are introduced in @ifproglang{pyret}{@lesson-link{bar-and-pie-charts}}@ifproglang{codap}{@lesson-link{codap-dot-plots-bar-charts}}, which comes before they'll have a dataset to focus on) -- @lesson-link{histograms} -- @lesson-link{visualizing-the-shape-of-data} +- @lesson-link{histograms-visualize} +- @lesson-link{histograms-interpret} - @lesson-link{measures-of-center} - @lesson-link{box-plots} - @lesson-link{standard-deviation} diff --git a/lessons/Projects/project-snack-habits/langs/en-us/index.adoc b/lessons/Projects/project-snack-habits/langs/en-us/index.adoc index 788853cfae4..da7c277531e 100644 --- a/lessons/Projects/project-snack-habits/langs/en-us/index.adoc +++ b/lessons/Projects/project-snack-habits/langs/en-us/index.adoc @@ -60,7 +60,7 @@ === Overview -Students track their snacking habits over the course of 5 days to generate a collective data set. In preparation, they'll identify the data types of each variable. By revisiting the spreadsheet the class is generating throughout the data collection phase, you'll have an opportunity to discuss what it means to clean a data set. +Students track their snacking habits over the course of 5 days to generate a collective dataset. In preparation, they'll identify the data types of each variable. By revisiting the spreadsheet the class is generating throughout the data collection phase, you'll have an opportunity to discuss what it means to clean a dataset. 
@teacher{Note: This phase of the project should be started right away, but does not need to be completed before moving on to the Researching U.S. Snacking Habits phase of this project.} @@ -265,7 +265,7 @@ Once finished, encourage students to self-assess and revise their work. If time === Synthesize - What were the pros and cons of working with data generated by you and your classmates? -- What other data do you wish had been part of our collective data set? What other questions would you suggest adding to the form? +- What other data do you wish had been part of our collective dataset? What other questions would you suggest adding to the form? @teacher{ - Decide what form of sharing their projects works best for you. diff --git a/lessons/Projects/project-stress-or-chill/langs/en-us/pages/wip-project-stress-or-chill.adoc b/lessons/Projects/project-stress-or-chill/langs/en-us/pages/wip-project-stress-or-chill.adoc index 56e3849ad8a..1c4535f5264 100644 --- a/lessons/Projects/project-stress-or-chill/langs/en-us/pages/wip-project-stress-or-chill.adoc +++ b/lessons/Projects/project-stress-or-chill/langs/en-us/pages/wip-project-stress-or-chill.adoc @@ -90,7 +90,7 @@ Add more slides if you need! === Synthesize - What were the pros and cons of working with data generated by you and your classmates? -- What other data do you wish had been part of our collective data set? What other questions would you suggest adding to the form? +- What other data do you wish had been part of our collective dataset? What other questions would you suggest adding to the form? @teacher{ - Once finished, encourage students to self-assess and revise their work. 
diff --git a/lessons/Projects/project-time-use/langs/en-us/pages/wip-project-Time-Use.adoc b/lessons/Projects/project-time-use/langs/en-us/pages/wip-project-Time-Use.adoc index 9bad1f5fc2a..952768ce918 100644 --- a/lessons/Projects/project-time-use/langs/en-us/pages/wip-project-Time-Use.adoc +++ b/lessons/Projects/project-time-use/langs/en-us/pages/wip-project-Time-Use.adoc @@ -31,7 +31,7 @@ | Facilitation Note | -Before students will be able to get to the analysis phase of this project, they will need at least 5 days to collect the data to generate the collective data set. Including at least one weekend for tracking snack habits will likely lead to richer data. +Before students will be able to get to the analysis phase of this project, they will need at least 5 days to collect the data to generate the collective dataset. Including at least one weekend for tracking snack habits will likely lead to richer data. |=== @@ -113,7 +113,7 @@ Add more slides if you need! === Synthesize - What were the pros and cons of working with data generated by you and your classmates? -- What other data do you wish had been part of our collective data set? What other questions would you suggest adding to the form? +- What other data do you wish had been part of our collective dataset? What other questions would you suggest adding to the form? @teacher{ - Once finished, encourage students to self-assess and revise their work. 
diff --git a/lessons/__Authoring/__sample-lesson/langs/en-us/authoring/useful-asciidoc-commands.adoc b/lessons/__Authoring/__sample-lesson/langs/en-us/authoring/useful-asciidoc-commands.adoc index bb197ea771a..bfc769aaeb3 100644 --- a/lessons/__Authoring/__sample-lesson/langs/en-us/authoring/useful-asciidoc-commands.adoc +++ b/lessons/__Authoring/__sample-lesson/langs/en-us/authoring/useful-asciidoc-commands.adoc @@ -169,7 +169,7 @@ The other formatting defines where in the cell the words end up being placed (se ==== Aligning text within the table - without a dot, we mean horizontal alignment. -- With a dot, it's vertical alignment. +- With a dot before it, we mean vertical alignment. - < means to the left (horizontally) or top (vertically). - ^ is center (horizontally or vertically) - > pushes to the right (horizontally) or bottom (vertically). diff --git a/lessons/__Authoring/__sample-lesson/langs/en-us/authoring/useful-css-rules.adoc b/lessons/__Authoring/__sample-lesson/langs/en-us/authoring/useful-css-rules.adoc index b598d55be2a..daa1b7368fd 100644 --- a/lessons/__Authoring/__sample-lesson/langs/en-us/authoring/useful-css-rules.adoc +++ b/lessons/__Authoring/__sample-lesson/langs/en-us/authoring/useful-css-rules.adoc @@ -5,7 +5,17 @@ Emmanuel suggests: “Google for CSS+Whatever+I+Am+Trying+To+Do” * For autonumbering inside a table ++++ +++++ + +* To place letters in the top left corner in table cells + +++++ + ++++ diff --git a/lessons/__Authoring/__sample-lesson/langs/en-us/authoring/useful-unicode.adoc b/lessons/__Authoring/__sample-lesson/langs/en-us/authoring/useful-unicode.adoc index 059e1b370b4..7282328fec9 100644 --- a/lessons/__Authoring/__sample-lesson/langs/en-us/authoring/useful-unicode.adoc +++ b/lessons/__Authoring/__sample-lesson/langs/en-us/authoring/useful-unicode.adoc @@ -4,6 +4,7 @@ * greater than or equal to ≥ * star ★ * right arrow → +* unchecked check box ☐ * subscript 2 ₂ * subscript 1 ₁ diff --git 
a/lessons/__Authoring/__sample-lesson/langs/en-us/index.adoc b/lessons/__Authoring/__sample-lesson/langs/en-us/index.adoc index 650cdb5b207..c6c3d01f9aa 100644 --- a/lessons/__Authoring/__sample-lesson/langs/en-us/index.adoc +++ b/lessons/__Authoring/__sample-lesson/langs/en-us/index.adoc @@ -14,6 +14,8 @@ deadlift. @math{3a = 2h + 1i} or @math{3=2+1} @keywords{larry, curly, moe} +This is a common misconception (@citation{groth-bergner-2006}). + [@lesson-intro-table] |=== @@ -42,10 +44,19 @@ deadlift. @math{3a = 2h + 1i} or @math{3=2+1} | Supplemental Resources | +| Preparation +| @preparation{ +- Decide how much choice you're ready to offer your students before you begin. Research shows that choice increases student engagement! But focusing the whole class on a single dataset is also an option. + * Would focusing your students on a single dataset make this doable for you? Because you teach younger students who might need more scaffolding? Or because you are new to teaching data science and managing fewer moving parts would increase your confidence? 
@ifproglang{pyret}{We recommend focusing on @opt-starter-file{food}.} +} + +| Key Points for the Facilitator +| *Authors: Lesson section titles should mean something to teachers who haven't worked with previous Bootstrap lessons* + |=== -== Graphing logarithmic models __f(x) = a log~b~ x + c__ +== Graphing logarithmic models __f(x) = a log~b~ x + c__ @objective{expressions-exponents} @objective{develop-statistical-question} diff --git a/lib/bootstraplesson.js b/lib/bootstraplesson.js index ace51e869da..8763af83dbb 100644 --- a/lib/bootstraplesson.js +++ b/lib/bootstraplesson.js @@ -994,7 +994,7 @@ async function makeCustomBook(title, lessons, optPages=false, pageNums) { } const TOC = await makeTOC(); - const {path, bytes} = await makeWorkbook( + const {path, bytes, warnings} = await makeWorkbook( PDFLib, title, pageUrls, @@ -1007,6 +1007,8 @@ async function makeCustomBook(title, lessons, optPages=false, pageNums) { optPages, pageNums, ); + warnings.forEach(({path, length}) => + console.log(`WARNING: in ${title}, ${path} has ${length} pages`)); await showPDFAndCleanup(file, bytes); } @@ -1049,6 +1051,7 @@ async function makeWorkbook( const [TOCpage] = await workbook.copyPages(doc, [0]); } + let warnings = []; for (const [idx, path] of paths.entries()) { // Read the file, calling onProcess hook const bytes = await onProcessPage(idx, path); @@ -1062,8 +1065,9 @@ async function makeWorkbook( if((pages.length > 1) && !knownLongPages.includes(fileName)) { // Only warn for longworkbook, to eliminate redundant warnings if (longworkbook) { - console.log(`WARNING: in ${course} ${outputPath.split('/').pop()}, - ${path} has ${pages.length} pages`); + // save an abbreviated path (just the lesson, pages folder, and filename) to the file, + // along with the number of pages + warnings.push({path: path.split('/').slice(-3).join('/'), length: pages.length}); } pages = pages.splice(0, 1); } @@ -1121,7 +1125,7 @@ async function makeWorkbook( 
workbook.catalog.set(PDFName.of('PageLabels'), pageLabels); const pdfBytes = await workbook.save(); - return {path: outputPath, bytes: pdfBytes}; + return {path: outputPath, bytes: pdfBytes, warnings: warnings}; } /******************************************* * EVENT HANDLERS diff --git a/lib/glossary-terms.json b/lib/glossary-terms.json index df6ac8f9312..26668c8aa85 100644 --- a/lib/glossary-terms.json +++ b/lib/glossary-terms.json @@ -220,6 +220,15 @@ } }, { + "en-us": { + "keywords": [[ "central tendency" ]], + "description": "a summary measure that attempts to describe a whole set of data with a single value that represents the middle or center of its distribution. In other words, a value that all the others tend to cluster around." + }, + "es-mx": { + "keywords": [[ "" ]], + "description": "" + } + },{ "en-us": { "keywords": [[ "circle of evaluation", "circles of evaluation" ]], "description": "a diagram of the structure of a mathematical expression (Bootstrap-specific)" diff --git a/lib/html2pdf.js b/lib/html2pdf.js index 69909b77a7d..67003d124a7 100644 --- a/lib/html2pdf.js +++ b/lib/html2pdf.js @@ -306,6 +306,7 @@ async function HtmlToPdf(htmlFileSpecs, tabsPerCore=2) { // Wait for cluster to idle and close it await cluster.idle(); await cluster.close(); + // If there are warnings, trim them to only show unique lesson/pages/filename paths if(warnings.length > 0) { // use only the lesson, folder, and filename (drop everything else) warnings = warnings.map(w => w.split("/").slice(-3).join("/")); diff --git a/lib/makeWorkbook.js b/lib/makeWorkbook.js index ef3620dc13f..8806256cb3a 100644 --- a/lib/makeWorkbook.js +++ b/lib/makeWorkbook.js @@ -24,7 +24,7 @@ async function makeWorkbookFromSpecList(specList, outputPath, longworkbook = fal const paths = specList.map(spec => topdir + "/" + spec.file); // call the core workbook generator - const {path, bytes} = await makeWorkbook( + const {path, bytes, warnings} = await makeWorkbook( pdfLib, 
process.env["COURSE_DIR"], paths, @@ -32,7 +32,6 @@ async function makeWorkbookFromSpecList(specList, outputPath, longworkbook = fal async (idx, path) => await fs.readFile(path), // onProcessPage: read the file false, longworkbook); - const doc = await pdfLib.PDFDocument.load(bytes); const pageCount = doc.getPageCount(); @@ -68,19 +67,27 @@ async function makeWorkbookFromSpecList(specList, outputPath, longworkbook = fal } // write the book await fs.writeFile(outputPath, bytes); + return warnings; } async function makeAllWorkbooks() { - await makeWorkbookFromSpecList(optExercisesFiles, prefix + "/workbook/opt-exercises.pdf"); - await makeWorkbookFromSpecList(optExercisesSolsFiles, prefix + "/resources/protected/opt-exercises-sols.pdf"); - await makeWorkbookFromSpecList(workbookFiles, prefix + "/workbook/workbook.pdf"); - await makeWorkbookFromSpecList(workbookSolsFiles, prefix + "/resources/protected/workbook-sols.pdf"); - await makeWorkbookFromSpecList(workbookLongFiles, prefix + "/workbook/workbook-long.pdf", true); - await makeWorkbookFromSpecList(workbookLongSolsFiles, prefix + "/resources/protected/workbook-long-sols.pdf", true); + let warnings = []; + warnings.push(await makeWorkbookFromSpecList(optExercisesFiles, prefix + "/workbook/opt-exercises.pdf")); + warnings.push(await makeWorkbookFromSpecList(optExercisesSolsFiles, prefix + "/resources/protected/opt-exercises-sols.pdf")); + warnings.push(await makeWorkbookFromSpecList(workbookFiles, prefix + "/workbook/workbook.pdf")); + warnings.push(await makeWorkbookFromSpecList(workbookSolsFiles, prefix + "/resources/protected/workbook-sols.pdf")); + warnings.push(await makeWorkbookFromSpecList(workbookLongFiles, prefix + "/workbook/workbook-long.pdf", true)); + warnings.push(await makeWorkbookFromSpecList(workbookLongSolsFiles, prefix + "/resources/protected/workbook-long-sols.pdf", true)); if (makeMasterWorkbook) { - await makeWorkbookFromSpecList(pdWorkbookFiles, prefix + 
"/resources/protected/pd-workbook.pdf"); + warnings.push(await makeWorkbookFromSpecList(pdWorkbookFiles, prefix + "/resources/protected/pd-workbook.pdf")); // await makeWorkbook(pdWorkbookLongFiles, prefix + "/resources/protected/pd-workbook-long.pdf"); } + const uniqueWarnings = new Set(warnings.flat().map( ({path, length}) => + `WARNING: ${path} has ${length} pages`)); + if(uniqueWarnings.size > 0) { + console.warn(`\nIn the ${process.env["COURSE_DIR"]} workbook...`); + [...uniqueWarnings].sort().forEach( w => console.warn(w)); + } } makeAllWorkbooks() diff --git a/lib/maker/Makefile.phase1 b/lib/maker/Makefile.phase1 index 584114fa184..dfaa12a24a6 100644 --- a/lib/maker/Makefile.phase1 +++ b/lib/maker/Makefile.phase1 @@ -1,6 +1,6 @@ include $(MAKE_DIR)utils.mk -phase1: initialization .git/hooks/pre-commit node_modules courses-and-lessons page-not-found glossary bootstraplessonjs starterFiles assessments learningObjectives dictionaries +phase1: initialization .git/hooks/pre-commit node_modules courses-and-lessons page-not-found glossary bootstraplessonjs starterFiles assessments learningObjectives dictionaries citations ############################################################################### @@ -166,6 +166,15 @@ distribution/$(NATLANG)/learningObjectives.js: $(PROGDIR)/combine-dicts.js $(PRO @echo ➤ Build $@ @node $+ $@ + +############################################################################### + +citations: distribution/$(NATLANG)/citations.js + +distribution/$(NATLANG)/citations.js: $(PROGDIR)/combine-dicts.js $(PROGDIR)/citations + @echo ➤ Build $@ + @node $+ $@ + ############################################################################### courses-and-lessons: copy-lessons copy-pathways diff --git a/lib/preproc.rkt b/lib/preproc.rkt index 2782d5c3fdd..453094506c0 100644 --- a/lib/preproc.rkt +++ b/lib/preproc.rkt @@ -196,6 +196,15 @@ (read-json i))) '()))) +(define *citations* + (let ([citations-file (format "distribution/~a/citations.js" 
*natlang*)]) + (if (file-exists? citations-file) + (call-with-input-file citations-file + (lambda (i) + (read i) (read i) (read i) + (read-json i))) + '()))) + (define *objectives-met* '()) (define *assessments-met* '()) @@ -1970,6 +1979,32 @@ (cons (cons url title) *assessments-met*))) (fprintf o "link:pass:[~a][~a]" url title)]))] + [(string=? directive "citation") + (let* ([args (read-commaed-group i directive read-group)] + [args-len (length args)] + [lbl (string->symbol (first args))] + [c (hash-ref *citations* lbl #f)] + [in-text (and c (hash-ref c 'in-text #f))] + [apa (and c (hash-ref c 'apa #f))]) + (cond [(> (length args) 1) + (set! in-text + (expand-directives:string->string + (second args)))] + [in-text + (set! in-text + (expand-directives:string->string in-text))] + [else lbl]) + (unless apa (set! apa lbl)) + (cond [(not c) + (printf "WARNING: ~a: Undefined @~a ~a\n\n" + (errmessage-context) directive lbl)] + [(not in-text) + (printf "WARNING: ~a: @~a ~a missing\n\n" + (errmessage-context) directive lbl)] + [else (display + (enclose-span ".citation" + (string-append in-text (enclose-span ".apa-citation" apa))) + o)]))] [(string=? directive "objectives") (fprintf o "\ninclude::~a/{cachedir}.index-objectives.asc[]\n" *containing-directory*)] [(string=? 
directive "objective") diff --git a/lib/shared.less b/lib/shared.less index 014ba908c1d..865fcdb38a4 100644 --- a/lib/shared.less +++ b/lib/shared.less @@ -64,6 +64,25 @@ body { .ulist ul { margin-bottom: 0px; } .ulist p { margin: 5px 0px; } .quad { display: inline-block; } + .citation { + position: relative; + .apa-citation { display: none; } + &:hover { + .apa-citation { + display: inline-block; + position: absolute; + top: 100%; + left: 0; + width: 300px; + background: lightyellow; + font-size: 6pt; + line-height: initial; + border: 1px solid gray; + padding: 5px; + } + } + + } /* Make .mathunicode and MathJax look similar */ .MathJax, .mathunicode { diff --git a/lib/spellcheck.js b/lib/spellcheck.js index 151fc52116c..cb21dec2945 100644 --- a/lib/spellcheck.js +++ b/lib/spellcheck.js @@ -142,7 +142,8 @@ const added_words = [ "Roboto", "unclosed", "autonum", - "Piecewise" + "Piecewise", + "middle" ]; const ignored_words = [ diff --git a/pathways/data-literacy-codap/langs/en-us/lesson-order.txt b/pathways/data-literacy-codap/langs/en-us/lesson-order.txt index 2d87589da2c..8f7b15977e9 100644 --- a/pathways/data-literacy-codap/langs/en-us/lesson-order.txt +++ b/pathways/data-literacy-codap/langs/en-us/lesson-order.txt @@ -8,8 +8,8 @@ project-snack-habits probability-inference choosing-your-dataset project-data-exploration -histograms -visualizing-the-shape-of-data +histograms-visualize +histograms-interpret measures-of-center box-plots standard-deviation diff --git a/pathways/data-literacy-codap/langs/en-us/resources/pages/implementation-options.adoc b/pathways/data-literacy-codap/langs/en-us/resources/pages/implementation-options.adoc index a46df647c27..efb6136c660 100644 --- a/pathways/data-literacy-codap/langs/en-us/resources/pages/implementation-options.adoc +++ b/pathways/data-literacy-codap/langs/en-us/resources/pages/implementation-options.adoc @@ -93,8 +93,8 @@ Students choose a real dataset, or create their own! 
They explore this dataset, [.Lessons] * @lesson-link{choosing-your-dataset} -* @lesson-link{histograms} -* @lesson-link{visualizing-the-shape-of-data} +* @lesson-link{histograms-visualize} +* @lesson-link{histograms-interpret} * @lesson-link{measures-of-center} * @lesson-link{box-plots} * @lesson-link{standard-deviation} @@ -217,7 +217,7 @@ In addition to whatever project you want your students to do with the data from * @lesson-link{codap-dot-plots-bar-charts} | === ...then choose what you need -** @lesson-link{visualizing-the-shape-of-data} +** @lesson-link{histograms-interpret} ** @lesson-link{measures-of-center} ** @lesson-link{box-plots} ** @lesson-link{scatter-plots} diff --git a/pathways/data-literacy/langs/en-us/lesson-order.txt b/pathways/data-literacy/langs/en-us/lesson-order.txt index ed2ab107038..5b080d4392b 100644 --- a/pathways/data-literacy/langs/en-us/lesson-order.txt +++ b/pathways/data-literacy/langs/en-us/lesson-order.txt @@ -13,10 +13,11 @@ data-cycle ; DS --> project-snack-habits probability-inference ; DS choosing-your-dataset ; DS project-data-exploration ; project -; dot-plots-and-variability ; DS -histograms ; DS -visualizing-the-shape-of-data ; DS +dot-plots ; DS +variability ; DS measures-of-center ; DS +histograms-visualize ; DS +histograms-interpret ; DS box-plots ; DS ; visualizing-the-spread-of-data standard-deviation ; DS diff --git a/pathways/data-literacy/langs/en-us/resources/pages/implementation-options.adoc b/pathways/data-literacy/langs/en-us/resources/pages/implementation-options.adoc index 243dcee2a04..0ef4c298622 100644 --- a/pathways/data-literacy/langs/en-us/resources/pages/implementation-options.adoc +++ b/pathways/data-literacy/langs/en-us/resources/pages/implementation-options.adoc @@ -95,8 +95,8 @@ Students choose a real dataset, or create their own! 
They explore this dataset, [.Lessons] * @lesson-link{choosing-your-dataset} -* @lesson-link{histograms} -* @lesson-link{visualizing-the-shape-of-data} +* @lesson-link{histograms-visualize} +* @lesson-link{histograms-interpret} * @lesson-link{measures-of-center} * @lesson-link{box-plots} * @lesson-link{standard-deviation} @@ -238,7 +238,7 @@ In addition to whatever project you want your students to do with the data from * @lesson-link{bar-and-pie-charts} | === ...then choose what you need -** @lesson-link{visualizing-the-shape-of-data} +** @lesson-link{histograms-interpret} ** @lesson-link{measures-of-center} ** @lesson-link{box-plots} ** @lesson-link{scatter-plots} diff --git a/pathways/data-science/langs/en-us/lesson-order.txt b/pathways/data-science/langs/en-us/lesson-order.txt index 9d44047d76b..28c39c38afd 100644 --- a/pathways/data-science/langs/en-us/lesson-order.txt +++ b/pathways/data-science/langs/en-us/lesson-order.txt @@ -1,73 +1,70 @@ -computing-needs-all-voices ; equity -ds-intro ; DS +computing-needs-all-voices ; equity +ds-intro ; DS ; PROGRAMMING 1 -simple-data-types ; programming -contracts ; programming -project-logo ; project +simple-data-types ; programming +contracts +project-logo ; project contracts-tables ; programming contracts-visualizations ; programming -bar-and-pie-charts ; DS --> project-infographic -functions-make-life-easier ; programming -functions-examples-definitions ; programming -functions-dr ; programming --> project-function +bar-and-pie-charts ; DS --> project-infographic +functions-make-life-easier ; programming +functions-examples-definitions ; programming ; STATISTICS 1 -data-cycle ; DS --> project-snack-habits -lookups ; programming -defining-table-functions ; DS, programming -filtering-and-building ; DS, programming -project-data-exploration ; project -choosing-your-dataset ; DS +data-cycle ; DS --> project-snack-habits +lookups ; programming +dot-plots ; DS +variability ; DS +project-data-exploration ; project 
+choosing-your-dataset ; DS ; PROGRAMMING 2 -; dot-plots-and-variability ; DS -scatter-plots ; DS -advanced-visualizations ; DS, programming --> project-beautiful-data -composing-table-operations ; DS, programming +functions-dr ; programming --> project-function +defining-table-functions ; DS, programming +scatter-plots ; DS +advanced-visualizations ; DS, programming --> project-beautiful-data +filtering-and-building ; DS, programming +composing-table-operations ; DS, programming ; STATISTICS 2 -grouped-samples ; DS, programming -probability-inference ; DS -ethics-privacy-and-bias ; DS -data-collection ; DS --> project-design-a-survey -measures-of-center ; DS -; variability ; DS -histograms ; DS -visualizing-the-shape-of-data ; DS -box-plots ; DS -; visualizing-the-spread-of-data ; DS -standard-deviation ; DS -fitting-models ; DS, programming -correlations ; DS -linear-regression ; DS +grouped-samples ; DS, programming +probability-inference ; DS +ethics-privacy-and-bias ; DS +data-collection ; DS --> project-design-a-survey +measures-of-center ; DS +histograms-visualize ; DS +histograms-interpret ; DS +variability ; DS +box-plots ; DS +standard-deviation ; DS +fitting-models ; DS, programming +correlations ; DS +linear-regression ; DS ; STATISTICS 3 -checking-your-work ; DS, programming -threats-to-validity ; DS --> project-threats -project-research-paper ; project +checking-your-work ; DS, programming +threats-to-validity ; DS --> project-threats +project-research-paper ; project ; MODELING ; LINEAR MODELS ; linear1-exploring-states -; linear2-building-models ; Math -; linear3-fitting-models ; Math -; linear4-other-forms ; Math - +; linear2-building-models ; Math +; linear3-fitting-models ; Math +; linear4-other-forms ; Math ; QUADRATIC MODELS -; quadratic1-exploring-mpg -; quadratic2-building-models ; Math -; quadratic3-fitting-models ; Math -; quadratic4-other-forms ; Math - +; quadratic1-exploring-mpg +; quadratic2-building-models ; Math +; 
quadratic3-fitting-models ; Math +; quadratic4-other-forms ; Math ; EXPONENTIAL MODELS ; exponential1-exploring-covid -; exponential2-building-models ; Math -; exponential3-fitting-models ; Math -; exponential4-simpsons-paradox ; Math - +; exponential2-building-models ; Math +; exponential3-fitting-models ; Math +; exponential4-simpsons-paradox ; Math ; LOGARITHMIC MODELS ; logarithmic1-exploring-wealth-health -; logarithmic2-building-models ; Math -; logarithmic3-changing-scale ; Math -; logarithmic4-linearization ; Math \ No newline at end of file +; logarithmic2-building-models ; Math +; logarithmic3-changing-scale ; Math +; logarithmic4-linearization ; Math \ No newline at end of file diff --git a/pathways/data-science/langs/en-us/resources/pages/implementation-options.adoc b/pathways/data-science/langs/en-us/resources/pages/implementation-options.adoc index d385fe59682..b7d184d2eb1 100644 --- a/pathways/data-science/langs/en-us/resources/pages/implementation-options.adoc +++ b/pathways/data-science/langs/en-us/resources/pages/implementation-options.adoc @@ -95,8 +95,8 @@ Students choose a real dataset, or create their own! 
They explore this dataset, [.Lessons] * @lesson-link{choosing-your-dataset} -* @lesson-link{histograms} -* @lesson-link{visualizing-the-shape-of-data} +* @lesson-link{histograms-visualize} +* @lesson-link{histograms-interpret} * @lesson-link{measures-of-center} * @lesson-link{box-plots} * @lesson-link{standard-deviation} @@ -216,7 +216,7 @@ This format includes multiple project-based options, including @lesson-link{proj == Just a taste of Data Science @duration{1 to 4 weeks} -A module with minimial programming, designed for: +A module with minimal programming, designed for: - Science teachers who want students to gather data and generate charts for lab reports - Math teachers who want students to experiment with charts and plots @@ -237,7 +237,7 @@ In addition to whatever project you want your students to do with the data from * @lesson-link{bar-and-pie-charts} | === ...then choose what you need -** @lesson-link{visualizing-the-shape-of-data} +** @lesson-link{histograms-interpret} ** @lesson-link{measures-of-center} ** @lesson-link{box-plots} ** @lesson-link{scatter-plots} diff --git a/pathways/math-grade6/langs/en-us/lesson-order.txt b/pathways/math-grade6/langs/en-us/lesson-order.txt index f1f67dc52a7..bf2d8c9f73b 100644 --- a/pathways/math-grade6/langs/en-us/lesson-order.txt +++ b/pathways/math-grade6/langs/en-us/lesson-order.txt @@ -20,7 +20,8 @@ surface-area-rect-prism ; inequalities2-compound ; inequalities3-sam ds-intro -histograms +histograms-visualize +histograms-interpret data-collection diff --git a/pathways/math-grade7/langs/en-us/lesson-order.txt b/pathways/math-grade7/langs/en-us/lesson-order.txt index 5da602c7532..0ba20e3e992 100644 --- a/pathways/math-grade7/langs/en-us/lesson-order.txt +++ b/pathways/math-grade7/langs/en-us/lesson-order.txt @@ -21,7 +21,8 @@ contracts-tables ; programming contracts-visualizations ; programming bar-and-pie-charts probability-inference -histograms +histograms-visualize +histograms-interpret measures-of-center 
box-plots function-composition diff --git a/pathways/ok/langs/en-us/lesson-order.txt b/pathways/ok/langs/en-us/lesson-order.txt index c3bb82fdc49..6d251be928a 100644 --- a/pathways/ok/langs/en-us/lesson-order.txt +++ b/pathways/ok/langs/en-us/lesson-order.txt @@ -28,8 +28,8 @@ bar-and-pie-charts data-cycle probability-inference ; DS choosing-your-dataset ; DS -histograms ; DS -visualizing-the-shape-of-data ; DS +histograms-visualize +histograms-interpret ; DS measures-of-center ; DS box-plots ; DS standard-deviation ; DS diff --git a/shared/langs/en-us/citations/citations.json b/shared/langs/en-us/citations/citations.json new file mode 100644 index 00000000000..55cbe3fbed1 --- /dev/null +++ b/shared/langs/en-us/citations/citations.json @@ -0,0 +1,68 @@ +{ + "groth-bergner-2006": { + "in-text" : "Groth & Bergner, 2006", + "apa" : "Groth, R. E., & Bergner, J. A. (2006). Preservice Elementary Teachers’ Conceptual and Procedural Knowledge of Mean, Median, and Mode. _Mathematical Thinking and Learning_, 8(1), 37–63.", + "public-url": "https://www.researchgate.net/publication/233103005_Preservice_Elementary_Teachers'_Conceptual_and_Procedural_Knowledge_of_Mean_Median_and_Mode", + "private-url" : "https://drive.google.com/file/d/1xqWlY8diwo2UubdKiSxbEDhV0T3DV5hm/view?usp=drive_link" + }, + "pollatsek-et-al-1981":{ + "in-text": "Pollatsek et al, 1981", + "apa": "Pollatsek, A., Lima, S. & Well, A.D. Concept or computation: Students' understanding of the mean. _Educational Studies in Mathematics_, 12, 191–204 (1981).", + "public-url": "https://www.researchgate.net/publication/226879251_Concept_or_computation_Students'_understanding_of_the_mean", + "private-url": "https://drive.google.com/file/d/1EVSCwRzoUJhBW2IAQcUujac9ciFr8Tai/view?usp=drive_link" + }, + "bakker-et-al-2005":{ + "in-text": "Bakker et al, 2005", + "apa": "Bakker, A., Biehler, R., & Konold, C. (2005). Should young students learn about box plots? In G. Burrill & M. 
Camden (Eds.), _Curricular Development in Statistics Education: International Association for Statistical Education (IASE) Roundtable_, Lund, Sweden, 28 June-3 July 2004.", + "public-url": "https://www.researchgate.net/publication/233864567_Should_young_students_learn_about_box_plots", + "private-url": "https://drive.google.com/file/d/1I6ioAUSrgF4MO_ZwdILOUa1ebHESTyTb/view?usp=drive_link" + }, + "cooper-shore-2008":{ + "in-text": "Cooper & Shore, 2008", + "apa": "Cooper, L., & Shore, F. S. (2008). Students’ Misconceptions in Interpreting Center and Variability of Data Represented via Histograms and Stem-and-Leaf Plots. _Journal of Statistics Education_, 16(2), 1.", + "public-url": "https://www.researchgate.net/publication/228875489_Students'_Misconceptions_in_Interpreting_Center_and_Variability_of_Data_Represented_via_Histograms_and_Stem-and-Leaf_Plots", + "private-url": "https://drive.google.com/file/d/100E6gT0Uin9DroFyuyMEqf7QL9oZaVPA/view?usp=drive_link" + }, + "lem-et-al-2013":{ + "in-text": "Lem et al, 2013", + "apa": "Lem, S., Onghena, P., Verschaffel, L., & Van Dooren, W. (2013). On the misinterpretation of histograms and box plots. _Educational Psychology_, 33, 155 - 174.", + "public-url": "https://www.sciencedirect.com/science/article/abs/pii/S0959475213000029", + "private-url": "https://drive.google.com/file/d/1nbFdr61q3xWEhpex_blRPh0F5xCbV-Nj/view?usp=drive_link" + }, + "cooper-2018":{ + "in-text": "Cooper, 2018", + "apa": "Cooper, L. (2018). Assessing Students’ Understanding of Variability in Graphical Representations that Share the Common Attribute of Bars. _Journal of Statistics Education_, 26(2), 110–124.", + "public-url": "https://www.tandfonline.com/doi/full/10.1080/10691898.2018.1473060", + "private-url": "https://drive.google.com/file/d/1dZkzWfD5kvLE6M7qUJEm90g6_L_7oSgp/view?usp=drive_link" + }, + "kade-perry-2007":{ + "in-text": "Kader & Perry, 2007", + "apa": "G.D. Kader, M. 
Perry, Variability for categorical variables, _Journal of Statistics Education_, 15(2) (2007).", + "public-url": "", + "private-url": "" + }, + "kaplan-et-al-2014": { + "in-text": "Kaplan et al, 2014", + "apa": "Kaplan, J., Gabrosek, J., Curtiss, P., & Malone, C. (2014). Investigating student understanding of histograms. _Journal of Statistics Education_, 22(2).", + "public-url": "https://www.tandfonline.com/doi/pdf/10.1080/10691898.2014.11889701", + "private-url": "https://drive.google.com/file/d/10QQdymf89PA3VlcPcRSf25_f_evQZbl2/view?usp=drive_link" + }, + "boels-et-al-2019": { + "in-text": "Boels et al, 2019", + "apa": "Boels, L., Bakker, A., Van Dooren, W., & Drijvers, P. (2019). Conceptual difficulties when interpreting histograms: A review. _Educational Research Review_.", + "public-url": "https://www.sciencedirect.com/science/article/pii/S1747938X18304615", + "private-url": "https://drive.google.com/file/d/1MrfWOMeC8IGm9-MNvRPBVKvj_3M7CjKF/view?usp=drive_link" + }, + "lem-et-al-2011": { + "in-text": "Lem et al, 2011", + "apa": "Lem, S., Onghena, P., Verschaffel, L., & Van Dooren, W. (2011). Coordinating between histograms and box plots. Dublin, Ireland: IASE Satellite Conferences.", + "public-url": "https://iase-web.org/documents/papers/sat2011/IASE2011PaperPoster6Lemetal.pdf?1402524996", + "private-url": "https://drive.google.com/file/d/155fVvTgFW6BpuabALl_ReVaWRkuhIbeU/view?usp=drive_link" + }, + "whittaker-jacobbe-2017": { + "in-text": "Whitaker & Jacobbe, 2017", + "apa": "Douglas Whitaker, Tim Jacobbe (2017). Students' Understanding of Bar Graphs and Histograms: Results From the LOCUS Assessments. 
_Journal of Statistics Education_, 25(2), 90-102.", + "public-url": "https://www.tandfonline.com/doi/epdf/10.1080/10691898.2017.1321974?needAccess=true", + "private-url": "https://drive.google.com/file/d/1SMqmWpwstho2BrCGd34wlrbzVPInTrF7/view?usp=sharing" + } +} \ No newline at end of file diff --git a/shared/langs/en-us/docroot/index.shtml b/shared/langs/en-us/docroot/index.shtml index 0613918e6c2..1782c526132 100644 --- a/shared/langs/en-us/docroot/index.shtml +++ b/shared/langs/en-us/docroot/index.shtml @@ -29,8 +29,8 @@ // certain lessons are "preferred" depending on the subject var subjectPreference = { math : ["numbers-inside-video-games","order-of-operations", "coordinates", "distance", "function-composition", "bar-and-pie-charts", "scatter-plots", "histograms", "box-plots","functions-for-character-animation", "inequalities3-sam", "surface-area-rect-prism", "measures-of-center", "functions-dr", "flags", "inequalities1-simple", "inequalities2-compound", "linear-regression", "combinatorics-combination", "combinatorics-permutation", "function-definition-linear", "functions-can-be-linear", "problem-decomposition"], - history : ["ds-intro", "ethics-privacy-and-bias", "threats-to-validity", "correlations", "bar-and-pie-charts", "flags", "histograms", "visualizing-the-shape-of-data"], - science : ["simple-data-types", "ds-intro", "numbers-inside-video-games", "simple-data-types", "bar-and-pie-charts", "scatter-plots", "histograms", "visualizing-the-shape-of-data", "measures-of-center", "box-plots"], + history : ["ds-intro", "ethics-privacy-and-bias", "threats-to-validity", "correlations", "bar-and-pie-charts", "flags", "histograms", "histograms-interpret"], + science : ["simple-data-types", "ds-intro", "numbers-inside-video-games", "simple-data-types", "bar-and-pie-charts", "scatter-plots", "histograms", "histograms-interpret", "measures-of-center", "box-plots"], cs : ["ds-intro", "simple-data-types", "contracts", "defining-values", "defining-functions", 
"re-intro-to-data-structures", "function-composition", "lookups", "if-expressions", "making-game-images", "table-methods", "composing-table-operations", "advanced-visualizations", "functions-for-character-animation", "player-animation", "problem-decomposition", "re-adding-collisions", "re-adding-levels", "re-build-your-own-animation", "re-functions-that-ask-questions", "re-key-events", "re-making-pong", "re-nested-structures", "re-refactoring", "re-scoring", "re-structures-reactors-animations", "re-timers", "re-your-own-drawing-functions"] }; diff --git a/shared/langs/en-us/learningObjectives/statistics.json b/shared/langs/en-us/learningObjectives/statistics.json index 40a3e0e3bed..e0b5e228061 100644 --- a/shared/langs/en-us/learningObjectives/statistics.json +++ b/shared/langs/en-us/learningObjectives/statistics.json @@ -1,201 +1,244 @@ { - "example-learning-objective": { + "example-learning-objective": { "text": "Prose we want to show up in the lesson.", "standards": ["std1", "std2"], "assessment-rec": "author notes about how it might be assessed" }, - "table-build-column" : { - "text": "Add a column to a table.", + "cat-v-quant-def": { + "text":"Explain the difference between Categorical and Quantitative data, recognizing that Quantitative data measures an amount while Categorical data is used to qualify.", "standards": [""], - "assessment-rec": "Give students a starter-file with a table and have them add a column" + "assessment-rec": "" }, - "table-filter" : { - "text": "Filter a table using a Boolean-producing function.", + "cat-v-quant-variable": { + "text":"Identify whether a variable in a dataset is Categorical or Quantitative by considering whether the data tells us \"what kind?\" or \"how much?\"", "standards": [""], - "assessment-rec": "Give students a starter-file with a table and have them filter it." 
+ "assessment-rec": "" }, - "stdev-understand" : { - "text": "Use standard deviation to make judgments about data, and understand the role it plays in those judgements.", - "standards": ["HSS.ID.A.2"], - "assessment-rec": "Compare two datasets, their means and stdevs, and a conclusion that ignores the stdev. Ask students why the conclusion is wrong" + "pros-cons-bar-pie": { + "text":"Recognize that while pie charts and bar charts both display the counts of categorical data, consideration should be given to which is a more useful display for a given situation.", + "standards": [""], + "assessment-rec": "" }, - "stdev-calc" : { - "text": "Calculate the standard deviation of a dataset.", - "standards": ["HSS.ID.A.2"], - "assessment-rec": "Give students a dataset, have them calculate the stdev" + "interpret-bar": { + "text":"Interpret bar charts to answer statistical questions about a dataset.", + "standards": [""], + "assessment-rec": "" }, - "boxplot-create" : { - "text": "Compute the 5-number summary and draw the box plot for a given dataset.", - "standards": ["6.SP.B.4","HSS.ID.A.1"], - "assessment-rec": "Give students a dataset, have them calculate the 5-number summary and draw a box plot" + "interpret-pie": { + "text":"Interpret pie charts to answer statistical questions about a dataset.", + "standards": [""], + "assessment-rec": "" }, - "spread-transfer" : { - "text": "Compare and contrast information displayed in a box plot and a histogram, identifying that box plots have variable intervals and histograms have fixed bins.", - "standards": ["6.SP.B.4","HSS.ID.A.1"], - "assessment-rec": "Have students match box plots to histograms" + "interpret-stacked-bar": { + "text":"Interpret stacked bar charts to answer statistical questions about a dataset.", + "standards": [""], + "assessment-rec": "" }, - "histograms-pyret": { - "text":"Create histograms from a dataset in Pyret.", + "interpret-multi-bar": { + "text":"Interpret multi-bar charts to answer statistical 
questions about a dataset.", "standards": [""], "assessment-rec": "" }, - "random-sample-pyret": { - "text":"Use Pyret to generate differently sized random samples from a dataset.", + "describe-data-cycle": { + "text":"Describe the four parts of the Data Cycle, including: (1) ask questions, (2) consider the data, (3) analyze the data, and (4) interpret the data.", "standards": [""], "assessment-rec": "" }, - "measures-of-center-pyret":{ - "text":"Compute measures of center including mean, median and mode(s) of a dataset in Pyret.", + "develop-statistical-question": { + "text":"Develop a statistical research question about a real-world dataset.", "standards": [""], "assessment-rec": "" }, - "measures-of-spread-pyret":{ - "text":"Compute measures of spread of a dataset including range and IQR in Pyret.", + "stat-inference": { + "text":"Explain how statistical inference uses information from a sample (e.g., probability) to draw conclusions about the larger population from which the sample was taken.", "standards": [""], "assessment-rec": "" }, - "categorical-visualizations-pyret":{ - "text":"Create categorical visualizations from a single column of a dataset in Pyret (i.e. pie charts, bar charts).", + "sample-size": { + "text":"Describe the role of sample size, recognizing that larger samples generally yield better estimates of what's true for a population.", "standards": [""], "assessment-rec": "" }, - "compound-categorical-visualizations-pyret":{ - "text":"Create categorical visualizations from two columns of a dataset in Pyret (i.e. 
multi-bar charts, stacked-bar charts).", + "random-sample-bias": { + "text":"Describe how random sampling can help us avoid bias in our analysis.", "standards": [""], "assessment-rec": "" }, - "pros-cons-bar-pie": { - "text":"Recognize that while pie charts and bar charts both display the counts of categorical data, consideration should be given to which is a more useful display for a given situation.", + "sensible-grouped-samples": { + "text":"Identify sensible grouped samples for a dataset.", "standards": [""], "assessment-rec": "" }, - "interpret-bar": { - "text":"Interpret bar charts to answer statistical questions about a dataset.", + "infographic-ratios": { + "text":"Assess whether various infographics effectively incorporate ratios and proportions by considering the infographic's impact.", "standards": [""], "assessment-rec": "" }, - "interpret-pie": { - "text":"Interpret pie charts to answer statistical questions about a dataset.", + "infographic-stereotypes": { + "text":"Avoid stereotyping when creating an infographic in order to help viewers relate to and understand the topic.", "standards": [""], "assessment-rec": "" }, - "interpret-stacked-bar": { - "text":"Interpret stacked bar charts to answer statistical questions about a dataset.", + "rows-and-columns": { + "text":"Identify the specific Rows and Columns needed to answer various questions.", "standards": [""], "assessment-rec": "" }, - "interpret-multi-bar": { - "text":"Interpret multi-bar charts to answer statistical questions about a dataset.", + "describe-dataset": { + "text":"Describe a dataset by discussing its source, structure, and relevance.", "standards": [""], "assessment-rec": "" }, - "sensible-grouped-samples": { - "text":"Identify sensible grouped samples for a dataset.", + "define-subsets": { + "text":"Define tables of differently sized subsets from a dataset in Pyret.", "standards": [""], "assessment-rec": "" }, - "develop-statistical-question": { - "text":"Develop a statistical 
research question about a real-world dataset.", + "explanatory-response": { + "text":" Differentiate between an explanatory variable and a response variable, recognizing that the response variable (plotted on the y-axis) responds to the explanatory variable (plotted on the x-axis).", "standards": [""], "assessment-rec": "" }, - "cat-v-quant-def": { - "text":"Explain the difference between Categorical and Quantitative data, recognizing that Quantitative data measures an amount while Categorical data is used to qualify.", + "scatter-plots-by-hand": { + "text":"Make scatter plots by hand given a list of (x,y) pairs.", "standards": [""], "assessment-rec": "" }, - "cat-v-quant-variable": { - "text":"Identify whether a variable in a dataset is Categorical or Quantitative by considering whether the data tells us \"what kind?\" or \"how much?\"", + "scatter-plots-proglang": { + "text":"Make scatter plots in @proglang.", "standards": [""], "assessment-rec": "" }, - - - "use-contracts": { - "text":"Use contracts to write expressions that will produce one-variable visualizations in Pyret (pie charts, bar charts, histograms, and box plots).", + "trends-in-scatter-plots": { + "text":"Identify possible trends in scatter-plots by exploring relationships between columns.", "standards": [""], "assessment-rec": "" }, - "read-contracts": { - "text":"Make sense of functions in @proglang by interpreting their contracts.", + "unusual-observations": { + "text":"Understand that unusual observations are always worth thinking about: sometimes they are random, sometimes they provide deeper insight, and sometimes are the points we are looking for.", "standards": [""], "assessment-rec": "" }, - "infographic-ratios": { - "text":"Assess whether various infographics effectively incorporate ratios and proportions by considering the infographic's impact.", + "visual-cues": { + "text":"Explain how visual cues can enhance a display by making it more meaningful, accessible, or exciting.", "standards": 
[""], "assessment-rec": "" }, - "infographic-stereotypes": { - "text":"Avoid stereotyping when creating an infographic in order to help viewers relate to and understand the topic.", + "order-matters": { + "text":" Understand that composing table operations in a particular order (build, filter, sort) is required to avoid crashing the program or producing nonsensical results.", "standards": [""], "assessment-rec": "" }, - "data-cycle-parts": { - "text":"Describe the four parts of the Data Cycle, including: (1) ask questions, (2) consider the data, (3) analyze the data, and (4) interpret the data.", + "diagram-composition": { + "text":" Diagram function composition to make sense of the order in which table operations must be performed.", "standards": [""], "assessment-rec": "" }, - "stat-inference": { - "text":"Explain how statistical inference uses information from a sample (e.g., probability) to draw conclusions about the larger population from which the sample was taken.", + "compose-table-ops": { + "text":"Compose table operations to conduct more sophisticated analyses.", "standards": [""], "assessment-rec": "" }, - "sample-size": { - "text":"Describe the role of sample size, recognizing that larger samples generally yield better estimates of what's true for a population.", + "make-dot-plots": { + "text": "Make dot plots by hand to display the frequency of values in quantitative data.", "standards": [""], "assessment-rec": "" }, - "random-sample-bias": { - "text":"Describe how random sampling can help us avoid bias in our analysis.", + "dot-plot-features": { + "text": "Describe the distribution of data in a dot plot by identifying peaks, clusters, gaps, outliers, and typicality.", "standards": [""], "assessment-rec": "" }, - "describe-data-cycle": { - "text":"Describe the four parts of the Data Cycle, including: (1) ask questions, (2) consider the data, (3) analyze the data, and (4) interpret the data.", + "variability-define": { + "text": "Define the variability 
of a categorical dataset as how different or alike data points are.", "standards": [""], "assessment-rec": "" }, - "rows-and-columns": { - "text":"Identify the specific Rows and Columns needed to answer various questions.", + "variability-describe": { + "text": "Describe the variability of data in a dot plot, recognizing that increased variability results in a wider spread while decreased variability results in a more narrow spread.", "standards": [""], "assessment-rec": "" }, - "describe-dataset": { - "text":"Describe a dataset by discussing its source, structure, and relevance.", + "dot-plot-v-histogram": { + "text": "Compare and contrast dot plots and histograms, recognizing that: (1) both display the frequency and distribution of quantitative data, and (2) dot plots display the frequency of individual values while histograms display the frequency of groups of values.", + "standards": [""], + "assessment-rec": "match dot plots and histograms; evaluate merits of each" + }, + "best-measure-of-center":{ + "text":"Determine which measure of center – mean, median, or mode – best summarizes a dataset.", "standards": [""], "assessment-rec": "" }, - "define-subsets": { - "text":"Define tables of differently sized subsets from a dataset in Pyret.", + "histograms-and-measures-of-center":{ + "text":"Draw conclusions about the mean, median, and mode of a dataset represented by a histogram.", "standards": [""], "assessment-rec": "" }, - "explanatory-response": { - "text":" Differentiate between an explanatory variable and a response variable, recognizing that the response variable (plotted on the y-axis) responds to the explanatory variable (plotted on the x-axis).", + "peaks-clusters-gaps": { + "text": "Describe the distribution of data in a histogram by identifying peaks, clusters, gaps, and outliers in histograms.", "standards": [""], "assessment-rec": "" }, - "scatter-plots-by-hand": { - "text":"Make scatter plots by hand given a list of (x,y) pairs", + 
"histogram-skew-or-symmetric": { + "text": "Identify a histogram's shape as skewed right, skewed left, or symmetric.", "standards": [""], "assessment-rec": "" }, - "scatter-plots-pyret": { - "text":"Make scatter plots in Pyret", + "bar-chart-v-histogram": { + "text": "Understand that bar charts display the frequency of categorical data while histograms show the frequency and distribution of quantitative data.", + "standards": [""], + "assessment-rec": "use enrollment-by-college question from Whitaker & Jacobbe, 2017" + }, + "histogram-shape": { + "text": "Use a histogram's shape to draw conclusions about quantitative data.", + "standards": [""], + "assessment-rec": "update Joy's What Shape Makes Sense, make it required" + }, + "choosing-bin-size": { + "text": "Recognize that changing a histogram's bin size impacts the histogram's shape and can affect what we learn about the distribution of the data.", "standards": [""], "assessment-rec": "" }, - "trends-in-scatter-plots": { - "text":"Identify possible trends in scatter-plots by exploring relationships between columns.", + "make-histograms": { + "text": "Make histograms by hand and in @proglang to represent a quantitative dataset.", "standards": [""], "assessment-rec": "" }, - "unusual-observations": { - "text":"Understand that unusual observations are always worth thinking about: sometimes they are random, sometimes they provide deeper insight, and sometimes are the points we are looking for.", + "histogram-center-spread": { + "text": "Make claims about the center and spread of a histogram.", "standards": [""], "assessment-rec": "" }, + "histogram-variability": { + "text": "Assess the level of variability displayed by a histogram.", + "standards": [""], + "assessment-rec": "" + }, + "histograms-by-hand": { + "text": "Make histograms by hand to represent a quantitative dataset.", + "standards": [""], + "assessment-rec": "" + }, + "boxplot-create" : { + "text": "Compute the 5-number summary and draw the box plot for 
a given dataset.", + "standards": ["6.SP.B.4","HSS.ID.A.1"], + "assessment-rec": "Give students a dataset, have them calculate the 5-number summary and draw a box plot" + }, + "spread-transfer" : { + "text": "Compare and contrast information displayed in a box plot and a histogram, identifying that box plots have variable intervals and histograms have fixed bins.", + "standards": ["6.SP.B.4","HSS.ID.A.1"], + "assessment-rec": "Have students match box plots to histograms" + }, + "stdev-understand" : { + "text": "Use standard deviation to make judgments about data, and understand the role it plays in those judgements.", + "standards": ["HSS.ID.A.2"], + "assessment-rec": "Compare two datasets, their means and stdevs, and a conclusion that ignores the stdev. Ask students why the conclusion is wrong" + }, + "stdev-calc" : { + "text": "Calculate the standard deviation of a dataset.", + "standards": ["HSS.ID.A.2"], + "assessment-rec": "Give students a dataset, have them calculate the stdev" + }, "form": { "text":"Determine whether a scatter plot's form seems to be linear, nonlinear, or have no relationship at all.", "standards": [""], @@ -216,73 +259,98 @@ "standards": ["OK.A1.D.1.2", "OK.A2.D.1.2", "OK.A2.D.1.3", "8.SP.A.2", "8.SP.A.3"], "assessment-rec": "" }, - "develop-survey": { - "text":"Develop a survey that avoids data-hygiene problems such as missing data, inconsistent types, inconsistent units, and inconsistent naming.", + "threats-demonstrate": { + "text":"Demonstrate how various threats to validity can impact the validity of conclusions drawn when research is conducted.", "standards": [""], "assessment-rec": "" }, - "response-validation": { - "text":"Use response validation in Google Forms, including Required Questions, Question Format, Descriptive Instructions, and Specify Parameters.", + "threats-identify": { + "text":"Identify threats to validity in a variety of scenarios and explain how they can result in drawing unreliable conclusions.", "standards": 
[""], "assessment-rec": "" }, - "id-hygiene-probs": { - "text":"Identify different data hygiene problems that can compromise analysis (i.e., variations in spelling and capitalization, typos, etc).", + "threats-minimize": { + "text":" Describe some of the ways in which good data scientists can minimize threats to validity.", "standards": [""], "assessment-rec": "" }, - "import-data": { - "text":"Import data from a Google Sheet into Pyret.", + "when-subsets-make-sense": { + "text":"Understand that when there is variability between subsets in a column of data it might not make sense to look for trends in the whole.", "standards": [""], "assessment-rec": "" }, - "visual-cues": { - "text":"Explain how visual cues can enhance a display by making it more meaningful, accessible, or exciting.", + "use-contracts": { + "text":"Use contracts to write expressions that will produce one-variable visualizations in @proglang (pie charts, bar charts, histograms, and box plots).", "standards": [""], "assessment-rec": "" }, - "advanced-display-pyret": { - "text":"Create an advanced display in Pyret to present information about an additional variable that would otherwise not be considered.", + "read-contracts": { + "text":"Make sense of functions in @proglang by interpreting their contracts.", "standards": [""], "assessment-rec": "" }, - "threats-demonstrate": { - "text":"Demonstrate how various threats to validity can impact the validity of conclusions drawn when research is conducted.", + "table-build-column" : { + "text": "Add a column to a table.", + "standards": [""], + "assessment-rec": "Give students a starter-file with a table and have them add a column" + }, + "table-filter" : { + "text": "Filter a table using a Boolean-producing function.", + "standards": [""], + "assessment-rec": "Give students a starter-file with a table and have them filter it." 
+ }, + "random-sample-pyret": { + "text":"Use Pyret to generate differently sized random samples from a dataset.", "standards": [""], "assessment-rec": "" }, - "threats-identify": { - "text":"Identify threats to validity in a variety of scenarios and explain how they can result in drawing unreliable conclusions.", + "measures-of-center-proglang":{ + "text":"Compute measures of center including mean, median and mode(s) of a dataset in @proglang.", "standards": [""], "assessment-rec": "" }, - "threats-minimize": { - "text":" Describe some of the ways in which good data scientists can minimize threats to validity.", + "measures-of-spread-proglang":{ + "text":"Compute measures of spread of a dataset including range and IQR in @proglang.", "standards": [""], "assessment-rec": "" }, - "order-matters": { - "text":" Understand that composing table operations in a particular order (build, filter, sort) is required to avoid crashing the program or producing nonsensical results.", + "histograms-proglang": { + "text": "Make histograms in @proglang to represent a quantitative dataset.", "standards": [""], "assessment-rec": "" }, - "diagram-composition": { - "text":" Diagram function composition to make sense of the order in which table operations must be performed.", + "categorical-visualizations-proglang":{ + "text":"Create categorical visualizations from a single column of a dataset in @proglang (i.e. pie charts, bar charts).", "standards": [""], "assessment-rec": "" }, - "compose-table-ops": { - "text":"Compose table operations to conduct more sophisticated analyses.", + "compound-categorical-visualizations-proglang":{ + "text":"Create categorical visualizations from two columns of a dataset in @proglang (i.e. 
multi-bar charts, stacked-bar charts).", "standards": [""], "assessment-rec": "" }, - "when-subsets-make-sense": { - "text":"Understand that when there is variability between subsets in a column of data it might not make sense to look for trends in the whole.", + "advanced-display-pyret": { + "text":"Create an advanced display in Pyret to present information about an additional variable that would otherwise not be considered.", "standards": [""], "assessment-rec": "" }, - "": { - "text":"", + "develop-survey": { + "text":"Develop a survey that avoids data-hygiene problems such as missing data, inconsistent types, inconsistent units, and inconsistent naming.", + "standards": [""], + "assessment-rec": "" + }, + "response-validation": { + "text":"Use response validation in Google Forms, including Required Questions, Question Format, Descriptive Instructions, and Specify Parameters.", + "standards": [""], + "assessment-rec": "" + }, + "id-hygiene-probs": { + "text":"Identify different data hygiene problems that can compromise analysis (i.e., variations in spelling and capitalization, typos, etc).", + "standards": [""], + "assessment-rec": "" + }, + "import-data": { + "text":"Import data from a Google Sheet into @proglang.", "standards": [""], "assessment-rec": "" } diff --git a/shared/langs/en-us/misconceptions.json b/shared/langs/en-us/misconceptions.json index 2801794061e..50fb8f2dd0f 100644 --- a/shared/langs/en-us/misconceptions.json +++ b/shared/langs/en-us/misconceptions.json @@ -63,6 +63,7 @@ "cooper-shore-2008" ] }, + "ms-bp-1": { "misconception" : "Students often think that the median line actually represents the mean, causing misinterpretation of skewed datasets", "addressed" : false, @@ -88,7 +89,8 @@ "" ], "lessons" : [ - "box-plots" + "box-plots", + "dot-plots" ], "standards": [ "" @@ -96,5 +98,115 @@ "source": [ "bakker-et-al-2005" ] + }, + + "ms-variability-1": { + "misconception" : "Students have an oversimplified view of variability: they believe that 
variability can be judged based solely on the range of the data, and they think that histograms with greater variability in heights of bars indicate greater variability of the dataset.", + "addressed" : true, + "proposed-solution" : [ + "Instructors should explicitly discuss the concept of variability of data in general and not limit the focus to quantifying variability through common measures such as range, interquartile range, and standard deviation.", + "To gain a better understanding of how variability is represented in histograms of quantitative data, students should examine histograms of little and great variation.", + "To facilitate understanding the connection between shape of a distribution and likely relative positions of center, instructors might consider first having students find measures of center from graphs where the raw data are completely accessible.", + "When extracting data from graphs, students should be asked to identify the data values. Otherwise, when finding summary measures, students may revert to using memorized algorithms." + ], + "lessons" : [ + "histograms", + "dot-plots" + ], + "standards": [ + "6.SP.B.5.c" + ], + "sources": [ + "cooper-2018" + ] + }, + "ms-variability-2": { + "misconception" : "Variability of categorical data is often left out of the conversation, despite being more intuitive to students. Sometimes when it is introduced, students overgeneralize and think of variability as unalikeability in a variety of settings (histograms)", + "addressed" : true, + "proposed-solution" : [ + "Provide instruction on variability of categorical data, while emphasizing that there are multiple ways to assess variability."
+ ], + "lessons" : [ + "variability" + ], + "standards": [ + "6.SP.B.5.c" + ], + "sources": [ + "kader-2007" + ] + }, + + + "ms-histogram-v-bar": { + "misconception": "Students do not understand the distinction between a bar chart and a histogram, and why this distinction is important; consequently, they do not understand the different methods used to evaluate variability from these different representations.", + "addressed": false, + "proposed-solution": [ + "Focus on dot plots as a “bridge” representation (from bar graphs to histograms); they are easier to interpret and will not be confused with bar graphs.", + "Integrate tech that “fuses” dots into other displays." + ], + "lessons": [ + "" + ], + "sources": [ + "kaplan-et-al-2014", + "cooper-shore-2008" + ] + }, + "ms-axes-confusion": { + "misconception": "Students use the frequency (y-axis) instead of the data values (x-axis) when reporting on variability.", + "addressed": false, + "proposed-solution": [ + "Discuss the concept of variability of data in general; do not limit the focus to common measures such as range, interquartile range, and standard deviation.", + "Examine histograms of both little and great variation and discuss how distributions with the same mean and median could differ in variability." + ], + "lessons": [ + "" + ], + "sources": [ + "cooper-shore-2008" + ] + }, + "ms-histogram-variability": { + "misconception": "Students believe that a flatter histogram equates to less variability in the data than a bumpy histogram.", + "addressed": false, + "proposed-solution": [ + "Focus on dot plots as a “bridge” representation (from bar graphs to histograms); they are easier to interpret and will not be confused with bar graphs.", + "Integrate tech that “fuses” dots into other displays." 
+ ], + "lessons": [ + "" + ], + "sources": [ + "kaplan-et-al-2014" + ] + }, + "ms-histogram-time": { + "misconception": "Students add a time component to the variable graphed on the horizontal axis.", + "addressed": false, + "proposed-solution": [ + "Ensure that students can distinguish between axes on a histogram (see ms-axes-confusion)." + ], + "lessons": [ + "" + ], + "sources": [ + "kaplan-et-al-2014" + ] + }, + "ms-histogram-bp": { + "misconception": "Students cannot link a histogram to a corresponding box plot.", + "addressed": false, + "proposed-solution": [ + "With regard to education, we can say that, before students are confronted with both histograms and box plots to solve a certain task, it is important that they are able to correctly interpret each of these representations separately. On the other hand, however, the use of both representations together might be a fruitful way of letting students understand the merits and pitfalls of both representations, giving them a chance to construct a more coherent understanding of both representations (Lem et al., 2011, p. 5)."
+ ], + "lessons": [ + "" + ], + "sources": [ + "boels-et-al-2019", + "lem-et-al-2011" + + ] } } \ No newline at end of file diff --git a/shared/langs/en-us/practices/practices-cc-dictionary.json b/shared/langs/en-us/practices/practices-cc-dictionary.json index 623efa0aec2..d5361b6c537 100644 --- a/shared/langs/en-us/practices/practices-cc-dictionary.json +++ b/shared/langs/en-us/practices/practices-cc-dictionary.json @@ -63,7 +63,7 @@ "standard-deviation", "box-plots", "linear-regression", - "visualizing-the-shape-of-data", + "histograms-interpret", "histograms", "correlations", "surface-area-rect-prism", diff --git a/shared/langs/en-us/practices/practices-sep-dictionary.json b/shared/langs/en-us/practices/practices-sep-dictionary.json index 8c06b98b32d..8301cd8279b 100644 --- a/shared/langs/en-us/practices/practices-sep-dictionary.json +++ b/shared/langs/en-us/practices/practices-sep-dictionary.json @@ -18,7 +18,7 @@ "box-plots", "measures-of-center", "linear-regression", - "visualizing-the-shape-of-data", + "histograms-interpret", "histograms", "grouped-samples", "bar-and-pie-charts", diff --git a/shared/langs/en-us/standards/standards-cc-math-dictionary.json b/shared/langs/en-us/standards/standards-cc-math-dictionary.json index 3626dfc266c..ec132e38014 100644 --- a/shared/langs/en-us/standards/standards-cc-math-dictionary.json +++ b/shared/langs/en-us/standards/standards-cc-math-dictionary.json @@ -55,9 +55,9 @@ "description": "Apply and extend previous understandings of multiplication and division to multiply and divide fractions.", "lessons": [] }, - "5.NF.B.7.B": { + "5.NF.B.7.B":{ "description": "Interpret division of a whole number by a unit fraction, and compute such quotients.", - "lessons": ["multiplicat-inverse"] + "lessons":["multiplicative-inverse"] }, "5.MD.A": { "description": "Convert like measurements units within a given measurement system.", @@ -341,7 +341,7 @@ "lessons": [ "measures-of-center", "histograms", - "visualizing-the-shape-of-data ", 
+ "histograms-interpret ", "box-plots" ] }, @@ -350,12 +350,11 @@ "lessons": ["ds-intro", "choosing-your-dataset", "data-cycle"] }, "6.SP.A.2": { - "description": "Understand that a set of data collected to answer a statistical question has a distribution which can be described by its center, spread, overall shape, or a frequency table.", + "description": "Understand that a set of data collected to answer a statistical question has a distribution which can be described by its center, spread, and overall shape.", "lessons": [ - "contracts-tables-visualizations", "measures-of-center", "histograms", - "visualizing-the-shape-of-data ", + "histograms-interpret ", "box-plots", "standard-deviation" ] @@ -368,7 +367,7 @@ "description": "Summarize and describe distributions.", "lessons": [ "measures-of-center", - "visualizing-the-shape-of-data ", + "histograms-interpret ", "box-plots", "project-data-exploration", "project-research-paper" @@ -379,10 +378,10 @@ "lessons": [ "standard-deviation", "box-plots", - "visualizing-the-shape-of-data ", + "histograms-interpret ", "histograms", "project-data-exploration", - "project-research-paper", + "project-research-paper", "codap-dot-plots-bar-charts" ] }, @@ -1739,7 +1738,7 @@ "codap-dot-plots-bar-charts", "measures-of-center", "histograms", - "visualizing-the-shape-of-data ", + "histograms-interpret ", "box-plots", "project-infographic" ] @@ -1749,7 +1748,7 @@ "lessons": [ "standard-deviation", "box-plots", - "visualizing-the-shape-of-data ", + "histograms-interpret ", "histograms" ] }, @@ -1768,7 +1767,7 @@ "standard-deviation", "box-plots", "measures-of-center", - "visualizing-the-shape-of-data ", + "histograms-interpret ", "histograms" ] }, diff --git a/shared/langs/en-us/standards/standards-csta-dictionary.json b/shared/langs/en-us/standards/standards-csta-dictionary.json index e2b32c230dc..fc2cb11486f 100644 --- a/shared/langs/en-us/standards/standards-csta-dictionary.json +++ 
b/shared/langs/en-us/standards/standards-csta-dictionary.json @@ -122,7 +122,7 @@ "standard-deviation", "box-plots", "linear-regression", - "visualizing-the-shape-of-data", + "histograms-interpret", "correlations" ] }, @@ -184,7 +184,7 @@ "standard-deviation", "box-plots", "linear-regression", - "visualizing-the-shape-of-data", + "histograms-interpret", "histograms", "bar-and-pie-charts", "choosing-your-dataset" @@ -637,7 +637,7 @@ "project-research-paper", "project-data-exploration", "advanced-visualizations", - "visualizing-the-shape-of-data", + "histograms-interpret", "histograms", "choosing-your-dataset", "project-video-game", diff --git a/shared/langs/en-us/standards/standards-k12cs-dictionary.json b/shared/langs/en-us/standards/standards-k12cs-dictionary.json index 6a3369e880e..9f849d1dd8f 100644 --- a/shared/langs/en-us/standards/standards-k12cs-dictionary.json +++ b/shared/langs/en-us/standards/standards-k12cs-dictionary.json @@ -73,7 +73,7 @@ "scatter-plots", "standard-deviation", "box-plots", - "visualizing-the-shape-of-data", + "histograms-interpret", "choosing-your-dataset" ] }, diff --git a/shared/langs/en-us/standards/standards-ma-dictionary.json b/shared/langs/en-us/standards/standards-ma-dictionary.json index e18d7915959..45c434c5dbf 100644 --- a/shared/langs/en-us/standards/standards-ma-dictionary.json +++ b/shared/langs/en-us/standards/standards-ma-dictionary.json @@ -1049,7 +1049,7 @@ "codap-dot-plots-bar-charts", "codap-exploring", "histograms", - "visualizing-the-shape-of-data", + "histograms-interpret", "box-plots", "advanced-visualizations", "scatter-plots", diff --git a/shared/langs/en-us/standards/standards-ok-dictionary.json b/shared/langs/en-us/standards/standards-ok-dictionary.json index 3e2207850e9..2dd905b3b1a 100644 --- a/shared/langs/en-us/standards/standards-ok-dictionary.json +++ b/shared/langs/en-us/standards/standards-ok-dictionary.json @@ -75,7 +75,7 @@ "description": "Identify, describe, and analyze linear relationships between 
two variables.", "lessons": [ "probability-inference", - "visualizing-the-shape-of-data", + "histograms-interpret", "histograms", "grouped-samples", "choosing-your-dataset", @@ -193,7 +193,7 @@ "OK.PA.D.1.1": { "description": "Describe the impact that inserting or deleting a data point has on the mean and the median of a data set. Know how to create data displays using a spreadsheet and use a calculator to examine this impact.", "lessons": [ - "visualizing-the-shape-of-data", + "histograms-interpret", "histograms", "grouped-samples", "choosing-your-dataset" @@ -490,7 +490,7 @@ "OK.A1.D.1.1": { "description": "Describe a data set using data displays, describe and compare data sets using summary statistics, including measures of central tendency, location, and spread. Know how to use calculators, spreadsheets, or other appropriate technology to display data and calculate summary statistics.", "lessons": [ - "visualizing-the-shape-of-data", + "histograms-interpret", "histograms", "grouped-samples", "choosing-your-dataset" @@ -1125,7 +1125,7 @@ "lessons": [ "standard-deviation", "box-plots", - "visualizing-the-shape-of-data", + "histograms-interpret", "histograms", "grouped-samples", "bar-and-pie-charts", @@ -1170,7 +1170,7 @@ "lessons": [ "standard-deviation", "box-plots", - "visualizing-the-shape-of-data", + "histograms-interpret", "histograms", "grouped-samples", "bar-and-pie-charts", @@ -1182,7 +1182,7 @@ "lessons": [ "standard-deviation", "box-plots", - "visualizing-the-shape-of-data", + "histograms-interpret", "histograms", "grouped-samples", "bar-and-pie-charts", @@ -1602,7 +1602,7 @@ "OK.L1.AP.PD.05": { "description": "Evaluate and refine computational artifacts to make them more user-friendly, efficient and/or accessible.", "lessons": [ - "visualizing-the-shape-of-data", + "histograms-interpret", "histograms" ] }, diff --git a/shared/langs/en-us/standards/standards-or-dictionary.json b/shared/langs/en-us/standards/standards-or-dictionary.json index 
7fbaacee12e..774ba5bc0eb 100644 --- a/shared/langs/en-us/standards/standards-or-dictionary.json +++ b/shared/langs/en-us/standards/standards-or-dictionary.json @@ -333,7 +333,7 @@ "data-cycle", "measures-of-center", "histograms", - "visualizing-the-shape-of-data", + "histograms-interpret", "box-plots" ] }, @@ -357,7 +357,7 @@ "lessons": [ "measures-of-center", "histograms", - "visualizing-the-shape-of-data", + "histograms-interpret", "box-plots", "standard-deviation" ] @@ -384,7 +384,7 @@ "description": "Interpret data and answer investigative questions.", "lessons": [ "measures-of-center", - "visualizing-the-shape-of-data", + "histograms-interpret", "box-plots", "project-data-exploration", "project-research-paper" @@ -1138,7 +1138,7 @@ "lessons": [ "standard-deviation", "box-plots", - "visualizing-the-shape-of-data", + "histograms-interpret", "histograms" ] }, @@ -1157,7 +1157,7 @@ "standard-deviation", "box-plots", "measures-of-center", - "visualizing-the-shape-of-data", + "histograms-interpret", "histograms" ] }, diff --git a/shared/langs/en-us/standards/standards-or-dictionary.rkt b/shared/langs/en-us/standards/standards-or-dictionary.rkt index eaa9be0f711..7b3e4054666 100644 --- a/shared/langs/en-us/standards/standards-or-dictionary.rkt +++ b/shared/langs/en-us/standards/standards-or-dictionary.rkt @@ -262,7 +262,7 @@ "data-cycle" "measures-of-center" "histograms" - "visualizing-the-shape-of-data" + "histograms-interpret" "box-plots" ) ("6.DR.A.1" @@ -280,7 +280,7 @@ "Collect and record data with technology to identify and describe the characteristics of numerical data sets using quantitative measures of center and variability." "measures-of-center" "histograms" - "visualizing-the-shape-of-data" + "histograms-interpret" "box-plots" "standard-deviation" ) @@ -301,7 +301,7 @@ ("6.DR.D" "Interpret data and answer investigative questions." 
"measures-of-center" - "visualizing-the-shape-of-data" + "histograms-interpret" "box-plots" "project-data-exploration" "project-research-paper" @@ -873,7 +873,7 @@ "Identify appropriate ways to summarize and then represent the distribution of univariate and bivariate data multiple ways with graphs and/or tables. Use technology to present data that supports interpretation of tabular and graphical representations." "standard-deviation" "box-plots" - "visualizing-the-shape-of-data" + "histograms-interpret" "histograms" ) ("HS.DR.C.9" @@ -888,7 +888,7 @@ "standard-deviation" "box-plots" "measures-of-center" - "visualizing-the-shape-of-data" + "histograms-interpret" "histograms" ) ("HS.DR.D" diff --git a/shared/langs/en-us/starterFiles/data-science.json b/shared/langs/en-us/starterFiles/data-science.json index c55f30b00d1..3accc826427 100644 --- a/shared/langs/en-us/starterFiles/data-science.json +++ b/shared/langs/en-us/starterFiles/data-science.json @@ -220,5 +220,23 @@ "pyret": { "url": "https://code.pyret.org/editor#share=1rdPYXc64IkvSROcCWliYua7o81YBb0rb" } + }, + "tooth-data": { + "title": "Tooth Data Starter File", + "pyret": { + "url": "https://code.pyret.org/editor#share=1uZFkL86wWKpogpVOADdWlMXaQy3_GRjr&v=dc8be4e" + }, + "codap": { + "url": "https://codap.concord.org/app/static/dg/en/cert/index.html#shared=https%3A%2F%2Fcfm-shared.concord.org%2FsUrqFbpi6JhFtPs4qp6v%2Ffile.json" + } + }, + "expanded-animals-sub-pops": { + "title": "Dogs, Rabbits, Cats & Tarantulas Starter File", + "pyret": { + "url": "https://pyret-horizon.herokuapp.com/editor#share=1TVX_ReJIRhF-klkQKvVakGbTHOLtrn6C" + }, + "codap": { + "url": "https://codap.concord.org/app/static/dg/en/cert/index.html#shared=https%3A%2F%2Fcfm-shared.concord.org%2FsUrqFbpi6JhFtPs4qp6v%2Ffile.json" + } } } diff --git a/shared/langs/en-us/textbooks/textbook-im-6-dictionary.json b/shared/langs/en-us/textbooks/textbook-im-6-dictionary.json index 3aebe3517d3..23a20eb03ed 100644 --- 
a/shared/langs/en-us/textbooks/textbook-im-6-dictionary.json +++ b/shared/langs/en-us/textbooks/textbook-im-6-dictionary.json @@ -186,28 +186,28 @@ "IM.6.8.2": { "description": "Statistical Questions", "lessons": [ - "ds-intro" + "ds-intro", "project-data-exploration" ] }, "IM.6.8.6": { "description": "Histograms", "lessons": [ - "visualizing-the-shape-of-data", + "histograms-interpret", "histograms" ] }, "IM.6.8.7": { "description": "Using Histograms to Answer Statistical Questions", "lessons": [ - "visualizing-the-shape-of-data", + "histograms-interpret", "histograms" ] }, "IM.6.8.8": { "description": "Describing Distributions on Histograms", "lessons": [ - "visualizing-the-shape-of-data", + "histograms-interpret", "histograms" ] }, diff --git a/shared/langs/en-us/textbooks/textbook-im-8-dictionary.json b/shared/langs/en-us/textbooks/textbook-im-8-dictionary.json index dcf116be593..7ae958b7d10 100644 --- a/shared/langs/en-us/textbooks/textbook-im-8-dictionary.json +++ b/shared/langs/en-us/textbooks/textbook-im-8-dictionary.json @@ -76,7 +76,7 @@ "bar-and-pie-charts", "codap-dot-plots-bar-charts", "project-infographic", - "visualizing-the-shape-of-data" + "histograms-interpret" ] }, "IM.8.6.2": { diff --git a/shared/langs/en-us/textbooks/textbook-im-alg-1-dictionary.json b/shared/langs/en-us/textbooks/textbook-im-alg-1-dictionary.json index f89c4f7b1d9..62fbc5eac89 100644 --- a/shared/langs/en-us/textbooks/textbook-im-alg-1-dictionary.json +++ b/shared/langs/en-us/textbooks/textbook-im-alg-1-dictionary.json @@ -27,7 +27,7 @@ "IM.Alg1.1.4": { "description": "The Shape of Distributions", "lessons": [ - "visualizing-the-shape-of-data" + "histograms-interpret" ] }, "IM.Alg1.1.5": { diff --git a/shared/langs/en-us/textbooks/textbook-im-alg-2-dictionary.json b/shared/langs/en-us/textbooks/textbook-im-alg-2-dictionary.json index 9b5b4d81293..42fe5d0e0d2 100644 --- a/shared/langs/en-us/textbooks/textbook-im-alg-2-dictionary.json +++ 
b/shared/langs/en-us/textbooks/textbook-im-alg-2-dictionary.json @@ -586,7 +586,7 @@ "standard-deviation", "measures-of-center", "histograms", - "visualizing-the-shape-of-data" + "histograms-interpret" ] }, "IM.Alg2.7.5": { @@ -653,7 +653,7 @@ "description": "Heart Rates: Let’s collect and analyze data.", "lessons": [ ] - }, + } }