-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathDS101-04-02-lineplot_compare-ggplot.py
34 lines (27 loc) · 1.36 KB
/
DS101-04-02-lineplot_compare-ggplot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from ggplot.themes import theme_matplotlib
from pandas import *
from ggplot import *
import pandas
def lineplot_compare(hr_by_team_year_sf_la_csv):
    """Build a point-and-line chart comparing yearly home runs by team.

    The CSV at ``hr_by_team_year_sf_la_csv`` is loaded with
    ``pandas.read_csv``. According to the exercise notes that accompany
    this function, the file has three columns -- yearID, HR and teamID --
    and gives the total number of home runs hit each year by the SF Giants
    (teamID == 'SFN') and the LA Dodgers (teamID == "LAN"). The data can be
    viewed at:
    https://www.dropbox.com/s/wn43cngo2wdle2b/hr_by_team_year_sf_la.csv

    Args:
        hr_by_team_year_sf_la_csv: Location of the CSV file; handed
            unchanged to ``pandas.read_csv``.

    Returns:
        The ggplot object with yearID on the x axis, HR on the y axis, one
        color per teamID, point and line layers, a title and axis labels.
    """
    home_runs = pandas.read_csv(hr_by_team_year_sf_la_csv)

    # To differentiate between multiple categories on the same plot, color
    # is passed to aes together with the x/y variables rather than to the
    # geometry functions, e.g. ggplot(data, aes(xvar, yvar, color=category)).
    mapping = aes('yearID', 'HR', color='teamID')

    # Layers are added one at a time, in the same order as a single chained
    # "+" expression would apply them.
    chart = ggplot(mapping, home_runs)
    chart = chart + geom_point()
    chart = chart + geom_line()
    chart = chart + ggtitle('Number of homeruns per year')
    chart = chart + xlab('Year')
    chart = chart + ylab('Homeruns')
    return chart
# Build the comparison chart for the bundled data file and print it.
# print is called as a function with a single argument so the line is valid on
# Python 3 and behaves the same on Python 2.
# NOTE(review): presumably printing the ggplot object is what renders the
# chart -- confirm against the installed ggplot version.
# NOTE(review): the relative path uses a backslash separator, so it looks
# Windows-specific -- confirm before running elsewhere.
print(lineplot_compare(r"Data\hr_by_team_year_sf_la.csv"))