You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
# Count the samples recorded for each cancer type ("disease" column).
# Two equivalent spellings are shown: bracket access and attribute access
# to the grouped column. GroupBy.count() tallies the non-missing entries.
total_count = clinical_df.groupby("disease")["disease"].count()
total_count = clinical_df.groupby("disease").disease.count()  # same as above

# plot the number of samples for each cancer type
# (this call had been fused into the comment above and therefore never ran)
total_count.plot(kind="bar");
Challenge-filter
# Remove every row that is missing either "year_of_birth" or "vital_status".
# Option 1 and Option 2 are alternative ways of doing the same thing; running
# both in sequence is harmless because Option 2 finds nothing left to drop
# once Option 1 has run.

# Option 1
birth_reduced = birth_reduced.dropna(subset=["year_of_birth", "vital_status"])

# Option 2
# `~` is the boolean-NOT operator for masks. Unary minus is rejected by NumPy
# for boolean arrays, so it is not a dependable way to invert a mask.
birth_reduced = birth_reduced[~pd.isnull(birth_reduced["year_of_birth"])]
birth_reduced = birth_reduced[~pd.isnull(birth_reduced["vital_status"])]

# check to see that it worked
pd.unique(birth_reduced["vital_status"])
Challenge-subset
# Keep only the rows where the patient is alive AND the recorded ethnicity
# is "hispanic or latino"; a row must satisfy both conditions to be kept.
clinical_subset = clinical_df.loc[
    (clinical_df["vital_status"] == "alive")
    & (clinical_df["ethnicity"] == "hispanic or latino")
]
Challenge-subset2
# Bar chart of how many primary_diagnosis entries exist per disease.
# NOTE(review): reads `clinical_df_patients` and `plt`, neither of which is
# defined in this excerpt — confirm they are created earlier in the notebook.

# grouping data by disease (x-axis)
grouped_disease = clinical_df_patients.groupby("disease")

# extracting and storing only the primary_diagnosis count numbers (y-axis)
primary_diagnosis_counts = grouped_disease.primary_diagnosis.count()

# creating a bar graph
primary_diagnosis_counts.plot(title='Primary Diagnosis Numbers of Each Disease', kind='bar');
# semicolon in the previous line eliminates extra text in output

# labeling axes
plt.xlabel("Disease");
plt.ylabel("Frequency");