In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline
In [2]:
# Load the percentage of bachelor's degrees awarded to women, one row per year.
# NOTE(review): this is an absolute, machine-specific path; the notebook will only
# run elsewhere after pointing it at a local copy of the CSV.
women_degrees_csv = "/Users/33Phoebe/Documents/OneDrive/Data Scientist Path/Data Sets/percent-bachelors-degrees-women-usa.csv"
women_degrees = pd.read_csv(women_degrees_csv)
In [3]:
# Preview the first five rows to check the column names and value ranges.
women_degrees.head(n=5)
Out[3]:
This is a cleaned dataset based on the NCES 2013 Digest of Education Statistics (download).
In [4]:
# Quick pandas line plot: women's share of Biology degrees against year.
women_degrees.plot(x="Year", y="Biology")
Out[4]:
In [5]:
# Look at the first five rows again before building the matplotlib charts.
women_degrees.head(n=5)
Out[5]:
In [12]:
# Plot the share of Biology degrees awarded to women and to men on a single Axes.
fig, ax = plt.subplots()
ax.plot(women_degrees["Year"], women_degrees["Biology"], c="blue", label="Women")
# The men's line is drawn as 100 minus the women's percentage.
ax.plot(women_degrees["Year"], 100 - women_degrees["Biology"], c="green", label="Men")
# Turn off the tick marks. Booleans are required here: current matplotlib no longer
# translates the strings "on"/"off", so "off" is truthy and would leave the ticks on.
ax.tick_params(bottom=False, left=False)
# Turn off every spine (print(ax.spines) shows the existing spines).
for spine in ax.spines.values():
    spine.set_visible(False)
ax.set_title("Percentage of Biology Degrees Awarded By Gender")
ax.legend(loc="upper right")
plt.show()
In [18]:
# Display a similar women-vs-men chart for four majors on a 2 x 2 grid.
major_cats = ['Biology', 'Computer Science', 'Engineering', 'Math and Statistics']
fig = plt.figure(figsize=(12, 12))
# Line colors: color-blind friendly dark blue and orange, as RGB scaled to 0-1.
cb_dark_blue = (0/255, 107/255, 164/255)
cb_orange = (255/255, 128/255, 14/255)
for sp, major in enumerate(major_cats):
    ax = fig.add_subplot(2, 2, sp + 1)
    # linewidth adjusts the thickness of each line.
    ax.plot(women_degrees['Year'], women_degrees[major], c=cb_dark_blue, linewidth=2, label='Women')
    ax.plot(women_degrees['Year'], 100 - women_degrees[major], c=cb_orange, linewidth=2, label='Men')
    # Same axis limits on every subplot so the panels are directly comparable.
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    # Booleans, not "off": current matplotlib treats the string "off" as truthy,
    # which would leave the tick marks visible.
    ax.tick_params(bottom=False, left=False, top=False, right=False)
    for spine in ax.spines.values():
        spine.set_visible(False)
    ax.set_title(major)
# Calling pyplot.legend() here will add the legend to the last subplot that was created.
plt.legend(loc='upper right')
plt.show()
In [20]:
# Change the layout to 1 x 6 instead of 2 x 2, with two more majors added for comparison.
stem_cats = ['Engineering', 'Computer Science', 'Psychology', 'Biology', 'Physical Sciences', 'Math and Statistics']
fig = plt.figure(figsize=(18, 3))
for sp, major in enumerate(stem_cats):
    ax = fig.add_subplot(1, 6, sp + 1)
    ax.plot(women_degrees['Year'], women_degrees[major], c=cb_dark_blue, label='Women', linewidth=2)
    ax.plot(women_degrees['Year'], 100 - women_degrees[major], c=cb_orange, label='Men', linewidth=2)
    for spine in ax.spines.values():
        spine.set_visible(False)
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    ax.set_title(major)
    # Booleans, not "off": current matplotlib treats the string "off" as truthy,
    # which would leave the tick marks visible.
    ax.tick_params(bottom=False, top=False, left=False, right=False)
    # Add text annotations to the lines of the first and last graph only.
    if sp == 0:
        ax.text(2005, 87, "Men")
        ax.text(2002, 8, "Women")
    elif sp == 5:
        ax.text(2005, 62, "Men")
        ax.text(2001, 35, "Women")
# pyplot.legend() after the loop attaches the legend to the last subplot created.
plt.legend(loc='upper right')
plt.show()
In [58]:
# Plot every major, grouped into three categories: one grid column per category.
stem_cats = ['Psychology', 'Biology', 'Math and Statistics', 'Physical Sciences', 'Computer Science', 'Engineering']
lib_arts_cats = ['Foreign Languages', 'English', 'Communications and Journalism', 'Art and Performance', 'Social Sciences and History']
other_cats = ['Health Professions', 'Public Administration', 'Education', 'Agriculture','Business', 'Architecture']
fig = plt.figure(figsize=(12, 18))
categories = [stem_cats, lib_arts_cats, other_cats]
# count is the 1-based grid column for the current category.
for count, cat in enumerate(categories, start=1):
    last_sp = len(cat) - 1
    for sp, major in enumerate(cat):
        # 6 x 3 grid numbered row by row: row sp, column count.
        ax = fig.add_subplot(6, 3, sp * 3 + count)
        ax.plot(women_degrees['Year'], women_degrees[major], c=cb_dark_blue, label='Women', linewidth=2)
        ax.plot(women_degrees['Year'], 100 - women_degrees[major], c=cb_orange, label='Men', linewidth=2)
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        ax.set_title(major)
        # Hide tick marks and x tick labels. Booleans are required: current matplotlib
        # treats the strings "off"/"on" as truthy, so "off" would hide nothing.
        ax.tick_params(bottom=False, top=False, left=False, right=False, labelbottom=False)
        # Set ticks for only 0 and 100 on the y-axis.
        ax.set_yticks([0, 100])
        # Add a line at 50 to indicate where women = men, using the 3rd color of the
        # color-blind 10 palette; alpha sets the transparency in the range 0~1.
        ax.axhline(50, c=(171/255, 171/255, 171/255), alpha=0.3)
        # Add text annotations to the lines of the first and last graph in a column.
        if sp == 0:
            ax.text(2005, 87, "Women")
            ax.text(2002, 8, "Men")
        elif sp == last_sp and sp != 4:
            # sp != 4 skips the five-item category, whose last index is 4.
            ax.text(2005, 62, "Men")
            ax.text(2001, 35, "Women")
        # Turn the x tick labels back on for the bottom graph of each column.
        if sp == last_sp:
            ax.tick_params(labelbottom=True)
# Save the plots to a single image file. This has to happen before plt.show();
# the relative filename is resolved against the notebook's working folder.
plt.savefig("gender_degrees.png")
plt.show()
In [61]:
# Show the name of the matplotlib backend currently in use.
matplotlib.get_backend()
Out[61]: