Change in Percentage of Women Awarded Bachelor's Degrees by Major 1970-2011

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline
In [2]:
# Load the percentage of bachelor's degrees awarded to women, one column per major.
# NOTE(review): this is an absolute path on one machine; the notebook will not
# run elsewhere until it points at a local copy of the CSV.
csv_path = "/Users/33Phoebe/Documents/OneDrive/Data Scientist Path/Data Sets/percent-bachelors-degrees-women-usa.csv"
women_degrees = pd.read_csv(csv_path)
In [3]:
# Preview the first five rows: a Year column followed by one percentage column per major.
women_degrees.head()
Out[3]:
Year Agriculture Architecture Art and Performance Biology Business Communications and Journalism Computer Science Education Engineering English Foreign Languages Health Professions Math and Statistics Physical Sciences Psychology Public Administration Social Sciences and History
0 1970 4.229798 11.921005 59.7 29.088363 9.064439 35.3 13.6 74.535328 0.8 65.570923 73.8 77.1 38.0 13.8 44.4 68.4 36.8
1 1971 5.452797 12.003106 59.9 29.394403 9.503187 35.5 13.6 74.149204 1.0 64.556485 73.9 75.5 39.0 14.9 46.2 65.5 36.2
2 1972 7.420710 13.214594 60.4 29.810221 10.558962 36.6 14.9 73.554520 1.2 63.664263 74.6 76.9 40.2 14.8 47.6 62.6 36.1
3 1973 9.653602 14.791613 60.2 31.147915 12.804602 38.4 16.4 73.501814 1.6 62.941502 74.9 77.4 40.9 16.5 50.4 64.3 36.4
4 1974 14.074623 17.444688 61.9 32.996183 16.204850 40.5 18.9 73.336811 2.2 62.413412 75.3 77.9 41.8 18.2 52.6 66.1 37.3

This is a cleaned dataset based on the NCES 2013 Digest of Education Statistics (download).

In [4]:
# Quick pandas line chart: Year on the x-axis, the Biology column on the y-axis.
women_degrees.plot(x="Year", y="Biology")
Out[4]:
<matplotlib.axes._subplots.AxesSubplot at 0x10d1fd198>
In [5]:
# Re-display the first five rows as a reference for the column names used in the plots below.
women_degrees.head()
Out[5]:
Year Agriculture Architecture Art and Performance Biology Business Communications and Journalism Computer Science Education Engineering English Foreign Languages Health Professions Math and Statistics Physical Sciences Psychology Public Administration Social Sciences and History
0 1970 4.229798 11.921005 59.7 29.088363 9.064439 35.3 13.6 74.535328 0.8 65.570923 73.8 77.1 38.0 13.8 44.4 68.4 36.8
1 1971 5.452797 12.003106 59.9 29.394403 9.503187 35.5 13.6 74.149204 1.0 64.556485 73.9 75.5 39.0 14.9 46.2 65.5 36.2
2 1972 7.420710 13.214594 60.4 29.810221 10.558962 36.6 14.9 73.554520 1.2 63.664263 74.6 76.9 40.2 14.8 47.6 62.6 36.1
3 1973 9.653602 14.791613 60.2 31.147915 12.804602 38.4 16.4 73.501814 1.6 62.941502 74.9 77.4 40.9 16.5 50.4 64.3 36.4
4 1974 14.074623 17.444688 61.9 32.996183 16.204850 40.5 18.9 73.336811 2.2 62.413412 75.3 77.9 41.8 18.2 52.6 66.1 37.3
In [12]:
# Plot the Biology column (labelled "Women") against its complement,
# 100 - Biology (labelled "Men"), on a single set of axes.
fig, ax = plt.subplots()
years = women_degrees["Year"]
women_share = women_degrees["Biology"]
ax.plot(years, women_share, c="blue", label="Women")
ax.plot(years, 100 - women_share, c="green", label="Men")

# Turn off the tick marks. Real booleans are required: the "off"/"on" strings
# were deprecated in Matplotlib 2.2, and later releases treat any non-empty
# string as truthy, which would leave the ticks visible.
ax.tick_params(bottom=False, left=False)

# Turn off every spine (print(ax.spines) lists the existing ones).
for spine in ax.spines.values():
    spine.set_visible(False)

ax.set_title("Percentage of Biology Degrees Awarded By Gender")
ax.legend(loc="upper right")
plt.show()
In [18]:
# Display a similar Women/Men chart for four majors on a 2 x 2 grid.
major_cats = ['Biology', 'Computer Science', 'Engineering', 'Math and Statistics']
fig = plt.figure(figsize=(12, 12))

# Line colors: color-blind friendly dark blue and orange, given as RGB
# fractions in the 0-1 range that Matplotlib expects.
cb_dark_blue = (0/255, 107/255, 164/255)
cb_orange = (255/255, 128/255, 14/255)

for sp, major in enumerate(major_cats):
    # Subplot positions are 1-based, hence sp + 1.
    ax = fig.add_subplot(2, 2, sp + 1)
    # linewidth adjusts the thickness of the plotted lines.
    ax.plot(women_degrees['Year'], women_degrees[major], c=cb_dark_blue, linewidth=2, label='Women')
    ax.plot(women_degrees['Year'], 100 - women_degrees[major], c=cb_orange, linewidth=2, label='Men')
    # Use identical axis limits on every subplot so the panels are comparable.
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    # Hide tick marks on all four sides. Booleans are required: the "off"
    # strings were deprecated in Matplotlib 2.2 and later releases treat them
    # as truthy, which leaves the ticks visible.
    ax.tick_params(bottom=False, left=False, top=False, right=False)
    for spine in ax.spines.values():
        spine.set_visible(False)
    ax.set_title(major)

# Calling pyplot.legend() here will add the legend to the last subplot that was created.
plt.legend(loc='upper right')
plt.show()
In [20]:
# Change the layout to 1 x 6 instead of 2 x 2, adding two more majors for comparison.
# cb_dark_blue and cb_orange are not assigned in this cell; they must already
# exist in the kernel from an earlier cell.
stem_cats = ['Engineering', 'Computer Science', 'Psychology', 'Biology', 'Physical Sciences', 'Math and Statistics']

fig = plt.figure(figsize=(18, 3))

last_sp = len(stem_cats) - 1
for sp, major in enumerate(stem_cats):
    ax = fig.add_subplot(1, 6, sp + 1)
    ax.plot(women_degrees['Year'], women_degrees[major], c=cb_dark_blue, label='Women', linewidth=2)
    ax.plot(women_degrees['Year'], 100 - women_degrees[major], c=cb_orange, label='Men', linewidth=2)
    for spine in ax.spines.values():
        spine.set_visible(False)
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    ax.set_title(major)
    # Hide tick marks on all four sides. Booleans are required: the "off"
    # strings were deprecated in Matplotlib 2.2 and later releases treat them
    # as truthy, which leaves the ticks visible.
    ax.tick_params(bottom=False, top=False, left=False, right=False)
    # Add text annotations to the lines of the first and last charts only;
    # the coordinates are in data units (year, percent) and hand-placed.
    if sp == 0:
        ax.text(2005, 87, "Men")
        ax.text(2002, 8, "Women")
    elif sp == last_sp:
        ax.text(2005, 62, "Men")
        ax.text(2001, 35, "Women")

# pyplot.legend() attaches the legend to the current (last created) subplot.
plt.legend(loc='upper right')
plt.show()
In [58]:
# Plot every major, grouped into three categories. Each category fills one
# column of a 6-row x 3-column grid, top to bottom.
# cb_dark_blue and cb_orange are not assigned in this cell; they must already
# exist in the kernel from an earlier cell.
stem_cats = ['Psychology', 'Biology', 'Math and Statistics', 'Physical Sciences', 'Computer Science', 'Engineering']
lib_arts_cats = ['Foreign Languages', 'English', 'Communications and Journalism', 'Art and Performance', 'Social Sciences and History']
other_cats = ['Health Professions', 'Public Administration', 'Education', 'Agriculture','Business', 'Architecture']

fig = plt.figure(figsize=(12, 18))
categories = [stem_cats, lib_arts_cats, other_cats]

# col is the 1-based grid column used by this category.
for col, cat in enumerate(categories, start=1):
    for sp, major in enumerate(cat):
        is_last = sp == len(cat) - 1
        # Subplot numbers run row by row, so row sp / column col is sp*3 + col.
        ax = fig.add_subplot(6, 3, sp*3 + col)
        ax.plot(women_degrees['Year'], women_degrees[major], c=cb_dark_blue, label='Women', linewidth=2)
        ax.plot(women_degrees['Year'], 100 - women_degrees[major], c=cb_orange, label='Men', linewidth=2)
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        ax.set_title(major)
        # Hide all tick marks, and show x-axis labels only on the bottom chart
        # of each column for a cleaner display. Booleans are required: the
        # "off"/"on" strings were deprecated in Matplotlib 2.2 and later
        # releases treat any non-empty string as truthy.
        ax.tick_params(bottom=False, top=False, left=False, right=False, labelbottom=is_last)
        # Set ticks for only 0 and 100 on the y-axis.
        ax.set_yticks([0, 100])
        # Add a line at 50 to indicate where women = men, using the third color
        # of the color-blind 10 palette; alpha is the transparency in 0~1.
        ax.axhline(50, c=(171/255, 171/255, 171/255), alpha=0.3)
        # Add text annotations to the first chart of each column, and to the
        # last chart unless that last chart is at index 4 (the five-entry column).
        # NOTE(review): the coordinates are hand-placed; confirm they sit near
        # the lines of each annotated chart.
        if sp == 0:
            ax.text(2005, 87, "Women")
            ax.text(2002, 8, "Men")
        elif is_last and sp != 4:
            ax.text(2005, 62, "Men")
            ax.text(2001, 35, "Women")

# Save the figure to a single image file. This is done before plt.show();
# a relative filename is written to the notebook's working directory.
plt.savefig("gender_degrees.png")
plt.show()
In [61]:
# Report the active Matplotlib backend (the recorded output is the ipykernel inline backend).
matplotlib.get_backend()
Out[61]:
'module://ipykernel.pylab.backend_inline'

social