In [5]:
import sqlite3
import pandas as pd
conn = sqlite3.connect("Data/factbook.db")
q = "SELECT * FROM sqlite_master WHERE type = 'table';"
pd.read_sql_query(q, conn)
Out[5]:
In [6]:
q1 = "SELECT * FROM facts LIMIT 5;"
pd.read_sql_query(q1, conn)
Out[6]:
In [8]:
q2 = "SELECT MIN(population), MAX(population), MIN(population_growth), MAX(population_growth) FROM facts;"
pd.read_sql_query(q2, conn)
Out[8]:
In [9]:
q3 = "SELECT name, MIN(population) FROM facts;"
pd.read_sql_query(q3, conn)
Out[9]:
In [10]:
q4 = "SELECT name, MAX(population) FROM facts;"
pd.read_sql_query(q4, conn)
Out[10]:
Findings:¶
- instead of listing the country names, the table also include continent name like Antarctica or group name "World" of all the people in the world which are not comparable to other data in the table
In [37]:
q5 = "SELECT population, population_growth, birth_rate, death_rate FROM facts WHERE (name != 'World' AND name !='Antarctica')"
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
#Comment portion is my original way, below is an alternative way
#df = pd.read_sql_query(q5, conn)
# cols = df.columns.tolist()
# fig = plt.figure(figsize =(12, 12))
# for i in range(4):
# ax = fig.add_subplot(2, 2, i+1)
# data = df[cols[i]][df[cols[i]].isnull() == False]
# ax.set_title(cols[i])
# ax.hist(data)
fig = plt.figure(figsize = (12,12))
ax = fig.add_subplot(111) #same as add_subplot(1, 1, 1)
pd.read_sql_query(q5, conn).hist(ax = ax)
Out[37]:
In [66]:
q6 = '''
SELECT name, population/area_land pop_density FROM facts
WHERE (name != 'World' AND name !='Antarctica')
ORDER BY pop_density DESC
LIMIT 30'''
fig, ax = plt.subplots(figsize = (12, 12))
df = pd.read_sql_query(q6, conn)
df = df.set_index('name')
print(df.head())
df.plot.barh(ax = ax)
Out[66]: