In [1]:
import pandas as pd
from pandas import Series,DataFrame
import numpy as np
In [2]:
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
%matplotlib inline
In [3]:
from __future__ import division
In [4]:
import requests
In [5]:
from StringIO import StringIO
In [6]:
url = 'http://elections.huffingtonpost.com/pollster/2012-general-election-romney-vs-obama.csv'

source = requests.get(url).text

poll_data = StringIO(source)
In [7]:
poll_df = pd.read_csv(poll_data)
In [8]:
poll_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 586 entries, 0 to 585
Data columns (total 17 columns):
Pollster                  586 non-null object
Start Date                586 non-null object
End Date                  586 non-null object
Entry Date/Time (ET)      586 non-null object
Number of Observations    564 non-null float64
Population                586 non-null object
Mode                      586 non-null object
Obama                     586 non-null float64
Romney                    586 non-null float64
Undecided                 423 non-null float64
Other                     202 non-null float64
Pollster URL              586 non-null object
Source URL                584 non-null object
Partisan                  586 non-null object
Affiliation               586 non-null object
Question Text             0 non-null float64
Question Iteration        586 non-null int64
dtypes: float64(6), int64(1), object(10)
memory usage: 77.9+ KB
In [9]:
poll_df.head()
Out[9]:
Pollster Start Date End Date Entry Date/Time (ET) Number of Observations Population Mode Obama Romney Undecided Other Pollster URL Source URL Partisan Affiliation Question Text Question Iteration
0 Politico/GWU/Battleground 2012-11-04 2012-11-05 2012-11-06T08:40:26Z 1000.0 Likely Voters Live Phone 47.0 47.0 6.0 NaN http://elections.huffingtonpost.com/pollster/p... http://www.politico.com/news/stories/1112/8338... Nonpartisan None NaN 1
1 YouGov/Economist 2012-11-03 2012-11-05 2012-11-26T15:31:23Z 740.0 Likely Voters Internet 49.0 47.0 3.0 NaN http://elections.huffingtonpost.com/pollster/p... http://cdn.yougov.com/cumulus_uploads/document... Nonpartisan None NaN 1
2 Gravis Marketing 2012-11-03 2012-11-05 2012-11-06T09:22:02Z 872.0 Likely Voters Automated Phone 48.0 48.0 4.0 NaN http://elections.huffingtonpost.com/pollster/p... http://www.gravispolls.com/2012/11/gravis-mark... Nonpartisan None NaN 1
3 IBD/TIPP 2012-11-03 2012-11-05 2012-11-06T08:51:48Z 712.0 Likely Voters Live Phone 50.0 49.0 NaN 1.0 http://elections.huffingtonpost.com/pollster/p... http://news.investors.com/special-report/50841... Nonpartisan None NaN 1
4 Rasmussen 2012-11-03 2012-11-05 2012-11-06T08:47:50Z 1500.0 Likely Voters Automated Phone 48.0 49.0 NaN NaN http://elections.huffingtonpost.com/pollster/p... http://www.rasmussenreports.com/public_content... Nonpartisan None NaN 1
In [10]:
sns.factorplot('Affiliation',data=poll_df,kind='count')
Out[10]:
<seaborn.axisgrid.FacetGrid at 0x109a156d0>
In [11]:
sns.factorplot('Affiliation',data=poll_df,hue='Population',kind='count')
Out[11]:
<seaborn.axisgrid.FacetGrid at 0x109a155d0>
In [12]:
poll_df.head()
Out[12]:
Pollster Start Date End Date Entry Date/Time (ET) Number of Observations Population Mode Obama Romney Undecided Other Pollster URL Source URL Partisan Affiliation Question Text Question Iteration
0 Politico/GWU/Battleground 2012-11-04 2012-11-05 2012-11-06T08:40:26Z 1000.0 Likely Voters Live Phone 47.0 47.0 6.0 NaN http://elections.huffingtonpost.com/pollster/p... http://www.politico.com/news/stories/1112/8338... Nonpartisan None NaN 1
1 YouGov/Economist 2012-11-03 2012-11-05 2012-11-26T15:31:23Z 740.0 Likely Voters Internet 49.0 47.0 3.0 NaN http://elections.huffingtonpost.com/pollster/p... http://cdn.yougov.com/cumulus_uploads/document... Nonpartisan None NaN 1
2 Gravis Marketing 2012-11-03 2012-11-05 2012-11-06T09:22:02Z 872.0 Likely Voters Automated Phone 48.0 48.0 4.0 NaN http://elections.huffingtonpost.com/pollster/p... http://www.gravispolls.com/2012/11/gravis-mark... Nonpartisan None NaN 1
3 IBD/TIPP 2012-11-03 2012-11-05 2012-11-06T08:51:48Z 712.0 Likely Voters Live Phone 50.0 49.0 NaN 1.0 http://elections.huffingtonpost.com/pollster/p... http://news.investors.com/special-report/50841... Nonpartisan None NaN 1
4 Rasmussen 2012-11-03 2012-11-05 2012-11-06T08:47:50Z 1500.0 Likely Voters Automated Phone 48.0 49.0 NaN NaN http://elections.huffingtonpost.com/pollster/p... http://www.rasmussenreports.com/public_content... Nonpartisan None NaN 1
In [13]:
avg = pd.DataFrame(poll_df.mean())

avg.drop('Number of Observations',axis=0,inplace=True)
In [14]:
avg.head()
Out[14]:
0
Obama 46.805461
Romney 44.614334
Undecided 6.550827
Other 3.376238
Question Text NaN
In [15]:
std = pd.DataFrame(poll_df.std())

std.drop('Number of Observations',axis=0,inplace=True)
In [16]:
std.head()
Out[16]:
0
Obama 2.422058
Romney 2.906180
Undecided 3.701754
Other 2.692726
Question Text NaN
In [17]:
avg.plot(yerr=std,kind='bar',legend=False)
Out[17]:
<matplotlib.axes._subplots.AxesSubplot at 0x10a4b93d0>
In [18]:
poll_avg = pd.concat([avg,std],axis=1)
In [19]:
poll_avg.columns = ['Average','STD']
In [20]:
poll_avg
Out[20]:
Average STD
Obama 46.805461 2.422058
Romney 44.614334 2.906180
Undecided 6.550827 3.701754
Other 3.376238 2.692726
Question Text NaN NaN
Question Iteration 1.000000 0.000000
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [21]:
poll_df.head()
Out[21]:
Pollster Start Date End Date Entry Date/Time (ET) Number of Observations Population Mode Obama Romney Undecided Other Pollster URL Source URL Partisan Affiliation Question Text Question Iteration
0 Politico/GWU/Battleground 2012-11-04 2012-11-05 2012-11-06T08:40:26Z 1000.0 Likely Voters Live Phone 47.0 47.0 6.0 NaN http://elections.huffingtonpost.com/pollster/p... http://www.politico.com/news/stories/1112/8338... Nonpartisan None NaN 1
1 YouGov/Economist 2012-11-03 2012-11-05 2012-11-26T15:31:23Z 740.0 Likely Voters Internet 49.0 47.0 3.0 NaN http://elections.huffingtonpost.com/pollster/p... http://cdn.yougov.com/cumulus_uploads/document... Nonpartisan None NaN 1
2 Gravis Marketing 2012-11-03 2012-11-05 2012-11-06T09:22:02Z 872.0 Likely Voters Automated Phone 48.0 48.0 4.0 NaN http://elections.huffingtonpost.com/pollster/p... http://www.gravispolls.com/2012/11/gravis-mark... Nonpartisan None NaN 1
3 IBD/TIPP 2012-11-03 2012-11-05 2012-11-06T08:51:48Z 712.0 Likely Voters Live Phone 50.0 49.0 NaN 1.0 http://elections.huffingtonpost.com/pollster/p... http://news.investors.com/special-report/50841... Nonpartisan None NaN 1
4 Rasmussen 2012-11-03 2012-11-05 2012-11-06T08:47:50Z 1500.0 Likely Voters Automated Phone 48.0 49.0 NaN NaN http://elections.huffingtonpost.com/pollster/p... http://www.rasmussenreports.com/public_content... Nonpartisan None NaN 1
In [22]:
poll_df.plot(x='End Date',y=['Obama','Romney','Undecided'],linestyle='',marker='o')
Out[22]:
<matplotlib.axes._subplots.AxesSubplot at 0x10a5cddd0>
In [23]:
from datetime import datetime
In [24]:
poll_df['Difference']=(poll_df.Obama-poll_df.Romney)/100

poll_df.head()
Out[24]:
Pollster Start Date End Date Entry Date/Time (ET) Number of Observations Population Mode Obama Romney Undecided Other Pollster URL Source URL Partisan Affiliation Question Text Question Iteration Difference
0 Politico/GWU/Battleground 2012-11-04 2012-11-05 2012-11-06T08:40:26Z 1000.0 Likely Voters Live Phone 47.0 47.0 6.0 NaN http://elections.huffingtonpost.com/pollster/p... http://www.politico.com/news/stories/1112/8338... Nonpartisan None NaN 1 0.00
1 YouGov/Economist 2012-11-03 2012-11-05 2012-11-26T15:31:23Z 740.0 Likely Voters Internet 49.0 47.0 3.0 NaN http://elections.huffingtonpost.com/pollster/p... http://cdn.yougov.com/cumulus_uploads/document... Nonpartisan None NaN 1 0.02
2 Gravis Marketing 2012-11-03 2012-11-05 2012-11-06T09:22:02Z 872.0 Likely Voters Automated Phone 48.0 48.0 4.0 NaN http://elections.huffingtonpost.com/pollster/p... http://www.gravispolls.com/2012/11/gravis-mark... Nonpartisan None NaN 1 0.00
3 IBD/TIPP 2012-11-03 2012-11-05 2012-11-06T08:51:48Z 712.0 Likely Voters Live Phone 50.0 49.0 NaN 1.0 http://elections.huffingtonpost.com/pollster/p... http://news.investors.com/special-report/50841... Nonpartisan None NaN 1 0.01
4 Rasmussen 2012-11-03 2012-11-05 2012-11-06T08:47:50Z 1500.0 Likely Voters Automated Phone 48.0 49.0 NaN NaN http://elections.huffingtonpost.com/pollster/p... http://www.rasmussenreports.com/public_content... Nonpartisan None NaN 1 -0.01
In [25]:
poll_df=poll_df.groupby(['Start Date'],as_index=False).mean()

poll_df.head()
Out[25]:
Start Date Number of Observations Obama Romney Undecided Other Question Text Question Iteration Difference
0 2009-03-13 1403.0 44.0 44.0 12.0 NaN NaN 1 0.00
1 2009-04-17 686.0 50.0 39.0 11.0 NaN NaN 1 0.11
2 2009-05-14 1000.0 53.0 35.0 12.0 NaN NaN 1 0.18
3 2009-06-12 638.0 48.0 40.0 12.0 NaN NaN 1 0.08
4 2009-07-15 577.0 49.0 40.0 11.0 NaN NaN 1 0.09
In [26]:
poll_df.plot('Start Date','Difference',figsize=(12,4),marker='o',linestyle='-',color='purple')
Out[26]:
<matplotlib.axes._subplots.AxesSubplot at 0x10a5b0e50>
In [28]:
row_in=0
xlimit=[]

for date in poll_df['Start Date']:
    if date[0:7]=='2012-10':
        xlimit.append(row_in)
        row_in+=1
    else:
        row_in+=1
print min(xlimit)
print max(xlimit)
325
352
In [34]:
poll_df.plot('Start Date','Difference',figsize=(12,4),marker='o',linestyle='-',color='purple',xlim=(329,356))

# Oct 3rd
plt.axvline(x=329+2,linewidth=4,color='grey')

# Oct 11th
plt.axvline(x=329+10,linewidth=4,color='grey')

# Oct 22nd
plt.axvline(x=329+21,linewidth=4,color='grey')
Out[34]:
<matplotlib.lines.Line2D at 0x10af3f990>
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [97]:
pwd
Out[97]:
u'/Users/takuoyoneda/Documents/git/learning_python_for_data_analysis_and_visualization/learning_python_for_data_analysis_and_visualization/election_analysis'
In [98]:
donor_df = pd.read_csv('Election_Donor_Data.csv')
In [99]:
donor_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1001731 entries, 0 to 1001730
Data columns (total 16 columns):
cmte_id              1001731 non-null object
cand_id              1001731 non-null object
cand_nm              1001731 non-null object
contbr_nm            1001731 non-null object
contbr_city          1001712 non-null object
contbr_st            1001727 non-null object
contbr_zip           1001620 non-null object
contbr_employer      988002 non-null object
contbr_occupation    993301 non-null object
contb_receipt_amt    1001731 non-null float64
contb_receipt_dt     1001731 non-null object
receipt_desc         14166 non-null object
memo_cd              92482 non-null object
memo_text            97770 non-null object
form_tp              1001731 non-null object
file_num             1001731 non-null int64
dtypes: float64(1), int64(1), object(14)
memory usage: 122.3+ MB
In [100]:
donor_df.head()
Out[100]:
cmte_id cand_id cand_nm contbr_nm contbr_city contbr_st contbr_zip contbr_employer contbr_occupation contb_receipt_amt contb_receipt_dt receipt_desc memo_cd memo_text form_tp file_num
0 C00410118 P20002978 Bachmann, Michelle HARVEY, WILLIAM MOBILE AL 3.6601e+08 RETIRED RETIRED 250.0 20-JUN-11 NaN NaN NaN SA17A 736166
1 C00410118 P20002978 Bachmann, Michelle HARVEY, WILLIAM MOBILE AL 3.6601e+08 RETIRED RETIRED 50.0 23-JUN-11 NaN NaN NaN SA17A 736166
2 C00410118 P20002978 Bachmann, Michelle SMITH, LANIER LANETT AL 3.68633e+08 INFORMATION REQUESTED INFORMATION REQUESTED 250.0 05-JUL-11 NaN NaN NaN SA17A 749073
3 C00410118 P20002978 Bachmann, Michelle BLEVINS, DARONDA PIGGOTT AR 7.24548e+08 NONE RETIRED 250.0 01-AUG-11 NaN NaN NaN SA17A 749073
4 C00410118 P20002978 Bachmann, Michelle WARDENBURG, HAROLD HOT SPRINGS NATION AR 7.19016e+08 NONE RETIRED 300.0 20-JUN-11 NaN NaN NaN SA17A 736166
In [101]:
donor_df['contb_receipt_amt'].value_counts()
Out[101]:
 100.00     178188
 50.00      137584
 25.00      110345
 250.00      91182
 500.00      57984
 2500.00     49005
 35.00       37237
 1000.00     36494
 10.00       33986
 200.00      27813
 20.00       17565
 15.00       16163
 150.00      14600
 75.00       13647
 201.20      11718
 30.00       11381
 300.00      11204
 20.12        9897
 5.00         9024
 40.00        5007
 2000.00      4128
 55.00        3760
 1500.00      3705
 3.00         3383
 60.00        3084
 400.00       3066
-2500.00      2727
 110.00       2554
 125.00       2520
 19.00        2474
             ...  
 174.80          1
 7.27            1
 1219.00         1
 1884.88         1
 162.25          1
 218.31          1
 78.62           1
 203.16          1
 53.11           1
 499.66          1
 19.53           1
 188.60          1
 47.10           1
 19.85           1
 28.83           1
 202.59          1
-5500.00         1
 9.25            1
 202.66          1
 1205.00         1
 80.73           1
 115.07          1
 213.69          1
 70.76           1
 144.13          1
 97.15           1
 122.32          1
 188.65          1
 122.40          1
 132.12          1
Name: contb_receipt_amt, Length: 8079, dtype: int64
In [102]:
don_mean = donor_df['contb_receipt_amt'].mean()

don_std = donor_df['contb_receipt_amt'].std()

print 'The average donation was %.2f with a std %.2f' %(don_mean,don_std)
The average donation was 298.24 with a std 3749.67
In [103]:
top_donor = donor_df['contb_receipt_amt'].copy()

top_donor.sort_index()
#top_donor.sort_values()

top_donor
Out[103]:
0           250.0
1            50.0
2           250.0
3           250.0
4           300.0
5           500.0
6           250.0
7           250.0
8           250.0
9           250.0
10          250.0
11          500.0
12          250.0
13          250.0
14          250.0
15          300.0
16          500.0
17         1000.0
18          250.0
19          300.0
20          500.0
21          250.0
22         2500.0
23         2500.0
24          150.0
25          200.0
26          100.0
27          250.0
28          500.0
29          250.0
            ...  
1001701    2500.0
1001702    2500.0
1001703   -2500.0
1001704   -2500.0
1001705    1000.0
1001706    2500.0
1001707   -2500.0
1001708    2500.0
1001709   -2500.0
1001710   -2500.0
1001711    1000.0
1001712    2500.0
1001713    2500.0
1001714     250.0
1001715     250.0
1001716    1000.0
1001717     100.0
1001718    2500.0
1001719    2500.0
1001720     100.0
1001721     250.0
1001722     100.0
1001723     100.0
1001724     500.0
1001725    2500.0
1001726    5000.0
1001727    2500.0
1001728     500.0
1001729     500.0
1001730    2500.0
Name: contb_receipt_amt, Length: 1001731, dtype: float64
In [104]:
top_donor = top_donor[top_donor > 0]

top_donor.sort_index()
Out[104]:
0           250.0
1            50.0
2           250.0
3           250.0
4           300.0
5           500.0
6           250.0
7           250.0
8           250.0
9           250.0
10          250.0
11          500.0
12          250.0
13          250.0
14          250.0
15          300.0
16          500.0
17         1000.0
18          250.0
19          300.0
20          500.0
21          250.0
22         2500.0
23         2500.0
24          150.0
25          200.0
26          100.0
27          250.0
28          500.0
29          250.0
            ...  
1001696    1000.0
1001697    1500.0
1001698    2500.0
1001699    2500.0
1001700     300.0
1001701    2500.0
1001702    2500.0
1001705    1000.0
1001706    2500.0
1001708    2500.0
1001711    1000.0
1001712    2500.0
1001713    2500.0
1001714     250.0
1001715     250.0
1001716    1000.0
1001717     100.0
1001718    2500.0
1001719    2500.0
1001720     100.0
1001721     250.0
1001722     100.0
1001723     100.0
1001724     500.0
1001725    2500.0
1001726    5000.0
1001727    2500.0
1001728     500.0
1001729     500.0
1001730    2500.0
Name: contb_receipt_amt, Length: 991475, dtype: float64
In [105]:
top_donor.value_counts().head(10)
Out[105]:
100.0     178188
50.0      137584
25.0      110345
250.0      91182
500.0      57984
2500.0     49005
35.0       37237
1000.0     36494
10.0       33986
200.0      27813
Name: contb_receipt_amt, dtype: int64
In [106]:
com_don = top_donor[top_donor < 2500]

com_don.hist(bins=100,)
Out[106]:
<matplotlib.axes._subplots.AxesSubplot at 0x136ae8050>
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [107]:
candidates = donor_df.cand_nm.unique()

candidates
Out[107]:
array(['Bachmann, Michelle', 'Romney, Mitt', 'Obama, Barack',
       "Roemer, Charles E. 'Buddy' III", 'Pawlenty, Timothy',
       'Johnson, Gary Earl', 'Paul, Ron', 'Santorum, Rick', 'Cain, Herman',
       'Gingrich, Newt', 'McCotter, Thaddeus G', 'Huntsman, Jon',
       'Perry, Rick'], dtype=object)
In [108]:
party_map = {'Bachmann, Michelle': 'Republican',
           'Cain, Herman': 'Republican',
           'Gingrich, Newt': 'Republican',
           'Huntsman, Jon': 'Republican',
           'Johnson, Gary Earl': 'Republican',
           'McCotter, Thaddeus G': 'Republican',
           'Obama, Barack': 'Democrat',
           'Paul, Ron': 'Republican',
           'Pawlenty, Timothy': 'Republican',
           'Perry, Rick': 'Republican',
           "Roemer, Charles E. 'Buddy' III": 'Republican',
           'Romney, Mitt': 'Republican',
           'Santorum, Rick': 'Republican'}

donor_df['Party']=donor_df.cand_nm.map(party_map)
In [109]:
donor_df=donor_df[donor_df.contb_receipt_amt > 0]
In [110]:
donor_df.head()
Out[110]:
cmte_id cand_id cand_nm contbr_nm contbr_city contbr_st contbr_zip contbr_employer contbr_occupation contb_receipt_amt contb_receipt_dt receipt_desc memo_cd memo_text form_tp file_num Party
0 C00410118 P20002978 Bachmann, Michelle HARVEY, WILLIAM MOBILE AL 3.6601e+08 RETIRED RETIRED 250.0 20-JUN-11 NaN NaN NaN SA17A 736166 Republican
1 C00410118 P20002978 Bachmann, Michelle HARVEY, WILLIAM MOBILE AL 3.6601e+08 RETIRED RETIRED 50.0 23-JUN-11 NaN NaN NaN SA17A 736166 Republican
2 C00410118 P20002978 Bachmann, Michelle SMITH, LANIER LANETT AL 3.68633e+08 INFORMATION REQUESTED INFORMATION REQUESTED 250.0 05-JUL-11 NaN NaN NaN SA17A 749073 Republican
3 C00410118 P20002978 Bachmann, Michelle BLEVINS, DARONDA PIGGOTT AR 7.24548e+08 NONE RETIRED 250.0 01-AUG-11 NaN NaN NaN SA17A 749073 Republican
4 C00410118 P20002978 Bachmann, Michelle WARDENBURG, HAROLD HOT SPRINGS NATION AR 7.19016e+08 NONE RETIRED 300.0 20-JUN-11 NaN NaN NaN SA17A 736166 Republican
In [111]:
donor_df.groupby('cand_nm')['contb_receipt_amt'].count()
Out[111]:
cand_nm
Bachmann, Michelle                 13082
Cain, Herman                       20052
Gingrich, Newt                     46883
Huntsman, Jon                       4066
Johnson, Gary Earl                  1234
McCotter, Thaddeus G                  73
Obama, Barack                     589127
Paul, Ron                         143161
Pawlenty, Timothy                   3844
Perry, Rick                        12709
Roemer, Charles E. 'Buddy' III      5844
Romney, Mitt                      105155
Santorum, Rick                     46245
Name: contb_receipt_amt, dtype: int64
In [112]:
donor_df.groupby('cand_nm')['contb_receipt_amt'].sum()
Out[112]:
cand_nm
Bachmann, Michelle                2.711439e+06
Cain, Herman                      7.101082e+06
Gingrich, Newt                    1.283277e+07
Huntsman, Jon                     3.330373e+06
Johnson, Gary Earl                5.669616e+05
McCotter, Thaddeus G              3.903000e+04
Obama, Barack                     1.358774e+08
Paul, Ron                         2.100962e+07
Pawlenty, Timothy                 6.004819e+06
Perry, Rick                       2.030575e+07
Roemer, Charles E. 'Buddy' III    3.730099e+05
Romney, Mitt                      8.833591e+07
Santorum, Rick                    1.104316e+07
Name: contb_receipt_amt, dtype: float64
In [113]:
cand_amount=donor_df.groupby('cand_nm')['contb_receipt_amt'].sum()

i=0

for don in cand_amount:
    print ' The candidate %s raise %.0f dolloars' %(cand_amount.index[i],don)
    print ''
    i+=1
 The candidate Bachmann, Michelle raise 2711439 dolloars

 The candidate Cain, Herman raise 7101082 dolloars

 The candidate Gingrich, Newt raise 12832770 dolloars

 The candidate Huntsman, Jon raise 3330373 dolloars

 The candidate Johnson, Gary Earl raise 566962 dolloars

 The candidate McCotter, Thaddeus G raise 39030 dolloars

 The candidate Obama, Barack raise 135877427 dolloars

 The candidate Paul, Ron raise 21009620 dolloars

 The candidate Pawlenty, Timothy raise 6004819 dolloars

 The candidate Perry, Rick raise 20305754 dolloars

 The candidate Roemer, Charles E. 'Buddy' III raise 373010 dolloars

 The candidate Romney, Mitt raise 88335908 dolloars

 The candidate Santorum, Rick raise 11043159 dolloars

In [114]:
cand_amount.plot(kind='bar')
Out[114]:
<matplotlib.axes._subplots.AxesSubplot at 0x10e396410>
In [115]:
donor_df.groupby('Party')['contb_receipt_amt'].sum().plot(kind='bar')
Out[115]:
<matplotlib.axes._subplots.AxesSubplot at 0x10e504610>
In [116]:
occupation_df=donor_df.pivot_table('contb_receipt_amt',
                                   index='contbr_occupation',
                                   columns='Party',
                                   aggfunc='sum')
In [117]:
occupation_df.head()
Out[117]:
Party Democrat Republican
contbr_occupation
MIXED-MEDIA ARTIST / STORYTELLER 100.0 NaN
AREA VICE PRESIDENT 250.0 NaN
RESEARCH ASSOCIATE 100.0 NaN
TEACHER 500.0 NaN
THERAPIST 3900.0 NaN
In [118]:
occupation_df.tail()
Out[118]:
Party Democrat Republican
contbr_occupation
ZOOKEEPER 35.0 NaN
ZOOLOGIST 400.0 NaN
ZOOLOGY EDUCATION 25.0 NaN
\NONE\ NaN 250.0
~ NaN 75.0
In [119]:
occupation_df.shape
Out[119]:
(45067, 2)
In [120]:
occupation_df=occupation_df[occupation_df.sum(1) > 1000000]
In [121]:
occupation_df.shape
Out[121]:
(31, 2)
In [122]:
occupation_df.plot(kind='bar')
Out[122]:
<matplotlib.axes._subplots.AxesSubplot at 0x10ec25810>
In [123]:
occupation_df.plot(kind='barh',figsize=(10,12),cmap='seismic')
Out[123]:
<matplotlib.axes._subplots.AxesSubplot at 0x10f4948d0>
In [124]:
occupation_df.drop(['INFORMATION REQUESTED PER BEST EFFORTS','INFORMATION REQUESTED'],axis=0,inplace=True)
In [125]:
occupation_df.loc['CEO']=occupation_df.loc['CEO']+occupation_df.loc['C.E.O.']

occupation_df.drop('C.E.O.',inplace=True)
In [126]:
occupation_df.plot(kind='barh',figsize=(10,12),cmap='seismic')
Out[126]:
<matplotlib.axes._subplots.AxesSubplot at 0x10f57a690>
In [ ]: