In [4]:
import pandas as pd
from pandas import Series,DataFrame
In [6]:
# Load the training set; the path is relative to the notebook's working directory.
titanic_df=pd.read_csv('train.csv')
In [7]:
# Preview the first rows to see which columns are available.
titanic_df.head()
Out[7]:
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S
In [8]:
# Column dtypes and non-null counts; used to spot columns with missing values.
titanic_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
PassengerId    891 non-null int64
Survived       891 non-null int64
Pclass         891 non-null int64
Name           891 non-null object
Sex            891 non-null object
Age            714 non-null float64
SibSp          891 non-null int64
Parch          891 non-null int64
Ticket         891 non-null object
Fare           891 non-null float64
Cabin          204 non-null object
Embarked       889 non-null object
dtypes: float64(2), int64(5), object(5)
memory usage: 83.6+ KB
In [21]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
In [23]:
# Passenger count by sex.
sns.factorplot(x='Sex', data=titanic_df, kind='count')
Out[23]:
<seaborn.axisgrid.FacetGrid at 0x93125f8>
In [24]:
# Passenger count by sex, split by ticket class.
sns.factorplot(x='Sex', hue='Pclass', data=titanic_df, kind='count')
Out[24]:
<seaborn.axisgrid.FacetGrid at 0xb310ac8>
In [25]:
# Passenger count by ticket class, split by sex.
sns.factorplot(x='Pclass', hue='Sex', data=titanic_df, kind='count')
Out[25]:
<seaborn.axisgrid.FacetGrid at 0xb3c7ba8>
In [26]:
def male_female_child(passenger, child_age=16):
    """Classify a passenger as 'child' or by their recorded sex.

    Parameters
    ----------
    passenger : sequence of (age, sex)
        Two-element row; it is unpacked as ``age, sex`` in that order.
    child_age : number, default 16
        Passengers strictly younger than this are labelled 'child'.

    Returns
    -------
    'child' when ``age < child_age``, otherwise ``sex`` unchanged.
    A missing age (NaN) never compares as less than the threshold, so
    such passengers fall back to their sex.
    """
    age, sex = passenger
    return 'child' if age < child_age else sex
In [29]:
# Derive a 'person' label per row; column order (Age, Sex) must match the
# unpacking order inside male_female_child.
titanic_df['person'] = titanic_df.loc[:, ['Age', 'Sex']].apply(male_female_child, axis=1)
In [30]:
# Show the first ten rows to check the new 'person' column.
titanic_df.head(10)
Out[30]:
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked person
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S male
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C female
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S female
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S female
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S male
5 6 0 3 Moran, Mr. James male NaN 0 0 330877 8.4583 NaN Q male
6 7 0 1 McCarthy, Mr. Timothy J male 54.0 0 0 17463 51.8625 E46 S male
7 8 0 3 Palsson, Master. Gosta Leonard male 2.0 3 1 349909 21.0750 NaN S child
8 9 1 3 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.0 0 2 347742 11.1333 NaN S female
9 10 1 2 Nasser, Mrs. Nicholas (Adele Achem) female 14.0 1 0 237736 30.0708 NaN C child
In [32]:
# Passenger count by ticket class, split by the male/female/child label.
sns.factorplot(x='Pclass', hue='person', data=titanic_df, kind='count')
Out[32]:
<seaborn.axisgrid.FacetGrid at 0xb145908>
In [33]:
# Histogram of the Age column using 70 bins.
titanic_df['Age'].hist(bins=70)
Out[33]:
<matplotlib.axes._subplots.AxesSubplot at 0xbb5d518>
In [34]:
# Mean of the Age column.
titanic_df['Age'].mean()
Out[34]:
29.69911764705882
In [36]:
# How many passengers fall into each 'person' category.
titanic_df['person'].value_counts()
Out[36]:
male      537
female    271
child      83
Name: person, dtype: int64
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [39]:
# Age density curves, one per sex, on a wide (aspect=4) facet.
# NOTE(review): newer seaborn releases deprecate `shade` in favour of `fill`;
# confirm against the installed version before changing it.
oldest = titanic_df['Age'].max()

fig = sns.FacetGrid(data=titanic_df, hue='Sex', aspect=4)
fig.map(sns.kdeplot, 'Age', shade=True)

# Clip the x-axis to the observed age range.
fig.set(xlim=(0, oldest))
fig.add_legend()
Out[39]:
<seaborn.axisgrid.FacetGrid at 0xc9f7978>
In [51]:
# Age density curves, one per 'person' category (male / female / child).
# NOTE(review): newer seaborn releases deprecate `shade` in favour of `fill`;
# confirm against the installed version before changing it.
oldest = titanic_df['Age'].max()

fig = sns.FacetGrid(data=titanic_df, hue='person', aspect=4)
fig.map(sns.kdeplot, 'Age', shade=True)

# Clip the x-axis to the observed age range.
fig.set(xlim=(0, oldest))
fig.add_legend()
Out[51]:
<seaborn.axisgrid.FacetGrid at 0xeee3400>
In [41]:
# Age density curves, one per ticket class.
# NOTE(review): newer seaborn releases deprecate `shade` in favour of `fill`;
# confirm against the installed version before changing it.
oldest = titanic_df['Age'].max()

fig = sns.FacetGrid(data=titanic_df, hue='Pclass', aspect=4)
fig.map(sns.kdeplot, 'Age', shade=True)

# Clip the x-axis to the observed age range.
fig.set(xlim=(0, oldest))
fig.add_legend()
Out[41]:
<seaborn.axisgrid.FacetGrid at 0xd1242e8>
In [52]:
# Re-display the frame before looking at the Cabin column.
titanic_df.head()
Out[52]:
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked person
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S male
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C female
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S female
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S female
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S male
In [53]:
# Keep only the cabin entries that are present; missing (NaN) cabins are dropped.
deck=titanic_df['Cabin'].dropna()
In [54]:
# Peek at the remaining cabin codes (a letter followed by a number).
deck.head()
Out[54]:
1      C85
3     C123
6      E46
10      G6
11    C103
Name: Cabin, dtype: object
In [67]:
# The first character of each cabin code is taken as the deck letter.
levels = [cabin[0] for cabin in deck]

# One-column frame of deck letters, then a count per deck.
cabin_df = DataFrame(levels, columns=['Cabin'])
sns.factorplot(x='Cabin', data=cabin_df, palette='winter_d', kind='count')
Out[67]:
<seaborn.axisgrid.FacetGrid at 0x10200dd8>
In [69]:
# Exclude the 'T' deck value before re-plotting the per-deck counts.
cabin_df = cabin_df.loc[cabin_df['Cabin'] != 'T']

sns.factorplot(x='Cabin', data=cabin_df, palette='summer', kind='count')
Out[69]:
<seaborn.axisgrid.FacetGrid at 0x1020de48>
In [70]:
# Re-display the frame before looking at the Embarked column.
titanic_df.head()
Out[70]:
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked person
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S male
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C female
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S female
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S female
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S male
In [81]:
# Passenger count by embarkation code, split by ticket class.
sns.factorplot(x='Embarked', hue='Pclass', data=titanic_df, kind='count')
Out[81]:
<seaborn.axisgrid.FacetGrid at 0x10ea69b0>
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [83]:
# Next question: who travelled alone and who travelled with family?
# Re-display the frame as a reminder of the available columns.
titanic_df.head()
Out[83]:
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked person
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S male
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C female
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S female
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S female
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S male
In [84]:
# Number of relatives aboard per passenger: SibSp plus Parch, row by row.
titanic_df['Alone'] = titanic_df['SibSp'].add(titanic_df['Parch'])
In [88]:
# Inspect the summed SibSp + Parch values (0 means no relatives were recorded).
titanic_df['Alone']
Out[88]:
0       1
1       1
2       0
3       1
4       0
5       0
6       0
7       4
8       2
9       1
10      2
11      0
12      0
13      6
14      0
15      0
16      5
17      0
18      1
19      0
20      0
21      0
22      0
23      0
24      4
25      6
26      0
27      5
28      0
29      0
       ..
861     1
862     0
863    10
864     0
865     0
866     1
867     0
868     0
869     2
870     0
871     2
872     0
873     0
874     1
875     0
876     0
877     0
878     0
879     1
880     1
881     0
882     0
883     0
884     0
885     5
886     0
887     0
888     3
889     0
890     0
Name: Alone, Length: 891, dtype: int64
In [90]:
# Label each passenger as travelling alone or with family.
# The label is computed directly from SibSp + Parch and assigned as a whole
# column, rather than patching the existing 'Alone' column through chained
# indexing (`df['Alone'].loc[mask] = ...`). Chained assignment triggers
# SettingWithCopyWarning and does not update the frame under pandas
# Copy-on-Write. It also mixes strings into an integer column, and it fails on
# a second run because the column then holds strings that are compared to 0.
# This cell can be re-run safely.
has_family = (titanic_df['SibSp'] + titanic_df['Parch']) > 0

titanic_df['Alone'] = has_family.map({True: 'With Family', False: 'Alone'})
In [91]:
# Check that the 'Alone' column now holds text labels.
titanic_df.head()
Out[91]:
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked person Alone
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S male With Famity
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C female With Famity
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S female Alone
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S female With Famity
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S male Alone
In [93]:
# Passenger count for each value of the 'Alone' label.
sns.factorplot(x='Alone', data=titanic_df, palette='Blues', kind='count')
Out[93]:
<seaborn.axisgrid.FacetGrid at 0xed24550>
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [96]:
# Readable version of the 0/1 Survived flag for plotting.
survived_labels = {0: 'no', 1: 'yes'}
titanic_df['Survivor'] = titanic_df['Survived'].map(survived_labels)

sns.factorplot(x='Survivor', data=titanic_df, palette='Set1', kind='count')
Out[96]:
<seaborn.axisgrid.FacetGrid at 0x101a6208>
In [98]:
# Survived (0/1) plotted against ticket class, one line per 'person' category.
sns.factorplot(x='Pclass', y='Survived', hue='person', data=titanic_df)
Out[98]:
<seaborn.axisgrid.FacetGrid at 0x10dca198>
In [99]:
# Linear fit of Survived against Age.
sns.lmplot(x='Age', y='Survived', data=titanic_df)
Out[99]:
<seaborn.axisgrid.FacetGrid at 0x10cb32e8>
In [100]:
# Linear fit of Survived against Age, one fit per ticket class.
sns.lmplot(x='Age', y='Survived', hue='Pclass', data=titanic_df, palette='winter')
Out[100]:
<seaborn.axisgrid.FacetGrid at 0x10dca1d0>
In [101]:
# Ages passed to lmplot as x_bins, so points are grouped around these values.
generations = [10, 20, 40, 60, 80]

sns.lmplot(
    x='Age',
    y='Survived',
    hue='Pclass',
    data=titanic_df,
    palette='winter',
    x_bins=generations,
)
Out[101]:
<seaborn.axisgrid.FacetGrid at 0x116ed4e0>
In [102]:
# Same binned fit as the per-class plot, split by sex instead.
sns.lmplot(
    x='Age',
    y='Survived',
    hue='Sex',
    data=titanic_df,
    palette='winter',
    x_bins=generations,
)
Out[102]:
<seaborn.axisgrid.FacetGrid at 0x116f89b0>

1) Did the deck have an effect on the passengers' survival rate? Did this answer match up with your intuition?

2) Did having a family member aboard increase the odds of surviving the sinking?