{
"cells": [
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
"data = pd.read_csv('titanic4.csv')"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" PassengerId | \n",
" Survived | \n",
" Pclass | \n",
" Name | \n",
" Sex | \n",
" Age | \n",
" SibSp | \n",
" Parch | \n",
" Ticket | \n",
" Fare | \n",
" Cabin | \n",
" Embarked | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 3 | \n",
" Braund, Mr. Owen Harris | \n",
" male | \n",
" 22.0 | \n",
" 1 | \n",
" 0 | \n",
" A/5 21171 | \n",
" 7.2500 | \n",
" NaN | \n",
" S | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" 1 | \n",
" 1 | \n",
" Cumings, Mrs. John Bradley (Florence Briggs Th... | \n",
" female | \n",
" 38.0 | \n",
" 1 | \n",
" 0 | \n",
" PC 17599 | \n",
" 71.2833 | \n",
" C85 | \n",
" C | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" 1 | \n",
" 3 | \n",
" Heikkinen, Miss. Laina | \n",
" female | \n",
" 26.0 | \n",
" 0 | \n",
" 0 | \n",
" STON/O2. 3101282 | \n",
" 7.9250 | \n",
" NaN | \n",
" S | \n",
"
\n",
" \n",
" 3 | \n",
" 4 | \n",
" 1 | \n",
" 1 | \n",
" Futrelle, Mrs. Jacques Heath (Lily May Peel) | \n",
" female | \n",
" 35.0 | \n",
" 1 | \n",
" 0 | \n",
" 113803 | \n",
" 53.1000 | \n",
" C123 | \n",
" S | \n",
"
\n",
" \n",
" 4 | \n",
" 5 | \n",
" 0 | \n",
" 3 | \n",
" Allen, Mr. William Henry | \n",
" male | \n",
" 35.0 | \n",
" 0 | \n",
" 0 | \n",
" 373450 | \n",
" 8.0500 | \n",
" NaN | \n",
" S | \n",
"
\n",
" \n",
" 5 | \n",
" 6 | \n",
" 0 | \n",
" 3 | \n",
" Moran, Mr. James | \n",
" male | \n",
" NaN | \n",
" 0 | \n",
" 0 | \n",
" 330877 | \n",
" 8.4583 | \n",
" NaN | \n",
" Q | \n",
"
\n",
" \n",
" 6 | \n",
" 7 | \n",
" 0 | \n",
" 1 | \n",
" McCarthy, Mr. Timothy J | \n",
" male | \n",
" 54.0 | \n",
" 0 | \n",
" 0 | \n",
" 17463 | \n",
" 51.8625 | \n",
" E46 | \n",
" S | \n",
"
\n",
" \n",
" 7 | \n",
" 8 | \n",
" 0 | \n",
" 3 | \n",
" Palsson, Master. Gosta Leonard | \n",
" male | \n",
" 2.0 | \n",
" 3 | \n",
" 1 | \n",
" 349909 | \n",
" 21.0750 | \n",
" NaN | \n",
" S | \n",
"
\n",
" \n",
" 8 | \n",
" 9 | \n",
" 1 | \n",
" 3 | \n",
" Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) | \n",
" female | \n",
" 27.0 | \n",
" 0 | \n",
" 2 | \n",
" 347742 | \n",
" 11.1333 | \n",
" NaN | \n",
" S | \n",
"
\n",
" \n",
" 9 | \n",
" 10 | \n",
" 1 | \n",
" 2 | \n",
" Nasser, Mrs. Nicholas (Adele Achem) | \n",
" female | \n",
" 14.0 | \n",
" 1 | \n",
" 0 | \n",
" 237736 | \n",
" 30.0708 | \n",
" NaN | \n",
" C | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" PassengerId Survived Pclass \\\n",
"0 1 0 3 \n",
"1 2 1 1 \n",
"2 3 1 3 \n",
"3 4 1 1 \n",
"4 5 0 3 \n",
"5 6 0 3 \n",
"6 7 0 1 \n",
"7 8 0 3 \n",
"8 9 1 3 \n",
"9 10 1 2 \n",
"\n",
" Name Sex Age SibSp \\\n",
"0 Braund, Mr. Owen Harris male 22.0 1 \n",
"1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
"2 Heikkinen, Miss. Laina female 26.0 0 \n",
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
"4 Allen, Mr. William Henry male 35.0 0 \n",
"5 Moran, Mr. James male NaN 0 \n",
"6 McCarthy, Mr. Timothy J male 54.0 0 \n",
"7 Palsson, Master. Gosta Leonard male 2.0 3 \n",
"8 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.0 0 \n",
"9 Nasser, Mrs. Nicholas (Adele Achem) female 14.0 1 \n",
"\n",
" Parch Ticket Fare Cabin Embarked \n",
"0 0 A/5 21171 7.2500 NaN S \n",
"1 0 PC 17599 71.2833 C85 C \n",
"2 0 STON/O2. 3101282 7.9250 NaN S \n",
"3 0 113803 53.1000 C123 S \n",
"4 0 373450 8.0500 NaN S \n",
"5 0 330877 8.4583 NaN Q \n",
"6 0 17463 51.8625 E46 S \n",
"7 1 349909 21.0750 NaN S \n",
"8 2 347742 11.1333 NaN S \n",
"9 0 237736 30.0708 NaN C "
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Pclass | \n",
" Sex | \n",
" Age | \n",
" SibSp | \n",
" Parch | \n",
" Fare | \n",
" Survived | \n",
" Ticket | \n",
" Cabin | \n",
" Embarked | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 3 | \n",
" male | \n",
" 22.0 | \n",
" 1 | \n",
" 0 | \n",
" 7.2500 | \n",
" 0 | \n",
" A/5 21171 | \n",
" NaN | \n",
" S | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" female | \n",
" 38.0 | \n",
" 1 | \n",
" 0 | \n",
" 71.2833 | \n",
" 1 | \n",
" PC 17599 | \n",
" C85 | \n",
" C | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" female | \n",
" 26.0 | \n",
" 0 | \n",
" 0 | \n",
" 7.9250 | \n",
" 1 | \n",
" STON/O2. 3101282 | \n",
" NaN | \n",
" S | \n",
"
\n",
" \n",
" 3 | \n",
" 1 | \n",
" female | \n",
" 35.0 | \n",
" 1 | \n",
" 0 | \n",
" 53.1000 | \n",
" 1 | \n",
" 113803 | \n",
" C123 | \n",
" S | \n",
"
\n",
" \n",
" 4 | \n",
" 3 | \n",
" male | \n",
" 35.0 | \n",
" 0 | \n",
" 0 | \n",
" 8.0500 | \n",
" 0 | \n",
" 373450 | \n",
" NaN | \n",
" S | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Pclass Sex Age SibSp Parch Fare Survived Ticket \\\n",
"0 3 male 22.0 1 0 7.2500 0 A/5 21171 \n",
"1 1 female 38.0 1 0 71.2833 1 PC 17599 \n",
"2 3 female 26.0 0 0 7.9250 1 STON/O2. 3101282 \n",
"3 1 female 35.0 1 0 53.1000 1 113803 \n",
"4 3 male 35.0 0 0 8.0500 0 373450 \n",
"\n",
" Cabin Embarked \n",
"0 NaN S \n",
"1 C85 C \n",
"2 NaN S \n",
"3 C123 S \n",
"4 NaN S "
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = data[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Survived', 'Ticket', 'Cabin', 'Embarked']]\n",
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Pclass | \n",
" Sex | \n",
" Age | \n",
" SibSp | \n",
" Parch | \n",
" Fare | \n",
" Survived | \n",
" Cabin | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 3 | \n",
" male | \n",
" 22.0 | \n",
" 1 | \n",
" 0 | \n",
" 7.2500 | \n",
" 0 | \n",
" NaN | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" female | \n",
" 38.0 | \n",
" 1 | \n",
" 0 | \n",
" 71.2833 | \n",
" 1 | \n",
" C85 | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" female | \n",
" 26.0 | \n",
" 0 | \n",
" 0 | \n",
" 7.9250 | \n",
" 1 | \n",
" NaN | \n",
"
\n",
" \n",
" 3 | \n",
" 1 | \n",
" female | \n",
" 35.0 | \n",
" 1 | \n",
" 0 | \n",
" 53.1000 | \n",
" 1 | \n",
" C123 | \n",
"
\n",
" \n",
" 4 | \n",
" 3 | \n",
" male | \n",
" 35.0 | \n",
" 0 | \n",
" 0 | \n",
" 8.0500 | \n",
" 0 | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Pclass Sex Age SibSp Parch Fare Survived Cabin\n",
"0 3 male 22.0 1 0 7.2500 0 NaN\n",
"1 1 female 38.0 1 0 71.2833 1 C85\n",
"2 3 female 26.0 0 0 7.9250 1 NaN\n",
"3 1 female 35.0 1 0 53.1000 1 C123\n",
"4 3 male 35.0 0 0 8.0500 0 NaN"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = data[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Survived', 'Cabin']]\n",
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"RangeIndex: 891 entries, 0 to 890\n",
"Data columns (total 8 columns):\n",
"Pclass 891 non-null int64\n",
"Sex 891 non-null object\n",
"Age 714 non-null float64\n",
"SibSp 891 non-null int64\n",
"Parch 891 non-null int64\n",
"Fare 891 non-null float64\n",
"Survived 891 non-null int64\n",
"Cabin 204 non-null object\n",
"dtypes: float64(2), int64(4), object(2)\n",
"memory usage: 55.8+ KB\n"
]
}
],
"source": [
"data.info()\n",
"# Cabin column has only 204 entries and the rest are Nan. We can exclude the column as it has too many missing (nan) values\n"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Pclass | \n",
" Sex | \n",
" Age | \n",
" SibSp | \n",
" Parch | \n",
" Survived | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 3 | \n",
" male | \n",
" 22.0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" female | \n",
" 38.0 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" female | \n",
" 26.0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 3 | \n",
" 1 | \n",
" female | \n",
" 35.0 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 4 | \n",
" 3 | \n",
" male | \n",
" 35.0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 5 | \n",
" 3 | \n",
" male | \n",
" NaN | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 6 | \n",
" 1 | \n",
" male | \n",
" 54.0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 7 | \n",
" 3 | \n",
" male | \n",
" 2.0 | \n",
" 3 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
" 8 | \n",
" 3 | \n",
" female | \n",
" 27.0 | \n",
" 0 | \n",
" 2 | \n",
" 1 | \n",
"
\n",
" \n",
" 9 | \n",
" 2 | \n",
" female | \n",
" 14.0 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 10 | \n",
" 3 | \n",
" female | \n",
" 4.0 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
"
\n",
" \n",
" 11 | \n",
" 1 | \n",
" female | \n",
" 58.0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 12 | \n",
" 3 | \n",
" male | \n",
" 20.0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 13 | \n",
" 3 | \n",
" male | \n",
" 39.0 | \n",
" 1 | \n",
" 5 | \n",
" 0 | \n",
"
\n",
" \n",
" 14 | \n",
" 3 | \n",
" female | \n",
" 14.0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Pclass Sex Age SibSp Parch Survived\n",
"0 3 male 22.0 1 0 0\n",
"1 1 female 38.0 1 0 1\n",
"2 3 female 26.0 0 0 1\n",
"3 1 female 35.0 1 0 1\n",
"4 3 male 35.0 0 0 0\n",
"5 3 male NaN 0 0 0\n",
"6 1 male 54.0 0 0 0\n",
"7 3 male 2.0 3 1 0\n",
"8 3 female 27.0 0 2 1\n",
"9 2 female 14.0 1 0 1\n",
"10 3 female 4.0 1 1 1\n",
"11 1 female 58.0 0 0 1\n",
"12 3 male 20.0 0 0 0\n",
"13 3 male 39.0 1 5 0\n",
"14 3 female 14.0 0 0 0"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = data[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Survived']]\n",
"data.head(15)"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Pclass | \n",
" Sex | \n",
" Age | \n",
" SibSp | \n",
" Parch | \n",
" Survived | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 3 | \n",
" male | \n",
" 22.000000 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" female | \n",
" 38.000000 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" female | \n",
" 26.000000 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 3 | \n",
" 1 | \n",
" female | \n",
" 35.000000 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 4 | \n",
" 3 | \n",
" male | \n",
" 35.000000 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 5 | \n",
" 3 | \n",
" male | \n",
" 29.699118 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 6 | \n",
" 1 | \n",
" male | \n",
" 54.000000 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 7 | \n",
" 3 | \n",
" male | \n",
" 2.000000 | \n",
" 3 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
" 8 | \n",
" 3 | \n",
" female | \n",
" 27.000000 | \n",
" 0 | \n",
" 2 | \n",
" 1 | \n",
"
\n",
" \n",
" 9 | \n",
" 2 | \n",
" female | \n",
" 14.000000 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 10 | \n",
" 3 | \n",
" female | \n",
" 4.000000 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
"
\n",
" \n",
" 11 | \n",
" 1 | \n",
" female | \n",
" 58.000000 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 12 | \n",
" 3 | \n",
" male | \n",
" 20.000000 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 13 | \n",
" 3 | \n",
" male | \n",
" 39.000000 | \n",
" 1 | \n",
" 5 | \n",
" 0 | \n",
"
\n",
" \n",
" 14 | \n",
" 3 | \n",
" female | \n",
" 14.000000 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Pclass Sex Age SibSp Parch Survived\n",
"0 3 male 22.000000 1 0 0\n",
"1 1 female 38.000000 1 0 1\n",
"2 3 female 26.000000 0 0 1\n",
"3 1 female 35.000000 1 0 1\n",
"4 3 male 35.000000 0 0 0\n",
"5 3 male 29.699118 0 0 0\n",
"6 1 male 54.000000 0 0 0\n",
"7 3 male 2.000000 3 1 0\n",
"8 3 female 27.000000 0 2 1\n",
"9 2 female 14.000000 1 0 1\n",
"10 3 female 4.000000 1 1 1\n",
"11 1 female 58.000000 0 0 1\n",
"12 3 male 20.000000 0 0 0\n",
"13 3 male 39.000000 1 5 0\n",
"14 3 female 14.000000 0 0 0"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Replaces all NAN values in Age column with the mean values of the col.\n",
"data['Age'].fillna(data['Age'].mean(), inplace=True)\n",
"#dataframe.Column_Name.fillna(dataframe.Column_Name.mean(),inplace=True)\n",
"data.head(15)"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Pclass | \n",
" Sex | \n",
" Age | \n",
" SibSp | \n",
" Parch | \n",
" Survived | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 3 | \n",
" 0 | \n",
" 22.000000 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 38.000000 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" 1 | \n",
" 26.000000 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 3 | \n",
" 1 | \n",
" 1 | \n",
" 35.000000 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 4 | \n",
" 3 | \n",
" 0 | \n",
" 35.000000 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 5 | \n",
" 3 | \n",
" 0 | \n",
" 29.699118 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 6 | \n",
" 1 | \n",
" 0 | \n",
" 54.000000 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 7 | \n",
" 3 | \n",
" 0 | \n",
" 2.000000 | \n",
" 3 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
" 8 | \n",
" 3 | \n",
" 1 | \n",
" 27.000000 | \n",
" 0 | \n",
" 2 | \n",
" 1 | \n",
"
\n",
" \n",
" 9 | \n",
" 2 | \n",
" 1 | \n",
" 14.000000 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 10 | \n",
" 3 | \n",
" 1 | \n",
" 4.000000 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
"
\n",
" \n",
" 11 | \n",
" 1 | \n",
" 1 | \n",
" 58.000000 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 12 | \n",
" 3 | \n",
" 0 | \n",
" 20.000000 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 13 | \n",
" 3 | \n",
" 0 | \n",
" 39.000000 | \n",
" 1 | \n",
" 5 | \n",
" 0 | \n",
"
\n",
" \n",
" 14 | \n",
" 3 | \n",
" 1 | \n",
" 14.000000 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Pclass Sex Age SibSp Parch Survived\n",
"0 3 0 22.000000 1 0 0\n",
"1 1 1 38.000000 1 0 1\n",
"2 3 1 26.000000 0 0 1\n",
"3 1 1 35.000000 1 0 1\n",
"4 3 0 35.000000 0 0 0\n",
"5 3 0 29.699118 0 0 0\n",
"6 1 0 54.000000 0 0 0\n",
"7 3 0 2.000000 3 1 0\n",
"8 3 1 27.000000 0 2 1\n",
"9 2 1 14.000000 1 0 1\n",
"10 3 1 4.000000 1 1 1\n",
"11 1 1 58.000000 0 0 1\n",
"12 3 0 20.000000 0 0 0\n",
"13 3 0 39.000000 1 5 0\n",
"14 3 1 14.000000 0 0 0"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# lets convert the Sex column by mapping male to 0 and female to 1 in the dataset\n",
"data['Sex'] = data['Sex'].map({'male': 0, 'female': 1})\n",
"data.head(15)\n"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [],
"source": [
"# write out the clean dataset to a new csv file\n",
"data.to_csv('cleansed.csv', sep=',')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}