diff --git a/lab-dw-data-structuring-and-combining.ipynb b/lab-dw-data-structuring-and-combining.ipynb
new file mode 100644
index 0000000..2618d38
--- /dev/null
+++ b/lab-dw-data-structuring-and-combining.ipynb
@@ -0,0 +1,1449 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "25d7736c-ba17-4aff-b6bb-66eba20fbf4e",
+   "metadata": {
+    "id": "25d7736c-ba17-4aff-b6bb-66eba20fbf4e"
+   },
+   "source": [
+    "# Lab | Data Structuring and Combining Data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a2cdfc70-44c8-478c-81e7-2bc43fdf4986",
+   "metadata": {
+    "id": "a2cdfc70-44c8-478c-81e7-2bc43fdf4986"
+   },
+   "source": [
+    "## Challenge 1: Combining & Cleaning Data\n",
+    "\n",
+    "In this challenge, we will be working with the customer data from an insurance company, as we did in the two previous labs. The data can be found here:\n",
+    "- https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv\n",
+    "\n",
+    "This time we also have new data, available in the two CSV files linked below.\n",
+    "\n",
+    "- https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file2.csv\n",
+    "- https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file3.csv\n",
+    "\n",
+    "Note that you'll need to clean and format the new data.\n",
+    "\n",
+    "Observation:\n",
+    "- One option is to first combine the three datasets and then apply the cleaning function to the new combined dataset\n",
+    "- Another option would be to read the clean file you saved in the previous lab, and just clean the two new files and concatenate the three clean datasets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 78,
+   "id": "492d06e3-92c7-4105-ac72-536db98d3244",
+   "metadata": {
+    "id": "492d06e3-92c7-4105-ac72-536db98d3244"
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 79,
+   "id": "b359f50c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "customer_data_insurance_df = pd.read_csv(\"Customer_data.csv\")\n",
+    "customer_data_insurance2_df = pd.read_csv(\"Customer_data2.csv\")\n",
+    "customer_data_insurance3_df = pd.read_csv(\"Customer_data3.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 80,
+   "id": "62014030",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.DataFrame'>\n",
+      "RangeIndex: 4008 entries, 0 to 4007\n",
+      "Data columns (total 11 columns):\n",
+      " #   Column                     Non-Null Count  Dtype  \n",
+      "---  ------                     --------------  -----  \n",
+      " 0   Customer                   1071 non-null   str    \n",
+      " 1   ST                         1071 non-null   str    \n",
+      " 2   GENDER                     954 non-null    str    \n",
+      " 3   Education                  1071 non-null   str    \n",
+      " 4   Customer Lifetime Value    1068 non-null   str    \n",
+      " 5   Income                     1071 non-null   float64\n",
+      " 6   Monthly Premium Auto       1071 non-null   float64\n",
+      " 7   Number of Open Complaints  1071 non-null   str    \n",
+      " 8   Policy Type                1071 non-null   str    \n",
+      " 9   Vehicle Class              1071 non-null   str    \n",
+      " 10  Total Claim Amount         1071 non-null   float64\n",
+      "dtypes: float64(3), str(8)\n",
+      "memory usage: 417.4 KB\n"
+     ]
+    }
+   ],
+   "source": [
+    "customer_data_insurance_df.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 81,
+   "id": "2e0d2f9e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Customer</th>\n",
+       "      <th>ST</th>\n",
+       "      <th>GENDER</th>\n",
+       "      <th>Education</th>\n",
+       "      <th>Customer Lifetime Value</th>\n",
+       "      <th>Income</th>\n",
+       "      <th>Monthly Premium Auto</th>\n",
+       "      <th>Number of Open Complaints</th>\n",
+       "      <th>Policy Type</th>\n",
+       "      <th>Vehicle Class</th>\n",
+       "      <th>Total Claim Amount</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>RB50392</td>\n",
+       "      <td>Washington</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Master</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1000.0</td>\n",
+       "      <td>1/0/00</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>2.704934</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>QZ44356</td>\n",
+       "      <td>Arizona</td>\n",
+       "      <td>F</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>697953.59%</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>94.0</td>\n",
+       "      <td>1/0/00</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>1131.464935</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>AI49188</td>\n",
+       "      <td>Nevada</td>\n",
+       "      <td>F</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>1288743.17%</td>\n",
+       "      <td>48767.0</td>\n",
+       "      <td>108.0</td>\n",
+       "      <td>1/0/00</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Two-Door Car</td>\n",
+       "      <td>566.472247</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>WW63253</td>\n",
+       "      <td>California</td>\n",
+       "      <td>M</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>764586.18%</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>106.0</td>\n",
+       "      <td>1/0/00</td>\n",
+       "      <td>Corporate Auto</td>\n",
+       "      <td>SUV</td>\n",
+       "      <td>529.881344</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>GA49547</td>\n",
+       "      <td>Washington</td>\n",
+       "      <td>M</td>\n",
+       "      <td>High School or Below</td>\n",
+       "      <td>536307.65%</td>\n",
+       "      <td>36357.0</td>\n",
+       "      <td>68.0</td>\n",
+       "      <td>1/0/00</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>17.269323</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  Customer          ST GENDER             Education Customer Lifetime Value  \\\n",
+       "0  RB50392  Washington    NaN                Master                     NaN   \n",
+       "1  QZ44356     Arizona      F              Bachelor              697953.59%   \n",
+       "2  AI49188      Nevada      F              Bachelor             1288743.17%   \n",
+       "3  WW63253  California      M              Bachelor              764586.18%   \n",
+       "4  GA49547  Washington      M  High School or Below              536307.65%   \n",
+       "\n",
+       "    Income  Monthly Premium Auto Number of Open Complaints     Policy Type  \\\n",
+       "0      0.0                1000.0                    1/0/00   Personal Auto   \n",
+       "1      0.0                  94.0                    1/0/00   Personal Auto   \n",
+       "2  48767.0                 108.0                    1/0/00   Personal Auto   \n",
+       "3      0.0                 106.0                    1/0/00  Corporate Auto   \n",
+       "4  36357.0                  68.0                    1/0/00   Personal Auto   \n",
+       "\n",
+       "   Vehicle Class  Total Claim Amount  \n",
+       "0  Four-Door Car            2.704934  \n",
+       "1  Four-Door Car         1131.464935  \n",
+       "2   Two-Door Car          566.472247  \n",
+       "3            SUV          529.881344  \n",
+       "4  Four-Door Car           17.269323  "
+      ]
+     },
+     "execution_count": 81,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "customer_data_insurance_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 82,
+   "id": "54b042b2",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Customer</th>\n",
+       "      <th>ST</th>\n",
+       "      <th>GENDER</th>\n",
+       "      <th>Education</th>\n",
+       "      <th>Customer Lifetime Value</th>\n",
+       "      <th>Income</th>\n",
+       "      <th>Monthly Premium Auto</th>\n",
+       "      <th>Number of Open Complaints</th>\n",
+       "      <th>Total Claim Amount</th>\n",
+       "      <th>Policy Type</th>\n",
+       "      <th>Vehicle Class</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>GS98873</td>\n",
+       "      <td>Arizona</td>\n",
+       "      <td>F</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>323912.47%</td>\n",
+       "      <td>16061</td>\n",
+       "      <td>88</td>\n",
+       "      <td>1/0/00</td>\n",
+       "      <td>633.6</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>CW49887</td>\n",
+       "      <td>California</td>\n",
+       "      <td>F</td>\n",
+       "      <td>Master</td>\n",
+       "      <td>462680.11%</td>\n",
+       "      <td>79487</td>\n",
+       "      <td>114</td>\n",
+       "      <td>1/0/00</td>\n",
+       "      <td>547.2</td>\n",
+       "      <td>Special Auto</td>\n",
+       "      <td>SUV</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>MY31220</td>\n",
+       "      <td>California</td>\n",
+       "      <td>F</td>\n",
+       "      <td>College</td>\n",
+       "      <td>899704.02%</td>\n",
+       "      <td>54230</td>\n",
+       "      <td>112</td>\n",
+       "      <td>1/0/00</td>\n",
+       "      <td>537.6</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Two-Door Car</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>UH35128</td>\n",
+       "      <td>Oregon</td>\n",
+       "      <td>F</td>\n",
+       "      <td>College</td>\n",
+       "      <td>2580706.30%</td>\n",
+       "      <td>71210</td>\n",
+       "      <td>214</td>\n",
+       "      <td>1/1/00</td>\n",
+       "      <td>1027.2</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Luxury Car</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>WH52799</td>\n",
+       "      <td>Arizona</td>\n",
+       "      <td>F</td>\n",
+       "      <td>College</td>\n",
+       "      <td>380812.21%</td>\n",
+       "      <td>94903</td>\n",
+       "      <td>94</td>\n",
+       "      <td>1/0/00</td>\n",
+       "      <td>451.2</td>\n",
+       "      <td>Corporate Auto</td>\n",
+       "      <td>Two-Door Car</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  Customer          ST GENDER Education Customer Lifetime Value  Income  \\\n",
+       "0  GS98873     Arizona      F  Bachelor              323912.47%   16061   \n",
+       "1  CW49887  California      F    Master              462680.11%   79487   \n",
+       "2  MY31220  California      F   College              899704.02%   54230   \n",
+       "3  UH35128      Oregon      F   College             2580706.30%   71210   \n",
+       "4  WH52799     Arizona      F   College              380812.21%   94903   \n",
+       "\n",
+       "   Monthly Premium Auto Number of Open Complaints  Total Claim Amount  \\\n",
+       "0                    88                    1/0/00               633.6   \n",
+       "1                   114                    1/0/00               547.2   \n",
+       "2                   112                    1/0/00               537.6   \n",
+       "3                   214                    1/1/00              1027.2   \n",
+       "4                    94                    1/0/00               451.2   \n",
+       "\n",
+       "      Policy Type  Vehicle Class  \n",
+       "0   Personal Auto  Four-Door Car  \n",
+       "1    Special Auto            SUV  \n",
+       "2   Personal Auto   Two-Door Car  \n",
+       "3   Personal Auto     Luxury Car  \n",
+       "4  Corporate Auto   Two-Door Car  "
+      ]
+     },
+     "execution_count": 82,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "customer_data_insurance2_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 83,
+   "id": "3d42ddc9",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Customer</th>\n",
+       "      <th>State</th>\n",
+       "      <th>Customer Lifetime Value</th>\n",
+       "      <th>Education</th>\n",
+       "      <th>Gender</th>\n",
+       "      <th>Income</th>\n",
+       "      <th>Monthly Premium Auto</th>\n",
+       "      <th>Number of Open Complaints</th>\n",
+       "      <th>Policy Type</th>\n",
+       "      <th>Total Claim Amount</th>\n",
+       "      <th>Vehicle Class</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>SA25987</td>\n",
+       "      <td>Washington</td>\n",
+       "      <td>3479.137523</td>\n",
+       "      <td>High School or Below</td>\n",
+       "      <td>M</td>\n",
+       "      <td>0</td>\n",
+       "      <td>104</td>\n",
+       "      <td>0</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>499.200000</td>\n",
+       "      <td>Two-Door Car</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>TB86706</td>\n",
+       "      <td>Arizona</td>\n",
+       "      <td>2502.637401</td>\n",
+       "      <td>Master</td>\n",
+       "      <td>M</td>\n",
+       "      <td>0</td>\n",
+       "      <td>66</td>\n",
+       "      <td>0</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>3.468912</td>\n",
+       "      <td>Two-Door Car</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>ZL73902</td>\n",
+       "      <td>Nevada</td>\n",
+       "      <td>3265.156348</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>F</td>\n",
+       "      <td>25820</td>\n",
+       "      <td>82</td>\n",
+       "      <td>0</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>393.600000</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>KX23516</td>\n",
+       "      <td>California</td>\n",
+       "      <td>4455.843406</td>\n",
+       "      <td>High School or Below</td>\n",
+       "      <td>F</td>\n",
+       "      <td>0</td>\n",
+       "      <td>121</td>\n",
+       "      <td>0</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>699.615192</td>\n",
+       "      <td>SUV</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>FN77294</td>\n",
+       "      <td>California</td>\n",
+       "      <td>7704.958480</td>\n",
+       "      <td>High School or Below</td>\n",
+       "      <td>M</td>\n",
+       "      <td>30366</td>\n",
+       "      <td>101</td>\n",
+       "      <td>2</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>484.800000</td>\n",
+       "      <td>SUV</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  Customer       State  Customer Lifetime Value             Education Gender  \\\n",
+       "0  SA25987  Washington              3479.137523  High School or Below      M   \n",
+       "1  TB86706     Arizona              2502.637401                Master      M   \n",
+       "2  ZL73902      Nevada              3265.156348              Bachelor      F   \n",
+       "3  KX23516  California              4455.843406  High School or Below      F   \n",
+       "4  FN77294  California              7704.958480  High School or Below      M   \n",
+       "\n",
+       "   Income  Monthly Premium Auto  Number of Open Complaints    Policy Type  \\\n",
+       "0       0                   104                          0  Personal Auto   \n",
+       "1       0                    66                          0  Personal Auto   \n",
+       "2   25820                    82                          0  Personal Auto   \n",
+       "3       0                   121                          0  Personal Auto   \n",
+       "4   30366                   101                          2  Personal Auto   \n",
+       "\n",
+       "   Total Claim Amount  Vehicle Class  \n",
+       "0          499.200000   Two-Door Car  \n",
+       "1            3.468912   Two-Door Car  \n",
+       "2          393.600000  Four-Door Car  \n",
+       "3          699.615192            SUV  \n",
+       "4          484.800000            SUV  "
+      ]
+     },
+     "execution_count": 83,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "customer_data_insurance3_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 84,
+   "id": "8665654f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "all_customer_data_df = pd.concat([customer_data_insurance_df, customer_data_insurance2_df, customer_data_insurance3_df], axis=0, ignore_index=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 85,
+   "id": "8750b6da",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.DataFrame'>\n",
+      "RangeIndex: 12074 entries, 0 to 12073\n",
+      "Data columns (total 13 columns):\n",
+      " #   Column                     Non-Null Count  Dtype  \n",
+      "---  ------                     --------------  -----  \n",
+      " 0   Customer                   9137 non-null   str    \n",
+      " 1   ST                         2067 non-null   str    \n",
+      " 2   GENDER                     1945 non-null   str    \n",
+      " 3   Education                  9137 non-null   str    \n",
+      " 4   Customer Lifetime Value    9130 non-null   object \n",
+      " 5   Income                     9137 non-null   float64\n",
+      " 6   Monthly Premium Auto       9137 non-null   float64\n",
+      " 7   Number of Open Complaints  9137 non-null   object \n",
+      " 8   Policy Type                9137 non-null   str    \n",
+      " 9   Vehicle Class              9137 non-null   str    \n",
+      " 10  Total Claim Amount         9137 non-null   float64\n",
+      " 11  State                      7070 non-null   str    \n",
+      " 12  Gender                     7070 non-null   str    \n",
+      "dtypes: float64(3), object(2), str(8)\n",
+      "memory usage: 1.6+ MB\n"
+     ]
+    }
+   ],
+   "source": [
+    "all_customer_data_df.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 86,
+   "id": "0bcbd3f3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "all_customer_data_df[\"state\"] = all_customer_data_df[\"ST\"].fillna(all_customer_data_df[\"State\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 87,
+   "id": "dda0cee2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "all_customer_data_df = all_customer_data_df.drop([\"ST\", \"State\"], axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 88,
+   "id": "ba77d002",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.DataFrame'>\n",
+      "RangeIndex: 12074 entries, 0 to 12073\n",
+      "Data columns (total 12 columns):\n",
+      " #   Column                     Non-Null Count  Dtype  \n",
+      "---  ------                     --------------  -----  \n",
+      " 0   Customer                   9137 non-null   str    \n",
+      " 1   GENDER                     1945 non-null   str    \n",
+      " 2   Education                  9137 non-null   str    \n",
+      " 3   Customer Lifetime Value    9130 non-null   object \n",
+      " 4   Income                     9137 non-null   float64\n",
+      " 5   Monthly Premium Auto       9137 non-null   float64\n",
+      " 6   Number of Open Complaints  9137 non-null   object \n",
+      " 7   Policy Type                9137 non-null   str    \n",
+      " 8   Vehicle Class              9137 non-null   str    \n",
+      " 9   Total Claim Amount         9137 non-null   float64\n",
+      " 10  Gender                     7070 non-null   str    \n",
+      " 11  state                      9137 non-null   str    \n",
+      "dtypes: float64(3), object(2), str(7)\n",
+      "memory usage: 1.5+ MB\n"
+     ]
+    }
+   ],
+   "source": [
+    "all_customer_data_df.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 89,
+   "id": "ecc399b2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "all_customer_data_df[\"gender\"] = all_customer_data_df[\"GENDER\"].fillna(all_customer_data_df[\"Gender\"])\n",
+    "all_customer_data_df = all_customer_data_df.drop([\"GENDER\", \"Gender\"], axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 90,
+   "id": "76a0d342",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.DataFrame'>\n",
+      "RangeIndex: 12074 entries, 0 to 12073\n",
+      "Data columns (total 11 columns):\n",
+      " #   Column                     Non-Null Count  Dtype  \n",
+      "---  ------                     --------------  -----  \n",
+      " 0   Customer                   9137 non-null   str    \n",
+      " 1   Education                  9137 non-null   str    \n",
+      " 2   Customer Lifetime Value    9130 non-null   object \n",
+      " 3   Income                     9137 non-null   float64\n",
+      " 4   Monthly Premium Auto       9137 non-null   float64\n",
+      " 5   Number of Open Complaints  9137 non-null   object \n",
+      " 6   Policy Type                9137 non-null   str    \n",
+      " 7   Vehicle Class              9137 non-null   str    \n",
+      " 8   Total Claim Amount         9137 non-null   float64\n",
+      " 9   state                      9137 non-null   str    \n",
+      " 10  gender                     9015 non-null   str    \n",
+      "dtypes: float64(3), object(2), str(6)\n",
+      "memory usage: 1.5+ MB\n"
+     ]
+    }
+   ],
+   "source": [
+    "all_customer_data_df.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 91,
+   "id": "5dbdc199",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def cle_for(df, state_conversion, gender_conversion):\n",
+    "    \"\"\"Return a cleaned copy of df; the frame passed in is left unchanged.\n",
+    "\n",
+    "    Column labels are lower-cased with spaces turned into underscores, rows\n",
+    "    whose values are all missing are dropped, and the state / gender columns\n",
+    "    are normalised through the supplied lookups. Values with no entry in a\n",
+    "    lookup are kept as they are.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    df : pandas.DataFrame\n",
+    "        Must contain 'state' and 'gender' columns once labels are normalised.\n",
+    "    state_conversion : dict\n",
+    "        Maps raw state labels to their canonical form.\n",
+    "    gender_conversion : dict\n",
+    "        Maps raw gender labels to their canonical form.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    pandas.DataFrame\n",
+    "        A new frame; surviving rows keep their original index labels.\n",
+    "\n",
+    "    Raises\n",
+    "    ------\n",
+    "    KeyError\n",
+    "        If the normalised frame has no 'state' or 'gender' column.\n",
+    "    \"\"\"\n",
+    "    # Work on a copy: assigning to df.columns would rename the caller's frame.\n",
+    "    cleaned = df.copy()\n",
+    "    cleaned.columns = cleaned.columns.str.lower().str.replace(' ', '_')\n",
+    "\n",
+    "    # Drop rows where all elements are NaN\n",
+    "    cleaned = cleaned.dropna(axis=0, how=\"all\")\n",
+    "\n",
+    "    # assign() returns a new frame, so nothing is written into the dropna() result.\n",
+    "    # map() yields NaN for unmapped values; fillna() restores the original value.\n",
+    "    return cleaned.assign(\n",
+    "        state=cleaned[\"state\"].map(state_conversion).fillna(cleaned[\"state\"]),\n",
+    "        gender=cleaned[\"gender\"].map(gender_conversion).fillna(cleaned[\"gender\"]),\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 92,
+   "id": "0b5b7843",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "state_conversion = {\n",
+    "    'Cali': 'California', 'AZ': 'Arizona', 'WA': 'Washington',\n",
+    "    'Washington': 'Washington', 'Arizona': 'Arizona', 'Nevada': 'Nevada', 'California': 'California', 'Oregon': 'Oregon'\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 93,
+   "id": "b8ddc601",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "gender_conversion = {\n",
+    "    \"Male\": \"M\", \"Female\": \"F\", \"Femal\": \"F\", \"female\": \"F\"\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 94,
+   "id": "597c88ef",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cleaned_customer_data_insurance_df = cle_for(all_customer_data_df, state_conversion, gender_conversion)\n",
+    "cleaned_customer_data_insurance_df = cleaned_customer_data_insurance_df.reset_index(drop=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 95,
+   "id": "fb9ce740",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['customer', 'education', 'customer_lifetime_value', 'income',\n",
+       "       'monthly_premium_auto', 'number_of_open_complaints', 'policy_type',\n",
+       "       'vehicle_class', 'total_claim_amount', 'state', 'gender'],\n",
+       "      dtype='str')"
+      ]
+     },
+     "execution_count": 95,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cleaned_customer_data_insurance_df.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 96,
+   "id": "2e30f9c2",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<ArrowStringArray>\n",
+       "['Master', 'Bachelor', 'High School or Below', 'College', 'Bachelors',\n",
+       " 'Doctor']\n",
+       "Length: 6, dtype: str"
+      ]
+     },
+     "execution_count": 96,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cleaned_customer_data_insurance_df[\"education\"].unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 97,
+   "id": "b69e1b84",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cleaned_customer_data_insurance_df[\"education\"] = cleaned_customer_data_insurance_df[\"education\"].replace({\n",
+    "    \"Bachelors\": \"Bachelor\"\n",
+    "})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 98,
+   "id": "c08248d0",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<ArrowStringArray>\n",
+       "['Master', 'Bachelor', 'High School or Below', 'College', 'Doctor']\n",
+       "Length: 5, dtype: str"
+      ]
+     },
+     "execution_count": 98,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cleaned_customer_data_insurance_df[\"education\"].unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 99,
+   "id": "12edcfbf",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([nan, '697953.59%', '1288743.17%', ..., 8163.890428, 7524.442436,\n",
+       "       2611.836866], shape=(8212,), dtype=object)"
+      ]
+     },
+     "execution_count": 99,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cleaned_customer_data_insurance_df[\"customer_lifetime_value\"].unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 100,
+   "id": "9a672f79",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# The column mixes '%'-suffixed strings with plain floats. The .str accessor\n",
+    "# returns NaN for non-string elements, so cast everything to text first;\n",
+    "# otherwise every float value is lost and later overwritten by the median.\n",
+    "# NOTE(review): the '%'-suffixed values look about 100x larger than the plain\n",
+    "# floats - confirm whether they should also be divided by 100.\n",
+    "clv_text = (\n",
+    "    cleaned_customer_data_insurance_df[\"customer_lifetime_value\"]\n",
+    "    .astype(str)\n",
+    "    .str.replace('%', '', regex=False)\n",
+    ")\n",
+    "# Anything that still is not a number (including missing values) becomes NaN.\n",
+    "clv_numeric = pd.to_numeric(clv_text, errors='coerce')\n",
+    "# Impute only the values that are genuinely missing, using the column median.\n",
+    "median_value = clv_numeric.median()\n",
+    "cleaned_customer_data_insurance_df[\"customer_lifetime_value\"] = clv_numeric.fillna(median_value)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 101,
+   "id": "84d0cc05",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([ 572027.54,  697953.59, 1288743.17, ...,  568964.41,  368672.38,\n",
+       "        399258.39], shape=(1924,))"
+      ]
+     },
+     "execution_count": 101,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cleaned_customer_data_insurance_df[\"customer_lifetime_value\"].unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 102,
+   "id": "422095b1",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array(['1/0/00', '1/2/00', '1/1/00', '1/3/00', '1/5/00', '1/4/00', 0, 2,\n",
+       "       3, 1, 5, 4], dtype=object)"
+      ]
+     },
+     "execution_count": 102,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cleaned_customer_data_insurance_df[\"number_of_open_complaints\"].unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 103,
+   "id": "46e84fed",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "dtype('O')"
+      ]
+     },
+     "execution_count": 103,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cleaned_customer_data_insurance_df[\"number_of_open_complaints\"].dtypes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 104,
+   "id": "bbb68ce2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cleaned_customer_data_insurance_df[\"number_of_open_complaints\"] = cleaned_customer_data_insurance_df[\"number_of_open_complaints\"].astype(str)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 105,
+   "id": "f78d0b0f",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<StringDtype(na_value=nan)>"
+      ]
+     },
+     "execution_count": 105,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cleaned_customer_data_insurance_df[\"number_of_open_complaints\"].dtypes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 106,
+   "id": "b97e6ab9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def convert_values(x):\n",
+    "    if pd.isnull(x):\n",
+    "        return pd.NA  # Handle missing values\n",
+    "    if isinstance(x, str) and \"/\" in x:\n",
+    "        return int(x.split(\"/\")[1])  # Extract the second part of the split string\n",
+    "    try:\n",
+    "        return int(x)  # Directly convert numeric strings or numbers\n",
+    "    except ValueError:\n",
+    "        return pd.NA  # In case of any unexpected values\n",
+    "\n",
+    "# Convert every cell with convert_values, then store the column in pandas' nullable integer dtype.\n",
+    "cleaned_customer_data_insurance_df[\"number_of_open_complaints\"] = cleaned_customer_data_insurance_df[\n",
+    "    \"number_of_open_complaints\"\n",
+    "].apply(convert_values).astype(\"Int64\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 107,
+   "id": "45d02bff",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<IntegerArray>\n",
+       "[0, 2, 1, 3, 5, 4]\n",
+       "Length: 6, dtype: Int64"
+      ]
+     },
+     "execution_count": 107,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cleaned_customer_data_insurance_df[\"number_of_open_complaints\"].unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 108,
+   "id": "d0f98c0f",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['customer', 'education', 'customer_lifetime_value', 'income',\n",
+       "       'monthly_premium_auto', 'number_of_open_complaints', 'policy_type',\n",
+       "       'vehicle_class', 'total_claim_amount', 'state', 'gender'],\n",
+       "      dtype='str')"
+      ]
+     },
+     "execution_count": 108,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cleaned_customer_data_insurance_df.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 109,
+   "id": "111101c0",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<ArrowStringArray>\n",
+       "['Personal Auto', 'Corporate Auto', 'Special Auto']\n",
+       "Length: 3, dtype: str"
+      ]
+     },
+     "execution_count": 109,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cleaned_customer_data_insurance_df[\"policy_type\"].unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 110,
+   "id": "295d4ee8",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<ArrowStringArray>\n",
+       "['Four-Door Car',  'Two-Door Car',           'SUV',    'Luxury SUV',\n",
+       "    'Sports Car',    'Luxury Car']\n",
+       "Length: 6, dtype: str"
+      ]
+     },
+     "execution_count": 110,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cleaned_customer_data_insurance_df[\"vehicle_class\"].unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 111,
+   "id": "f930b71e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<ArrowStringArray>\n",
+       "['Four-Door Car', 'Two-Door Car', 'SUV', 'Luxury']\n",
+       "Length: 4, dtype: str"
+      ]
+     },
+     "execution_count": 111,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Fold every premium vehicle class into a single \"Luxury\" label, then list the labels that remain.\n",
+    "cleaned_customer_data_insurance_df[\"vehicle_class\"] = cleaned_customer_data_insurance_df[\"vehicle_class\"].replace(\n",
+    "    [\"Sports Car\", \"Luxury SUV\", \"Luxury Car\"], \"Luxury\"\n",
+    ")\n",
+    "cleaned_customer_data_insurance_df[\"vehicle_class\"].unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 112,
+   "id": "29493aa9",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<ArrowStringArray>\n",
+       "[nan, 'F', 'M']\n",
+       "Length: 3, dtype: str"
+      ]
+     },
+     "execution_count": 112,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cleaned_customer_data_insurance_df[\"gender\"].unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 120,
+   "id": "5bbcd977",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<ArrowStringArray>\n",
+       "['Washington', 'Arizona', 'Nevada', 'California', 'Oregon']\n",
+       "Length: 5, dtype: str"
+      ]
+     },
+     "execution_count": 120,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cleaned_customer_data_insurance_df[\"state\"].unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 115,
+   "id": "98a88b39",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.DataFrame'>\n",
+      "RangeIndex: 9137 entries, 0 to 9136\n",
+      "Data columns (total 11 columns):\n",
+      " #   Column                     Non-Null Count  Dtype  \n",
+      "---  ------                     --------------  -----  \n",
+      " 0   customer                   9137 non-null   str    \n",
+      " 1   education                  9137 non-null   str    \n",
+      " 2   customer_lifetime_value    9137 non-null   float64\n",
+      " 3   income                     9137 non-null   float64\n",
+      " 4   monthly_premium_auto       9137 non-null   float64\n",
+      " 5   number_of_open_complaints  9137 non-null   Int64  \n",
+      " 6   policy_type                9137 non-null   str    \n",
+      " 7   vehicle_class              9137 non-null   str    \n",
+      " 8   total_claim_amount         9137 non-null   float64\n",
+      " 9   state                      9137 non-null   str    \n",
+      " 10  gender                     9015 non-null   str    \n",
+      "dtypes: Int64(1), float64(4), str(6)\n",
+      "memory usage: 1.2 MB\n"
+     ]
+    }
+   ],
+   "source": [
+    "cleaned_customer_data_insurance_df.info()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "31b8a9e7-7db9-4604-991b-ef6771603e57",
+   "metadata": {
+    "id": "31b8a9e7-7db9-4604-991b-ef6771603e57"
+   },
+   "source": [
+    "## Challenge 2: Structuring Data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a877fd6d-7a0c-46d2-9657-f25036e4ca4b",
+   "metadata": {
+    "id": "a877fd6d-7a0c-46d2-9657-f25036e4ca4b"
+   },
+   "source": [
+    "In this challenge, we will continue to work with customer data from an insurance company, but we will use a dataset with more columns, called marketing_customer_analysis_clean.csv, which can be found at the following link:\n",
+    "\n",
+    "https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis_clean.csv\n",
+    "\n",
+    "This dataset contains information such as customer demographics, policy details, vehicle information, and the customer's response to the last marketing campaign. Our goal is to explore and analyze this data by performing data cleaning, formatting, and structuring."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 117,
+   "id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26",
+   "metadata": {
+    "id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26"
+   },
+   "outputs": [],
+   "source": [
+    "marketing_customer_analysis_df = pd.read_csv(\"marketing_customer_analysis_clean.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 119,
+   "id": "5c39f213",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "unnamed:_0                       10910\n",
+       "customer                          9134\n",
+       "state                                5\n",
+       "customer_lifetime_value           8041\n",
+       "response                             2\n",
+       "coverage                             3\n",
+       "education                            5\n",
+       "effective_to_date                   59\n",
+       "employmentstatus                     5\n",
+       "gender                               2\n",
+       "income                            5694\n",
+       "location_code                        3\n",
+       "marital_status                       3\n",
+       "monthly_premium_auto               202\n",
+       "months_since_last_claim             37\n",
+       "months_since_policy_inception      100\n",
+       "number_of_open_complaints            7\n",
+       "number_of_policies                   9\n",
+       "policy_type                          3\n",
+       "policy                               9\n",
+       "renew_offer_type                     4\n",
+       "sales_channel                        4\n",
+       "total_claim_amount                5106\n",
+       "vehicle_class                        6\n",
+       "vehicle_size                         3\n",
+       "vehicle_type                         1\n",
+       "month                                2\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 119,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "marketing_customer_analysis_df.nunique()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "df35fd0d-513e-4e77-867e-429da10a9cc7",
+   "metadata": {
+    "id": "df35fd0d-513e-4e77-867e-429da10a9cc7"
+   },
+   "source": [
+    "1. You work in the marketing department and you want to know which sales channel brought the most sales in terms of total revenue. Using pivot, create a summary table showing the total revenue for each sales channel (agent, branch, call center, and web).\n",
+    "Round the total revenue to 2 decimal places. Analyze the resulting table to draw insights."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 124,
+   "id": "80a6f4b4",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "               monthly_premium_auto\n",
+      "sales_channel                      \n",
+      "Agent                        386335\n",
+      "Branch                       280953\n",
+      "Call Center                  197970\n",
+      "Web                          151511\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Total monthly premium collected through each sales channel, rounded to two decimals.\n",
+    "premium_pivot_table = (\n",
+    "    marketing_customer_analysis_df\n",
+    "    .pivot_table(index='sales_channel', values='monthly_premium_auto', aggfunc='sum')\n",
+    "    .round(2)\n",
+    ")\n",
+    "\n",
+    "print(premium_pivot_table)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 125,
+   "id": "e6c88014",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Agent and Branch generate the most premium revenue, which hints at a market preference for personal interaction.\n",
+    "# This could be critical information when planning marketing campaigns or customer interaction strategies."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "640993b2-a291-436c-a34d-a551144f8196",
+   "metadata": {
+    "id": "640993b2-a291-436c-a34d-a551144f8196"
+   },
+   "source": [
+    "2. Create a pivot table that shows the average customer lifetime value per gender and education level. Analyze the resulting table to draw insights."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 123,
+   "id": "8277be56",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "education  Bachelor  College   Doctor  High School or Below   Master\n",
+      "gender                                                              \n",
+      "F           7874.27  7748.82  7328.51               8675.22  8157.05\n",
+      "M           7703.60  8052.46  7415.33               8149.69  8168.83\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Mean customer lifetime value: one row per gender, one column per education level.\n",
+    "pivot_table_clv = (\n",
+    "    marketing_customer_analysis_df\n",
+    "    .pivot_table(index='gender', columns='education', values='customer_lifetime_value', aggfunc='mean')\n",
+    "    .round(2)\n",
+    ")\n",
+    "\n",
+    "print(pivot_table_clv)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 126,
+   "id": "7ffb4609",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Given the higher CLVs among females with \"High School or Below\" and males with \"Master\" education, these groups could be prime targets for retention strategies and for upselling higher insurance packages or additional service offerings."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "32c7f2e5-3d90-43e5-be33-9781b6069198",
+   "metadata": {
+    "id": "32c7f2e5-3d90-43e5-be33-9781b6069198"
+   },
+   "source": [
+    "## Bonus\n",
+    "\n",
+    "You work at the customer service department and you want to know which months had the highest number of complaints by policy type category. Create a summary table showing the number of complaints by policy type and month.\n",
+    "Show it in a long format table."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e3d09a8f-953c-448a-a5f8-2e5a8cca7291",
+   "metadata": {
+    "id": "e3d09a8f-953c-448a-a5f8-2e5a8cca7291"
+   },
+   "source": [
+    "*In data analysis, a long format table is a way of structuring data in which each observation or measurement is stored in a separate row of the table. The key characteristic of a long format table is that each column represents a single variable, and each row represents a single observation of that variable.*\n",
+    "\n",
+    "*More information about long and wide format tables here: https://www.statology.org/long-vs-wide-data/*"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 128,
+   "id": "3a069e0b-b400-470e-904d-d17582191be4",
+   "metadata": {
+    "id": "3a069e0b-b400-470e-904d-d17582191be4"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "      policy_type  month  number_of_complaints\n",
+      "0  Corporate Auto      1                  1252\n",
+      "1  Corporate Auto      2                  1089\n",
+      "2   Personal Auto      1                  4329\n",
+      "3   Personal Auto      2                  3799\n",
+      "4    Special Auto      1                   237\n",
+      "5    Special Auto      2                   204\n"
+     ]
+    }
+   ],
+   "source": [
+    "df_complaints = marketing_customer_analysis_df.groupby(['policy_type', 'month']).size().reset_index(name='number_of_complaints')\n",
+    "\n",
+    "print(df_complaints)"
+   ]
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

	Customer	ST	GENDER	Education	Customer Lifetime Value	Income	Monthly Premium Auto	Number of Open Complaints	Policy Type	Vehicle Class	Total Claim Amount
0	RB50392	Washington	NaN	Master	NaN	0.0	1000.0	1/0/00	Personal Auto	Four-Door Car	2.704934
1	QZ44356	Arizona	F	Bachelor	697953.59%	0.0	94.0	1/0/00	Personal Auto	Four-Door Car	1131.464935
2	AI49188	Nevada	F	Bachelor	1288743.17%	48767.0	108.0	1/0/00	Personal Auto	Two-Door Car	566.472247
3	WW63253	California	M	Bachelor	764586.18%	0.0	106.0	1/0/00	Corporate Auto	SUV	529.881344
4	GA49547	Washington	M	High School or Below	536307.65%	36357.0	68.0	1/0/00	Personal Auto	Four-Door Car	17.269323
	Customer	ST	GENDER	Education	Customer Lifetime Value	Income	Monthly Premium Auto	Number of Open Complaints	Total Claim Amount	Policy Type	Vehicle Class
0	GS98873	Arizona	F	Bachelor	323912.47%	16061	88	1/0/00	633.6	Personal Auto	Four-Door Car
1	CW49887	California	F	Master	462680.11%	79487	114	1/0/00	547.2	Special Auto	SUV
2	MY31220	California	F	College	899704.02%	54230	112	1/0/00	537.6	Personal Auto	Two-Door Car
3	UH35128	Oregon	F	College	2580706.30%	71210	214	1/1/00	1027.2	Personal Auto	Luxury Car
4	WH52799	Arizona	F	College	380812.21%	94903	94	1/0/00	451.2	Corporate Auto	Two-Door Car
	Customer	State	Customer Lifetime Value	Education	Gender	Income	Monthly Premium Auto	Number of Open Complaints	Policy Type	Total Claim Amount	Vehicle Class
0	SA25987	Washington	3479.137523	High School or Below	M	0	104	0	Personal Auto	499.200000	Two-Door Car
1	TB86706	Arizona	2502.637401	Master	M	0	66	0	Personal Auto	3.468912	Two-Door Car
2	ZL73902	Nevada	3265.156348	Bachelor	F	25820	82	0	Personal Auto	393.600000	Four-Door Car
3	KX23516	California	4455.843406	High School or Below	F	0	121	0	Personal Auto	699.615192	SUV
4	FN77294	California	7704.958480	High School or Below	M	30366	101	2	Personal Auto	484.800000	SUV