# Complete all imports.
import getpass
import os
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
import pymysql
from dotenv import load_dotenv
from sqlalchemy import create_engine, text

# Load the MySQL password from the local .env file (which .gitignore keeps
# out of version control).  os.getenv returns None when the "mysql" variable
# is not set; prompt for the password in that case so that the literal text
# "None" is never formatted into the connection URL.
load_dotenv()
password = os.getenv("mysql")
if password is None:
    password = getpass.getpass("MySQL password: ")

# Connection settings.
user = "root"
host = "localhost"
database = "sakila"

### Q1: Connection and check

# Percent-encode the password: characters such as "@", ":" or "/" would
# otherwise be read as URL delimiters and corrupt the connection string.
engine = create_engine(
    f"mysql+pymysql://{user}:{quote_plus(password)}@{host}/{database}"
)

# Verify that the database is reachable.  The context manager closes the
# connection again instead of leaving it open for the rest of the session.
with engine.connect() as connection:
    connection.execute(text("SELECT 1"))
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Tables_in_sakila
0actor
1actor_info
2address
3category
4city
5country
6customer
7customer_list
8customer_rental_summary
9film
10film_actor
11film_category
12film_list
13film_text
14inventory
15language
16nicer_but_slower_film_list
17payment
18rental
19sales_by_film_category
20sales_by_store
21staff
22staff_list
23store
\n", + "
" + ], + "text/plain": [ + " Tables_in_sakila\n", + "0 actor\n", + "1 actor_info\n", + "2 address\n", + "3 category\n", + "4 city\n", + "5 country\n", + "6 customer\n", + "7 customer_list\n", + "8 customer_rental_summary\n", + "9 film\n", + "10 film_actor\n", + "11 film_category\n", + "12 film_list\n", + "13 film_text\n", + "14 inventory\n", + "15 language\n", + "16 nicer_but_slower_film_list\n", + "17 payment\n", + "18 rental\n", + "19 sales_by_film_category\n", + "20 sales_by_store\n", + "21 staff\n", + "22 staff_list\n", + "23 store" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.read_sql(\"show tables from sakila;\", engine)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "3093bc41", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
rental_idrental_dateinventory_idcustomer_idreturn_datestaff_idlast_update
012005-05-24 22:53:303671302005-05-26 22:04:3012006-02-15 21:30:53
122005-05-24 22:54:3315254592005-05-28 19:40:3312006-02-15 21:30:53
232005-05-24 23:03:3917114082005-06-01 22:12:3912006-02-15 21:30:53
342005-05-24 23:04:4124523332005-06-03 01:43:4122006-02-15 21:30:53
452005-05-24 23:05:2120792222005-06-02 04:33:2112006-02-15 21:30:53
........................
16039160452005-08-23 22:25:26772142005-08-25 23:54:2612006-02-15 21:30:53
16040160462005-08-23 22:26:474364742005-08-27 18:02:4722006-02-15 21:30:53
16041160472005-08-23 22:42:4820881142005-08-25 02:48:4822006-02-15 21:30:53
16042160482005-08-23 22:43:0720191032005-08-31 21:33:0712006-02-15 21:30:53
16043160492005-08-23 22:50:1226663932005-08-30 01:01:1222006-02-15 21:30:53
\n", + "

16044 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " rental_id rental_date inventory_id customer_id \\\n", + "0 1 2005-05-24 22:53:30 367 130 \n", + "1 2 2005-05-24 22:54:33 1525 459 \n", + "2 3 2005-05-24 23:03:39 1711 408 \n", + "3 4 2005-05-24 23:04:41 2452 333 \n", + "4 5 2005-05-24 23:05:21 2079 222 \n", + "... ... ... ... ... \n", + "16039 16045 2005-08-23 22:25:26 772 14 \n", + "16040 16046 2005-08-23 22:26:47 4364 74 \n", + "16041 16047 2005-08-23 22:42:48 2088 114 \n", + "16042 16048 2005-08-23 22:43:07 2019 103 \n", + "16043 16049 2005-08-23 22:50:12 2666 393 \n", + "\n", + " return_date staff_id last_update \n", + "0 2005-05-26 22:04:30 1 2006-02-15 21:30:53 \n", + "1 2005-05-28 19:40:33 1 2006-02-15 21:30:53 \n", + "2 2005-06-01 22:12:39 1 2006-02-15 21:30:53 \n", + "3 2005-06-03 01:43:41 2 2006-02-15 21:30:53 \n", + "4 2005-06-02 04:33:21 1 2006-02-15 21:30:53 \n", + "... ... ... ... \n", + "16039 2005-08-25 23:54:26 1 2006-02-15 21:30:53 \n", + "16040 2005-08-27 18:02:47 2 2006-02-15 21:30:53 \n", + "16041 2005-08-25 02:48:48 2 2006-02-15 21:30:53 \n", + "16042 2005-08-31 21:33:07 1 2006-02-15 21:30:53 \n", + "16043 2005-08-30 01:01:12 2 2006-02-15 21:30:53 \n", + "\n", + "[16044 rows x 7 columns]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.read_sql(\" select * from rental;\", engine)" + ] + }, + { + "cell_type": "markdown", + "id": "6b35a8b5", + "metadata": {}, + "source": [ + "+++ Question 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "76a1878c", + "metadata": {}, + "outputs": [], + "source": [ + "#building SQL logic\n", + "\"\"\"\n", + "\n", + "\n", + "select *\n", + "from rental \n", + "where MONTH (rental_date) = :month\n", + " AND\n", + " YEAR (rental_date) = :year\n", + "\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "66d37f71", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "def rentals_month(engine, month, 
year):\n", + " \"\"\"\n", + " Retrieve rental data for a given month and year from the rental table.\n", + " Returns a pandas DataFrame (empty if no data).\n", + " \"\"\"\n", + " \n", + " query = \"\"\"\n", + " SELECT *\n", + " FROM rental\n", + " WHERE MONTH(rental_date) = %(month)s\n", + " AND YEAR(rental_date) = %(year)s;\n", + " \"\"\"\n", + " \n", + " df = pd.read_sql(\n", + " query,\n", + " con=engine,\n", + " params={\"month\": month, \"year\": year}\n", + " )\n", + " \n", + " return df\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "5ca39660", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
rental_idrental_dateinventory_idcustomer_idreturn_datestaff_idlast_update
012005-05-24 22:53:303671302005-05-26 22:04:3012006-02-15 21:30:53
122005-05-24 22:54:3315254592005-05-28 19:40:3312006-02-15 21:30:53
232005-05-24 23:03:3917114082005-06-01 22:12:3912006-02-15 21:30:53
342005-05-24 23:04:4124523332005-06-03 01:43:4122006-02-15 21:30:53
452005-05-24 23:05:2120792222005-06-02 04:33:2112006-02-15 21:30:53
\n", + "
" + ], + "text/plain": [ + " rental_id rental_date inventory_id customer_id \\\n", + "0 1 2005-05-24 22:53:30 367 130 \n", + "1 2 2005-05-24 22:54:33 1525 459 \n", + "2 3 2005-05-24 23:03:39 1711 408 \n", + "3 4 2005-05-24 23:04:41 2452 333 \n", + "4 5 2005-05-24 23:05:21 2079 222 \n", + "\n", + " return_date staff_id last_update \n", + "0 2005-05-26 22:04:30 1 2006-02-15 21:30:53 \n", + "1 2005-05-28 19:40:33 1 2006-02-15 21:30:53 \n", + "2 2005-06-01 22:12:39 1 2006-02-15 21:30:53 \n", + "3 2005-06-03 01:43:41 2 2006-02-15 21:30:53 \n", + "4 2005-06-02 04:33:21 1 2006-02-15 21:30:53 " + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = rentals_month(engine, 5, 2005)\n", + "df.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8fc46a3d", + "metadata": {}, + "outputs": [], + "source": [ + "### Same as aove but with more info on the names of titles \n", + "\n", + "\"\"\"\n", + "SELECT\n", + " r.*,\n", + " i.film_id,\n", + " f.title,\n", + " f.release_year,\n", + " f.rental_rate\n", + "FROM rental r\n", + "LEFT JOIN inventory i\n", + " ON r.inventory_id = i.inventory_id\n", + "LEFT JOIN film f\n", + " ON i.film_id = f.film_id\n", + "WHERE MONTH(r.rental_date) = :month\n", + " AND YEAR(r.rental_date) = :year;\n", + "\n", + "\n", + "\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "482b5923", + "metadata": {}, + "outputs": [], + "source": [ + "def rentals_month_with_film(engine, month, year):\n", + " \"\"\"\n", + " Retrieve rental data for a given month and year,\n", + " joined with inventory and film tables.\n", + " Returns a pandas DataFrame.\n", + " \"\"\"\n", + " \n", + " query = \"\"\"\n", + " SELECT\n", + " r.*,\n", + " i.film_id,\n", + " f.title,\n", + " f.release_year,\n", + " f.rental_rate\n", + " FROM rental r\n", + " LEFT JOIN inventory i\n", + " ON r.inventory_id = i.inventory_id\n", + " LEFT JOIN film f\n", + " ON i.film_id = 
f.film_id\n", + " WHERE MONTH(r.rental_date) = %(month)s\n", + " AND YEAR(r.rental_date) = %(year)s;\n", + " \"\"\"\n", + " \n", + " df = pd.read_sql(\n", + " query,\n", + " con=engine,\n", + " params={\"month\": month, \"year\": year}\n", + " )\n", + " \n", + " return df\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "4ab4d69d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
rental_idrental_dateinventory_idcustomer_idreturn_datestaff_idlast_updatefilm_idtitlerelease_yearrental_rate
012005-05-24 22:53:303671302005-05-26 22:04:3012006-02-15 21:30:5380BLANKET BEVERLY20062.99
122005-05-24 22:54:3315254592005-05-28 19:40:3312006-02-15 21:30:53333FREAKY POCUS20062.99
232005-05-24 23:03:3917114082005-06-01 22:12:3912006-02-15 21:30:53373GRADUATE LORD20062.99
342005-05-24 23:04:4124523332005-06-03 01:43:4122006-02-15 21:30:53535LOVE SUICIDES20060.99
452005-05-24 23:05:2120792222005-06-02 04:33:2112006-02-15 21:30:53450IDOLS SNATCHERS20062.99
\n", + "
" + ], + "text/plain": [ + " rental_id rental_date inventory_id customer_id \\\n", + "0 1 2005-05-24 22:53:30 367 130 \n", + "1 2 2005-05-24 22:54:33 1525 459 \n", + "2 3 2005-05-24 23:03:39 1711 408 \n", + "3 4 2005-05-24 23:04:41 2452 333 \n", + "4 5 2005-05-24 23:05:21 2079 222 \n", + "\n", + " return_date staff_id last_update film_id title \\\n", + "0 2005-05-26 22:04:30 1 2006-02-15 21:30:53 80 BLANKET BEVERLY \n", + "1 2005-05-28 19:40:33 1 2006-02-15 21:30:53 333 FREAKY POCUS \n", + "2 2005-06-01 22:12:39 1 2006-02-15 21:30:53 373 GRADUATE LORD \n", + "3 2005-06-03 01:43:41 2 2006-02-15 21:30:53 535 LOVE SUICIDES \n", + "4 2005-06-02 04:33:21 1 2006-02-15 21:30:53 450 IDOLS SNATCHERS \n", + "\n", + " release_year rental_rate \n", + "0 2006 2.99 \n", + "1 2006 2.99 \n", + "2 2006 2.99 \n", + "3 2006 0.99 \n", + "4 2006 2.99 " + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_joined = rentals_month_with_film(engine, 5, 2005)\n", + "df_joined.head()\n" + ] + }, + { + "cell_type": "markdown", + "id": "65e760bc", + "metadata": {}, + "source": [ + "+++ Q3" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "2225bc2e", + "metadata": {}, + "outputs": [], + "source": [ + "def rental_count_month(df_rentals, month, year):\n", + " \"\"\"\n", + " Returns a DataFrame with the number of rentals per customer\n", + " for a given month and year.\n", + " \"\"\"\n", + "\n", + " # Create dynamic column name\n", + " column_name = f\"rentals_{month:02d}_{year}\"\n", + "\n", + " # Handle empty input DataFrame\n", + " if df_rentals.empty:\n", + " return pd.DataFrame(columns=[\"customer_id\", column_name])\n", + "\n", + " # Group and count rentals\n", + " df_counts = (\n", + " df_rentals\n", + " .groupby(\"customer_id\")[\"rental_id\"]\n", + " .count()\n", + " .reset_index()\n", + " .rename(columns={\"rental_id\": column_name})\n", + " )\n", + "\n", + " return df_counts\n" + ] + }, + { + "cell_type": 
"code", + "execution_count": 26, + "id": "92cf3a2d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idrentals_05_2005
012
121
232
353
463
\n", + "
" + ], + "text/plain": [ + " customer_id rentals_05_2005\n", + "0 1 2\n", + "1 2 1\n", + "2 3 2\n", + "3 5 3\n", + "4 6 3" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "### calling the above function \n", + "\n", + "month = 5\n", + "year = 2005\n", + "\n", + "df_rentals = rentals_month(engine, month, year)\n", + "df_rental_counts = rental_count_month(df_rentals, month, year)\n", + "\n", + "df_rental_counts.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "5643bf0f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
rental_idrental_dateinventory_idcustomer_idreturn_datestaff_idlast_update
012005-05-24 22:53:303671302005-05-26 22:04:3012006-02-15 21:30:53
122005-05-24 22:54:3315254592005-05-28 19:40:3312006-02-15 21:30:53
232005-05-24 23:03:3917114082005-06-01 22:12:3912006-02-15 21:30:53
342005-05-24 23:04:4124523332005-06-03 01:43:4122006-02-15 21:30:53
452005-05-24 23:05:2120792222005-06-02 04:33:2112006-02-15 21:30:53
........................
115111532005-05-31 21:36:4427255062005-06-10 01:26:4422006-02-15 21:30:53
115211542005-05-31 21:42:092732592005-06-08 16:40:0912006-02-15 21:30:53
115311552005-05-31 22:17:1120482512005-06-04 20:27:1122006-02-15 21:30:53
115411562005-05-31 22:37:344601062005-06-01 23:02:3422006-02-15 21:30:53
115511572005-05-31 22:47:451449612005-06-02 18:01:4512006-02-15 21:30:53
\n", + "

1156 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " rental_id rental_date inventory_id customer_id \\\n", + "0 1 2005-05-24 22:53:30 367 130 \n", + "1 2 2005-05-24 22:54:33 1525 459 \n", + "2 3 2005-05-24 23:03:39 1711 408 \n", + "3 4 2005-05-24 23:04:41 2452 333 \n", + "4 5 2005-05-24 23:05:21 2079 222 \n", + "... ... ... ... ... \n", + "1151 1153 2005-05-31 21:36:44 2725 506 \n", + "1152 1154 2005-05-31 21:42:09 2732 59 \n", + "1153 1155 2005-05-31 22:17:11 2048 251 \n", + "1154 1156 2005-05-31 22:37:34 460 106 \n", + "1155 1157 2005-05-31 22:47:45 1449 61 \n", + "\n", + " return_date staff_id last_update \n", + "0 2005-05-26 22:04:30 1 2006-02-15 21:30:53 \n", + "1 2005-05-28 19:40:33 1 2006-02-15 21:30:53 \n", + "2 2005-06-01 22:12:39 1 2006-02-15 21:30:53 \n", + "3 2005-06-03 01:43:41 2 2006-02-15 21:30:53 \n", + "4 2005-06-02 04:33:21 1 2006-02-15 21:30:53 \n", + "... ... ... ... \n", + "1151 2005-06-10 01:26:44 2 2006-02-15 21:30:53 \n", + "1152 2005-06-08 16:40:09 1 2006-02-15 21:30:53 \n", + "1153 2005-06-04 20:27:11 2 2006-02-15 21:30:53 \n", + "1154 2005-06-01 23:02:34 2 2006-02-15 21:30:53 \n", + "1155 2005-06-02 18:01:45 1 2006-02-15 21:30:53 \n", + "\n", + "[1156 rows x 7 columns]" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_rentals" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a591f121", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e1a35ec0", + "metadata": {}, + "outputs": [], + "source": [ + "### Q4:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "ba3ad640", + "metadata": {}, + "outputs": [], + "source": [ + "def compare_rentals(df1, df2):\n", + " \"\"\"\n", + " Compares rental counts between two months and returns\n", + " a DataFrame with a difference column.\n", + " \"\"\"\n", + "\n", + " # Identify rental columns (excluding customer_id)\n", + " col1 = [col for col in 
df1.columns if col != \"customer_id\"][0]\n", + " col2 = [col for col in df2.columns if col != \"customer_id\"][0]\n", + "\n", + " # Merge on customer_id\n", + " df_combined = pd.merge(\n", + " df1,\n", + " df2,\n", + " on=\"customer_id\",\n", + " how=\"outer\"\n", + " )\n", + "\n", + " # Replace NaN with 0 for calculations\n", + " df_combined[[col1, col2]] = df_combined[[col1, col2]].fillna(0)\n", + "\n", + " # Calculate difference\n", + " df_combined[\"difference\"] = df_combined[col1] - df_combined[col2]\n", + "\n", + " return df_combined\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e782d0da", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}