Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
272 changes: 272 additions & 0 deletions lab-connection.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,272 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 22,
"id": "8bc9fd2c",
"metadata": {},
"outputs": [],
"source": [
"import getpass  # To get the password without showing the input\n",
"from urllib.parse import quote_plus\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import pymysql\n",
"from sqlalchemy import create_engine, text\n",
"\n",
"# Prompt for the database password at run time so it is not written into the notebook source.\n",
"password = getpass.getpass()\n"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "770a7cf3",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Engine(mysql+pymysql://root:***@localhost/sakila)"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"bd = \"sakila\"\n",
"# URL-encode the password before embedding it: characters such as '@', ':' or '/'\n",
"# would otherwise be parsed as URL delimiters and corrupt the connection string.\n",
"connection_string = 'mysql+pymysql://root:' + quote_plus(password) + '@localhost/' + bd\n",
"engine = create_engine(connection_string)\n",
"# Echo the engine as the cell output to confirm the target.\n",
"engine\n"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "d5cd0500",
"metadata": {},
"outputs": [],
"source": [
"def rentals_month(engine, month, year):\n",
"    \"\"\"Fetch the rentals made in one calendar month as a DataFrame.\n",
"\n",
"    Args:\n",
"        engine: SQLAlchemy engine connected to the database that holds the `rental` table.\n",
"        month: Month number, 1-12.\n",
"        year: Year to match against `rental_date`.\n",
"\n",
"    Returns:\n",
"        DataFrame with the columns rental_id, customer_id and rental_date,\n",
"        containing the rentals whose rental_date falls in that month and year.\n",
"\n",
"    Raises:\n",
"        ValueError: If `month` is outside 1-12.\n",
"    \"\"\"\n",
"    # Coerce to plain ints so integer-like values (e.g. numpy integers pulled from a\n",
"    # DataFrame) are handed to the DB driver as ordinary numbers.\n",
"    month, year = int(month), int(year)\n",
"    if not 1 <= month <= 12:\n",
"        # An out-of-range month would otherwise just return an empty frame silently.\n",
"        raise ValueError(f\"month must be between 1 and 12, got {month}\")\n",
"\n",
"    # `text` is imported from sqlalchemy in the notebook's import cell; the :month/:year\n",
"    # placeholders are bound parameters, so the values never get spliced into the SQL.\n",
"    query = text(\"\"\"\n",
"        SELECT\n",
"            rental_id,\n",
"            customer_id,\n",
"            rental_date\n",
"        FROM rental\n",
"        WHERE MONTH(rental_date) = :month\n",
"          AND YEAR(rental_date) = :year\n",
"    \"\"\")\n",
"\n",
"    # The context manager releases the connection even if the query fails.\n",
"    with engine.connect() as connection:\n",
"        df = pd.read_sql(query, connection, params={\"month\": month, \"year\": year})\n",
"\n",
"    return df\n"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "a35b1de2",
"metadata": {},
"outputs": [],
"source": [
"def rental_count_month(df, month, year):\n",
"    \"\"\"Count how many rows each customer has in a rentals DataFrame.\n",
"\n",
"    Args:\n",
"        df: DataFrame with a `customer_id` column; every row is counted once.\n",
"        month: Integer month, zero-padded to two digits in the output column label.\n",
"        year: Year used in the output column label.\n",
"\n",
"    Returns:\n",
"        DataFrame with `customer_id` and a count column named\n",
"        `alugueis_<MM>_<YYYY>`, one row per customer, ordered by customer_id.\n",
"    \"\"\"\n",
"    per_customer = df.groupby(\"customer_id\").size()\n",
"    label = f\"alugueis_{month:02d}_{year}\"\n",
"    # Naming the Series first makes reset_index() emit the count column under that label.\n",
"    return per_customer.rename(label).reset_index()\n"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "736d9b75",
"metadata": {},
"outputs": [],
"source": [
"def compare_rentals(df1, df2):\n",
"    \"\"\"Join two per-customer rental counts and add their difference.\n",
"\n",
"    Args:\n",
"        df1: DataFrame whose columns are `customer_id` followed by a count column.\n",
"        df2: DataFrame with the same layout for the period being compared against df1.\n",
"\n",
"    Returns:\n",
"        DataFrame with `customer_id`, both count columns and `diferenca`\n",
"        (df2 count minus df1 count). Only customers present in BOTH inputs are\n",
"        kept, because the join is an inner join.\n",
"    \"\"\"\n",
"    # The count column is taken by position (second column) from each input.\n",
"    col1 = df1.columns[1]\n",
"    col2 = df2.columns[1]\n",
"\n",
"    df_combined = pd.merge(\n",
"        df1,\n",
"        df2,\n",
"        on=\"customer_id\",\n",
"        how=\"inner\"\n",
"    )\n",
"\n",
"    if col1 == col2:\n",
"        # Identically named count columns are disambiguated by merge with its default\n",
"        # '_x'/'_y' suffixes, so look them up under the suffixed names instead of\n",
"        # failing with a KeyError.\n",
"        col1, col2 = f\"{col1}_x\", f\"{col2}_y\"\n",
"\n",
"    df_combined[\"diferenca\"] = df_combined[col2] - df_combined[col1]\n",
"\n",
"    return df_combined\n"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "e5206050",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"May rentals shape: (1156, 3)\n",
" rental_id customer_id rental_date\n",
"0 1 130 2005-05-24 22:53:30\n",
"1 2 459 2005-05-24 22:54:33\n",
"2 3 408 2005-05-24 23:03:39\n",
"3 4 333 2005-05-24 23:04:41\n",
"4 5 222 2005-05-24 23:05:21\n",
" customer_id alugueis_05_2005\n",
"0 1 2\n",
"1 2 1\n",
"2 3 2\n",
"3 5 3\n",
"4 6 3\n",
"June rentals shape: (2311, 3)\n",
" rental_id customer_id rental_date\n",
"0 1158 416 2005-06-14 22:53:33\n",
"1 1159 516 2005-06-14 22:55:13\n",
"2 1160 239 2005-06-14 23:00:34\n",
"3 1161 285 2005-06-14 23:07:08\n",
"4 1162 310 2005-06-14 23:09:38\n",
" customer_id alugueis_06_2005\n",
"0 1 7\n",
"1 2 1\n",
"2 3 4\n",
"3 4 6\n",
"4 5 5\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>alugueis_05_2005</th>\n",
" <th>alugueis_06_2005</th>\n",
" <th>diferenca</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>5</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>6</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" customer_id alugueis_05_2005 alugueis_06_2005 diferenca\n",
"0 1 2 7 5\n",
"1 2 1 1 0\n",
"2 3 2 4 2\n",
"3 5 3 5 2\n",
"4 6 3 4 1"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# May 2005: pull the raw rentals, then aggregate them per customer.\n",
"may_df = rentals_month(engine, month=5, year=2005)\n",
"may_count = rental_count_month(may_df, month=5, year=2005)\n",
"print(\"May rentals shape:\", may_df.shape)\n",
"print(may_df.head())\n",
"print(may_count.head())\n",
"\n",
"# June 2005: same two steps for the comparison month.\n",
"june_df = rentals_month(engine, month=6, year=2005)\n",
"june_count = rental_count_month(june_df, month=6, year=2005)\n",
"print(\"June rentals shape:\", june_df.shape)\n",
"print(june_df.head())\n",
"print(june_count.head())\n",
"\n",
"# Difference is June minus May, for customers that appear in both months.\n",
"comparison = compare_rentals(may_count, june_count)\n",
"comparison.head()\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}