import getpass  # read the password without echoing it to the screen
from urllib.parse import quote

import numpy as np
import pandas as pd
import pymysql
from sqlalchemy import create_engine, text

password = getpass.getpass()

bd = "sakila"
# Percent-encode the password: characters such as "@", "/" or ":" would
# otherwise be read as URL delimiters when the connection string is parsed.
connection_string = (
    'mysql+pymysql://root:' + quote(password, safe="") + '@localhost/' + bd
)
engine = create_engine(connection_string)
# Bare expression so the notebook displays the engine (its repr masks the
# password as "***").
engine


def rentals_month(engine, month, year):
    """Fetch the rentals made in a given month and year.

    Runs a parameterized query against the ``rental`` table, filtering on
    ``MONTH(rental_date)`` and ``YEAR(rental_date)``.

    Args:
        engine: SQLAlchemy engine used to open the connection.
        month: Month number bound to the ``:month`` parameter.
        year: Year bound to the ``:year`` parameter.

    Returns:
        A DataFrame with the columns ``rental_id``, ``customer_id`` and
        ``rental_date``.
    """
    # text() is required for the ":name" bound-parameter syntax; the values
    # are passed separately so they are never interpolated into the SQL.
    query = text("""
        SELECT
            rental_id,
            customer_id,
            rental_date
        FROM rental
        WHERE MONTH(rental_date) = :month
          AND YEAR(rental_date) = :year
    """)

    # The context manager closes the connection once the frame is loaded.
    with engine.connect() as connection:
        df = pd.read_sql(query, connection, params={
            "month": month,
            "year": year
        })

    return df


def rental_count_month(df, month, year):
    """Count the rows of ``df`` per customer.

    Args:
        df: DataFrame with a ``customer_id`` column, one row per rental.
        month: Integer month, used only to build the count column's name
            (zero-padded to two digits).
        year: Year, used only to build the count column's name.

    Returns:
        A DataFrame with ``customer_id`` and a count column named
        ``alugueis_<MM>_<YYYY>``.
    """
    column_name = f"alugueis_{month:02d}_{year}"

    result = (
        df.groupby("customer_id")
        .size()
        .reset_index(name=column_name)
    )

    return result
"cell_type": "code", + "execution_count": 28, + "id": "736d9b75", + "metadata": {}, + "outputs": [], + "source": [ + "def compare_rentals(df1, df2):\n", + " df_combined = pd.merge(\n", + " df1,\n", + " df2,\n", + " on=\"customer_id\",\n", + " how=\"inner\"\n", + " )\n", + " \n", + " col1 = df1.columns[1]\n", + " col2 = df2.columns[1]\n", + " \n", + " df_combined[\"diferenca\"] = df_combined[col2] - df_combined[col1]\n", + " \n", + " return df_combined\n" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "e5206050", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "May rentals shape: (1156, 3)\n", + " rental_id customer_id rental_date\n", + "0 1 130 2005-05-24 22:53:30\n", + "1 2 459 2005-05-24 22:54:33\n", + "2 3 408 2005-05-24 23:03:39\n", + "3 4 333 2005-05-24 23:04:41\n", + "4 5 222 2005-05-24 23:05:21\n", + " customer_id alugueis_05_2005\n", + "0 1 2\n", + "1 2 1\n", + "2 3 2\n", + "3 5 3\n", + "4 6 3\n", + "June rentals shape: (2311, 3)\n", + " rental_id customer_id rental_date\n", + "0 1158 416 2005-06-14 22:53:33\n", + "1 1159 516 2005-06-14 22:55:13\n", + "2 1160 239 2005-06-14 23:00:34\n", + "3 1161 285 2005-06-14 23:07:08\n", + "4 1162 310 2005-06-14 23:09:38\n", + " customer_id alugueis_06_2005\n", + "0 1 7\n", + "1 2 1\n", + "2 3 4\n", + "3 4 6\n", + "4 5 5\n" + ] + }, + { + "data": { + "text/html": [ + "
| \n", + " | customer_id | \n", + "alugueis_05_2005 | \n", + "alugueis_06_2005 | \n", + "diferenca | \n", + "
|---|---|---|---|---|
| 0 | \n", + "1 | \n", + "2 | \n", + "7 | \n", + "5 | \n", + "
| 1 | \n", + "2 | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "
| 2 | \n", + "3 | \n", + "2 | \n", + "4 | \n", + "2 | \n", + "
| 3 | \n", + "5 | \n", + "3 | \n", + "5 | \n", + "2 | \n", + "
| 4 | \n", + "6 | \n", + "3 | \n", + "4 | \n", + "1 | \n", + "