"""Compare per-customer rental counts between two months of the Sakila database.

This is the code from the cells of ``lab_sql_python_connection.ipynb``,
laid out as a plain script. The helper functions have no import-time side
effects; the password prompt and the database work only happen when the
file is executed directly (or run as notebook cells, where ``__name__`` is
also ``"__main__"``).
"""

import getpass
from urllib.parse import quote

import pandas as pd


def build_db_url(password: str) -> str:
    """Return the SQLAlchemy URL for the local ``sakila`` MySQL database.

    Args:
        password: Plain-text password of the MySQL ``root`` user.

    Returns:
        A ``mysql+pymysql://`` URL string. The password is percent-encoded
        so that characters such as ``@``, ``:``, ``/`` or ``%`` cannot be
        mistaken for URL delimiters or escapes.
    """
    # safe="" forces "/" to be encoded as well; quote() leaves it alone by
    # default, which would cut the password short when the URL is parsed.
    encoded_password = quote(password, safe="")
    return f"mysql+pymysql://root:{encoded_password}@localhost:3306/sakila"


def rentals_month(engine, month: int, year: int) -> pd.DataFrame:
    """Fetch the rentals made in one calendar month.

    Args:
        engine: Database connectable handed to ``pandas.read_sql`` (the
            script passes a SQLAlchemy engine).
        month: Month number matched against ``MONTH(rental_date)``.
        year: Year matched against ``YEAR(rental_date)``.

    Returns:
        A DataFrame with the columns ``rental_id``, ``rental_date`` and
        ``customer_id``, one row per matching rental.
    """
    # month and year are bound as query parameters, never interpolated
    # into the SQL text.
    query = """
        SELECT
            rental_id,
            rental_date,
            customer_id
        FROM rental
        WHERE MONTH(rental_date) = %s
          AND YEAR(rental_date) = %s
    """
    return pd.read_sql(query, engine, params=(month, year))


def rental_count_month(df: pd.DataFrame, month: int, year: int) -> pd.DataFrame:
    """Count the rentals of each customer in ``df``.

    Args:
        df: Rentals of a single month; must contain a ``customer_id``
            column with one row per rental.
        month: Month number, used only to name the count column.
        year: Year, used only to name the count column.

    Returns:
        A DataFrame with ``customer_id`` and a count column named
        ``rentals_<MM>_<YYYY>`` (month zero-padded to two digits).
    """
    column_name = f"rentals_{month:02d}_{year}"
    return df.groupby("customer_id").size().reset_index(name=column_name)


def compare_rentals(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
    """Merge two per-customer count tables and compute their difference.

    Args:
        df1: Counts for the earlier period (``customer_id`` + count column).
        df2: Counts for the later period (``customer_id`` + count column).

    Returns:
        One row per customer present in either input, with both count
        columns and a ``difference`` column equal to the ``df2`` count
        minus the ``df1`` count. A customer missing from one input gets a
        count of 0 there. Counts and difference are integers.
    """
    merged = pd.merge(df1, df2, on="customer_id", how="outer")
    merged = merged.fillna(0)

    count_cols = merged.columns.drop("customer_id")
    earlier, later = count_cols[0], count_cols[1]

    # The outer merge introduces NaN for customers missing on one side,
    # which silently turns the count columns into floats; counts are whole
    # numbers, so restore an integer dtype once the gaps are filled.
    merged = merged.astype({earlier: "int64", later: "int64"})

    merged["difference"] = merged[later] - merged[earlier]
    return merged


if __name__ == "__main__":
    # Imported here rather than at the top so the helpers above can be
    # imported and tested on a machine without SQLAlchemy or a MySQL driver.
    from sqlalchemy import create_engine

    password = getpass.getpass()
    engine = create_engine(build_db_url(password))

    may_df = rentals_month(engine, 5, 2005)
    june_df = rentals_month(engine, 6, 2005)

    may_counts = rental_count_month(may_df, 5, 2005)
    june_counts = rental_count_month(june_df, 6, 2005)

    comparison = compare_rentals(may_counts, june_counts)

    # In the notebook's recorded run, the first printed row was customer 1
    # with 2 rentals in May 2005, 7 in June 2005 and a difference of 5.
    print(comparison.head())