diff --git a/inventory-reconciliation/ExploratoryDataAnalysis.ipynb b/inventory-reconciliation/ExploratoryDataAnalysis.ipynb new file mode 100644 index 0000000..5f40824 --- /dev/null +++ b/inventory-reconciliation/ExploratoryDataAnalysis.ipynb @@ -0,0 +1,1748 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "12db1492", + "metadata": {}, + "source": [ + "### Import Libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "a3b20ce2", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd \n", + "import numpy as np \n", + "import matplotlib.pyplot as plt\n" + ] + }, + { + "cell_type": "markdown", + "id": "034b1650", + "metadata": {}, + "source": [ + "### Data Loading" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "401cbf42", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Shape of before_data (75, 5)\n", + "\n", + "Columns in before_data Index(['sku', 'name', 'quantity', 'location', 'last_counted'], dtype='object')\n", + "\n", + "Data summary\n", + "\n", + "\n", + "RangeIndex: 75 entries, 0 to 74\n", + "Data columns (total 5 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 sku 75 non-null object\n", + " 1 name 75 non-null object\n", + " 2 quantity 75 non-null int64 \n", + " 3 location 75 non-null object\n", + " 4 last_counted 75 non-null object\n", + "dtypes: int64(1), object(4)\n", + "memory usage: 3.1+ KB\n", + "None\n" + ] + } + ], + "source": [ + "before_data = pd.read_csv(\"data/snapshot_1.csv\")\n", + "print(f\"Shape of before_data {before_data.shape}\")\n", + "print(f\"\\nColumns in before_data {before_data.columns}\")\n", + "print(f\"\\nData summary\\n\")\n", + "print(before_data.info())" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "b44445e9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
skunamequantitylocationlast_counted
47SKU-048Heat Shrink Tubing500Warehouse C2024-01-08
19SKU-020Ethernet Cable Cat6a400Warehouse C2024-01-08
64SKU-065Ring Light 10in40Warehouse A2024-01-08
65SKU-066Ring Light 18in25Warehouse A2024-01-08
60SKU-061Desk Organizer90Warehouse B2024-01-08
\n", + "
" + ], + "text/plain": [ + " sku name quantity location last_counted\n", + "47 SKU-048 Heat Shrink Tubing 500 Warehouse C 2024-01-08\n", + "19 SKU-020 Ethernet Cable Cat6a 400 Warehouse C 2024-01-08\n", + "64 SKU-065 Ring Light 10in 40 Warehouse A 2024-01-08\n", + "65 SKU-066 Ring Light 18in 25 Warehouse A 2024-01-08\n", + "60 SKU-061 Desk Organizer 90 Warehouse B 2024-01-08" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(before_data.sample(5))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "8b8af1e5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Shape of after_data (79, 5)\n", + "\n", + "Columns in after_data Index(['sku', 'product_name', 'qty', 'warehouse', 'updated_at'], dtype='object')\n", + "\n", + "Data summary\n", + "\n", + "\n", + "RangeIndex: 79 entries, 0 to 78\n", + "Data columns (total 5 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 sku 79 non-null object \n", + " 1 product_name 79 non-null object \n", + " 2 qty 79 non-null float64\n", + " 3 warehouse 79 non-null object \n", + " 4 updated_at 79 non-null object \n", + "dtypes: float64(1), object(4)\n", + "memory usage: 3.2+ KB\n", + "None\n" + ] + } + ], + "source": [ + "after_data = pd.read_csv(\"data/snapshot_2.csv\")\n", + "print(f\"Shape of after_data {after_data.shape}\")\n", + "print(f\"\\nColumns in after_data {after_data.columns}\")\n", + "print(f\"\\nData summary\\n\")\n", + "print(after_data.info())" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "ded7e22e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
skuproduct_nameqtywarehouseupdated_at
0SKU-001Widget A145.0Warehouse A2024-01-15
1SKU-002Widget B70.0Warehouse A2024-01-15
2SKU-003Gadget Pro185.0Warehouse B2024-01-15
3SKU-004Gadget Lite48.0Warehouse A2024-01-15
4SKU005Connector Cable 6ft480.0Warehouse C2024-01-15
\n", + "
" + ], + "text/plain": [ + " sku product_name qty warehouse updated_at\n", + "0 SKU-001 Widget A 145.0 Warehouse A 2024-01-15\n", + "1 SKU-002 Widget B 70.0 Warehouse A 2024-01-15\n", + "2 SKU-003 Gadget Pro 185.0 Warehouse B 2024-01-15\n", + "3 SKU-004 Gadget Lite 48.0 Warehouse A 2024-01-15\n", + "4 SKU005 Connector Cable 6ft 480.0 Warehouse C 2024-01-15" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "after_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "c6dea354", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
skuproduct_nameqtywarehouseupdated_at
22SKU-023HDMI Cable 10ft210.0Warehouse A2024-01-15
70SKU-072XLR Microphone18.0Warehouse A2024-01-15
30SKU-033Extension Cord 25ft172.0Warehouse B2024-01-15
65SKU-067Tripod Small52.0Warehouse B2024-01-15
18SKU-019Ethernet Cable Cat6580.0Warehouse C2024-01-15
61SKU-063Headphone Hook192.0Warehouse B2024-01-15
5SKU-006Connector Cable 10ft350.0Warehouse C2024-01-15
7sku-008Power Supply Unit Pro42.0Warehouse A2024-01-15
76SKU-078Capture Card12.0Warehouse A2024-01-15
75SKU-077Stream Deck XL8.0Warehouse A2024-01-15
\n", + "
" + ], + "text/plain": [ + " sku product_name qty warehouse updated_at\n", + "22 SKU-023 HDMI Cable 10ft 210.0 Warehouse A 2024-01-15\n", + "70 SKU-072 XLR Microphone 18.0 Warehouse A 2024-01-15\n", + "30 SKU-033 Extension Cord 25ft 172.0 Warehouse B 2024-01-15\n", + "65 SKU-067 Tripod Small 52.0 Warehouse B 2024-01-15\n", + "18 SKU-019 Ethernet Cable Cat6 580.0 Warehouse C 2024-01-15\n", + "61 SKU-063 Headphone Hook 192.0 Warehouse B 2024-01-15\n", + "5 SKU-006 Connector Cable 10ft 350.0 Warehouse C 2024-01-15\n", + "7 sku-008 Power Supply Unit Pro 42.0 Warehouse A 2024-01-15\n", + "76 SKU-078 Capture Card 12.0 Warehouse A 2024-01-15\n", + "75 SKU-077 Stream Deck XL 8.0 Warehouse A 2024-01-15" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(after_data.sample(10))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "857373ec", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " quantity\n", + "count 75.000000\n", + "mean 257.200000\n", + "std 334.512796\n", + "min 15.000000\n", + "25% 52.500000\n", + "50% 150.000000\n", + "75% 300.000000\n", + "max 2000.000000\n" + ] + } + ], + "source": [ + "print(before_data.describe())" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "7a6a121e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " qty\n", + "count 79.000000\n", + "mean 232.379747\n", + "std 311.725730\n", + "min -5.000000\n", + "25% 43.500000\n", + "50% 115.000000\n", + "75% 285.000000\n", + "max 1850.000000\n" + ] + } + ], + "source": [ + "print(after_data.describe())" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "f853745a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Minus value in quanitiy column which is concerning!\n" + ] + } + ], + "source": [ + "print(f\"Minus value in quanitiy column which is 
concerning!\")" + ] + }, + { + "cell_type": "markdown", + "id": "7bcb4014", + "metadata": {}, + "source": [ + "### Exploratory Data Analysis" + ] + }, + { + "cell_type": "markdown", + "id": "7a9476d5", + "metadata": {}, + "source": [ + "##### Is the sku column formatted consistently? \n", + "\n", + "Length consistency, case consistency, whitespace & hidden characters, pattern consistency, cross-snapshot consistency, uniqueness --> an inconsistency in any of these will silently break merge() and cause problems." + ] + }, + { + "cell_type": "markdown", + "id": "6d51ca9c", + "metadata": {}, + "source": [ + "##### Uniqueness" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "11e3a2fb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Shape of before_data (75, 5)\n", + "Number of unique sku in the before_data : 75\n", + "\n", + "Shape of after_data (79, 5)\n", + "Number of unique sku in the after_data : 78\n" + ] + } + ], + "source": [ + "print(f\"Shape of before_data {before_data.shape}\")\n", + "print(f\"Number of unique sku in the before_data : {before_data['sku'].nunique()}\")\n", + "#------------------------------------------------\n", + "print(f\"\\nShape of after_data {after_data.shape}\")\n", + "print(f\"Number of unique sku in the after_data : {after_data['sku'].nunique()}\")" + ] + }, + { + "cell_type": "markdown", + "id": "eb767f4d", + "metadata": {}, + "source": [ + "##### Length consistency" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "77b8f90f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sku\n", + "7 75\n", + "Name: count, dtype: int64\n", + "Common length : 7\n", + "Number of sku that don't match most of common length \n", + "\n", + "0\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
skunamequantitylocationlast_counted
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [sku, name, quantity, location, last_counted]\n", + "Index: []" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# See the distribution of SKU lengths\n", + "print(before_data['sku'].str.len().value_counts())\n", + "\n", + "# Find SKUs that don't match the most common length\n", + "common_len_1 = before_data['sku'].str.len().mode()[0]\n", + "print(f\"Common length : {common_len_1}\")\n", + "print(f\"Number of sku that don't match most of common length \\n\")\n", + "print(len(before_data[before_data['sku'].str.len() != common_len_1]))\n", + "display(before_data[before_data['sku'].str.len() != common_len_1].head())" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "6a748252", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sku\n", + "7 77\n", + "6 2\n", + "Name: count, dtype: int64\n", + "Common length : 7\n", + "Number of sku that don't match most of common length \n", + "\n", + "2\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
skuproduct_nameqtywarehouseupdated_at
4SKU005Connector Cable 6ft480.0Warehouse C2024-01-15
17SKU018Ethernet Cable Cat5750.0Warehouse C2024-01-15
\n", + "
" + ], + "text/plain": [ + " sku product_name qty warehouse updated_at\n", + "4 SKU005 Connector Cable 6ft 480.0 Warehouse C 2024-01-15\n", + "17 SKU018 Ethernet Cable Cat5 750.0 Warehouse C 2024-01-15" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# See the distribution of SKU lengths\n", + "print(after_data['sku'].str.len().value_counts())\n", + "\n", + "# Find SKUs that don't match the most common length\n", + "common_len_2 = after_data['sku'].str.len().mode()[0]\n", + "print(f\"Common length : {common_len_2}\")\n", + "print(f\"Number of sku that don't match most of common length \\n\")\n", + "print(len(after_data[after_data['sku'].str.len() != common_len_2]))\n", + "display(after_data[after_data['sku'].str.len() != common_len_2].head())" + ] + }, + { + "cell_type": "markdown", + "id": "c0f88869", + "metadata": {}, + "source": [ + "##### Case consistency" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "7338feae", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "All SKUs are uppercase in before_data\n", + "\n", + "\n", + "Mixed case formatting detected in after_data\n", + "Uppercase: 78, Not uppercase: 1\n" + ] + } + ], + "source": [ + "# Case consistency --> SKU vs SkU\n", + "all_upper_1 = (before_data['sku'] == before_data['sku'].str.upper()).all()\n", + "all_lower_1 = (before_data['sku'] == before_data['sku'].str.lower()).all()\n", + "\n", + "if all_upper_1:\n", + " print(\"All SKUs are uppercase in before_data\")\n", + "elif all_lower_1:\n", + " print(\"All SKUs are lowercase in before_data\")\n", + "else:\n", + " print(\"Mixed case formatting detected in before_data\")\n", + " is_upper_1 = before_data['sku'] == before_data['sku'].str.upper()\n", + " print(f\"Uppercase: {is_upper_1.sum()}, Not uppercase: {(~is_upper_1).sum()}\")\n", + "\n", + "#----------------------------------------------------------\n", + "print(\"\\n\")\n", + "\n", + "all_upper_2 
= (after_data['sku'] == after_data['sku'].str.upper()).all()\n", + "all_lower_2 = (after_data['sku'] == after_data['sku'].str.lower()).all()\n", + "\n", + "if all_upper_2:\n", + " print(\"All SKUs are uppercase in after_data\")\n", + "elif all_lower_2:\n", + " print(\"All SKUs are lowercase in after_data\")\n", + "else:\n", + " print(\"Mixed case formatting detected in after_data\")\n", + " is_upper_2 = after_data['sku'] == after_data['sku'].str.upper()\n", + " print(f\"Uppercase: {is_upper_2.sum()}, Not uppercase: {(~is_upper_2).sum()}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "2d371a56", + "metadata": {}, + "source": [ + "##### Whitespaces" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "9a65960a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "White spaces in before_data 0\n", + "White spaces in after_data 0\n" + ] + }, + { + "data": { + "text/plain": [ + "Series([], Name: sku, dtype: object)" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Count how many SKUs change after stripping\n", + "has_whitespace_1 = before_data['sku'] != before_data['sku'].str.strip()\n", + "print(f\"White spaces in before_data {has_whitespace_1.sum()}\") \n", + "\n", + "# See the actual offenders\n", + "before_data[has_whitespace_1]['sku'].apply(repr) # repr() makes spaces visible\n", + "\n", + "#-------------------------------------------\n", + "has_whitespace_2 = after_data['sku'] != after_data['sku'].str.strip()\n", + "print(f\"White spaces in after_data {has_whitespace_2.sum()}\") \n", + "\n", + "after_data[has_whitespace_2]['sku'].apply(repr) # repr() makes spaces visible" + ] + }, + { + "cell_type": "markdown", + "id": "3f4d73f4", + "metadata": {}, + "source": [ + "##### Hidden characters" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "c66d3ddf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", +
"output_type": "stream", + "text": [ + "Hidden characters in before_data : 0\n", + "Hidden characters in after_data : 0\n" + ] + }, + { + "data": { + "text/plain": [ + "Series([], Name: sku, dtype: object)" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# hidden characters \n", + "# Check if any SKU contains non-printable characters\n", + "has_hidden_1 = before_data['sku'].str.contains(r'[^\\x20-\\x7E]', regex=True)\n", + "print(f\"Hidden characters in before_data : { has_hidden_1.sum()}\")\n", + "before_data[has_hidden_1]['sku'].apply(repr)\n", + "\n", + "#--------------------------------------------\n", + "has_hidden_2 = after_data['sku'].str.contains(r'[^\\x20-\\x7E]', regex=True)\n", + "print(f\"Hidden characters in after_data : { has_hidden_2.sum()}\")\n", + "after_data[has_hidden_2]['sku'].apply(repr)\n" + ] + }, + { + "cell_type": "markdown", + "id": "dad8f0e4", + "metadata": {}, + "source": [ + "##### Null values check" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "fd32f810", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Null values in before_data : \n", + "sku 0\n", + "name 0\n", + "quantity 0\n", + "location 0\n", + "last_counted 0\n", + "dtype: int64\n", + "Null values in after_data : \n", + "sku 0\n", + "product_name 0\n", + "qty 0\n", + "warehouse 0\n", + "updated_at 0\n", + "dtype: int64\n", + "\n", + "At what percentages (before_data): \n", + "sku 0.0\n", + "name 0.0\n", + "quantity 0.0\n", + "location 0.0\n", + "last_counted 0.0\n", + "dtype: float64\n", + "At what percentages (after_data): \n", + "sku 0.0\n", + "product_name 0.0\n", + "qty 0.0\n", + "warehouse 0.0\n", + "updated_at 0.0\n", + "dtype: float64\n" + ] + } + ], + "source": [ + "print(f\"Null values in before_data : \\n{before_data.isnull().sum()}\")\n", + "print(f\"Null values in after_data : \\n{after_data.isnull().sum()}\")\n", + "\n", + 
"print(f\"\\nAt what percentages (before_data): \\\n", + " \\n{(before_data.isnull().sum() / len(before_data) * 100).round(2)}\")\n", + "print(f\"At what percentages (after_data): \\\n", + " \\n{(after_data.isnull().sum() / len(after_data) * 100).round(2)}\")" + ] + }, + { + "cell_type": "markdown", + "id": "617fb5ac", + "metadata": {}, + "source": [ + "##### Disguised nulls" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "cc7eb6d2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "In before_data: \n", + "\n" + ] + } + ], + "source": [ + "print(f\"In before_data: \\n\")\n", + "for col in before_data.columns:\n", + " suspicious = before_data[col].isin(['', 'N/A', 'None', 'null', '-', 'nan', 'NaN'])\n", + " if suspicious.any():\n", + " print(f\"{col}: {suspicious.sum()} disguised nulls\")" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "In after_data: \n", + "\n" + ] + } + ], + "source": [ + "print(f\"In after_data: \\n\")\n", + "for col in after_data.columns:\n", + " suspicious = after_data[col].isin(['', 'N/A', 'None', 'null', '-', 'nan', 'NaN'])\n", + " if suspicious.any():\n", + " print(f\"{col}: {suspicious.sum()} disguised nulls\")" + ] + }, + { + "cell_type": "markdown", + "id": "37d796fb", + "metadata": {}, + "source": [ + "##### Duplication checks" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "460b7c89", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Row duplications in before_data : 0\n", + "Row duplications in after_data : 0\n" + ] + } + ], + "source": [ + "# row duplicates \n", + "print(f\"Row duplications in before_data : {before_data.duplicated().sum()}\")\n", + "print(f\"Row duplications in after_data : {after_data.duplicated().sum()}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 
20, + "id": "f1aa2fc5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Key duplications in before_data : 0\n", + "Key duplications in after_data : 1\n" + ] + } + ], + "source": [ + "# key duplicates \n", + "print(f\"Key duplications in before_data : {before_data.duplicated(subset=['sku']).sum()}\")\n", + "print(f\"Key duplications in after_data : {after_data.duplicated(subset=['sku']).sum()}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
skuproduct_nameqtywarehouseupdated_at
42SKU-045Multimeter Professional23.0Warehouse A2024-01-15
52SKU-045Multimeter Pro-5.0Warehouse B2024-01-15
\n", + "
" + ], + "text/plain": [ + " sku product_name qty warehouse updated_at\n", + "42 SKU-045 Multimeter Professional 23.0 Warehouse A 2024-01-15\n", + "52 SKU-045 Multimeter Pro -5.0 Warehouse B 2024-01-15" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# show duplicates\n", + "after_data[after_data.duplicated(subset=['sku'],keep=False)]" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
skunamequantitylocationlast_counted
44SKU-045Multimeter Pro25Warehouse A2024-01-08
\n", + "
" + ], + "text/plain": [ + " sku name quantity location last_counted\n", + "44 SKU-045 Multimeter Pro 25 Warehouse A 2024-01-08" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "before_data[before_data['sku']=='SKU-045']" + ] + }, + { + "cell_type": "markdown", + "id": "85668ae5", + "metadata": {}, + "source": [ + "##### Datetime data" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "34804f68", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Min in before data: 2024-01-08 00:00:00\n", + "Max in before data: 2024-01-08 00:00:00\n", + "Unparseable dates: 0\n", + "[]\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
skunamequantitylocationlast_counted
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [sku, name, quantity, location, last_counted]\n", + "Index: []" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "converted_1 = pd.to_datetime(before_data['last_counted'], errors='coerce')\n", + "\n", + "print(f\"Min in before data: {converted_1.min()}\")\n", + "print(f\"Max in before data: {converted_1.max()}\")\n", + "\n", + "failed_1 = before_data[converted_1.isna() & before_data['last_counted'].notna()]\n", + "print(f\"Unparseable dates: {len(failed_1)}\")\n", + "print(failed_1['last_counted'].unique())\n", + "\n", + "display(failed_1.head())" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "a4a7ce39", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Min in after data: 2024-01-15 00:00:00\n", + "Max in after data: 2024-01-15 00:00:00\n", + "Unparseable dates: 1\n", + "['01/15/2024']\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
skuproduct_nameqtywarehouseupdated_at
32SKU-035Cable Ties 100pk1420.0Warehouse C01/15/2024
\n", + "
" + ], + "text/plain": [ + " sku product_name qty warehouse updated_at\n", + "32 SKU-035 Cable Ties 100pk 1420.0 Warehouse C 01/15/2024" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "converted_2 = pd.to_datetime(after_data['updated_at'], errors='coerce')\n", + "\n", + "print(f\"Min in after data: {converted_2.min()}\")\n", + "print(f\"Max in after data: {converted_2.max()}\")\n", + "\n", + "failed_2 = after_data[converted_2.isna() & after_data['updated_at'].notna()]\n", + "print(f\"Unparseable dates: {len(failed_2)}\")\n", + "print(failed_2['updated_at'].unique())\n", + "\n", + "display(failed_2.head())" + ] + }, + { + "cell_type": "markdown", + "id": "5328a995", + "metadata": {}, + "source": [ + "##### SKU vs Product name combination mismatch" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "9b8144cd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mismatches: 1\n", + " sku name product_name\n", + "39 SKU-045 Multimeter Pro Multimeter Professional\n" + ] + } + ], + "source": [ + "\n", + "merged = before_data.merge(after_data, left_on='sku', right_on='sku', how='inner')\n", + "\n", + "\n", + "mismatched = merged[\n", + " merged['name'].str.strip().str.lower() != merged['product_name'].str.strip().str.lower()\n", + "]\n", + "\n", + "print(f\"Mismatches: {len(mismatched)}\")\n", + "print(mismatched[['sku', 'name', 'product_name']])" + ] + }, + { + "cell_type": "markdown", + "id": "a3693360", + "metadata": {}, + "source": [ + "##### Basic stats" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "481fbad3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== Snapshot 1 ===\n", + " quantity\n", + "count 75.000000\n", + "mean 257.200000\n", + "std 334.512796\n", + "min 15.000000\n", + "25% 52.500000\n", + "50% 150.000000\n", + "75% 300.000000\n", + "max 2000.000000\n", + "\n", + "=== Snapshot 
2 ===\n", + " qty\n", + "count 79.000000\n", + "mean 232.379747\n", + "std 311.725730\n", + "min -5.000000\n", + "25% 43.500000\n", + "50% 115.000000\n", + "75% 285.000000\n", + "max 1850.000000\n" + ] + } + ], + "source": [ + "print(\"=== Snapshot 1 ===\")\n", + "print(before_data.describe())\n", + "print(\"\\n=== Snapshot 2 ===\")\n", + "print(after_data.describe())" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "c41756ea", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "location\n", + "Warehouse A 33\n", + "Warehouse B 23\n", + "Warehouse C 19\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "before_data['location'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "87bd026f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "warehouse\n", + "Warehouse A 38\n", + "Warehouse B 22\n", + "Warehouse C 19\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "after_data['warehouse'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "eabeef49", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA90AAAHqCAYAAAAZLi26AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjYsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvq6yFwwAAAAlwSFlzAAAPYQAAD2EBqD+naQAATBVJREFUeJzt3Xl8jXf+///nkU0iC0K2IkkrqC1TSwlqa2ljqWLsRVDDoEMxWjXqZL4tHYbSUdrpkvBpbe0HXzNqb6JVS+1aTBmNPRFSEoKE5Pr90a/z62ki4siVk8Tjfrud2y3Xdb3P+3qd83bl5Ol6X9exGIZhCAAAAAAAFLlyzi4AAAAAAICyitANAAAAAIBJCN0AAAAAAJiE0A0AAAAAgEkI3QAAAAAAmITQDQAAAACASQjdAAAAAACYhNANAAAAAIBJCN0AAAAAAJiE0A0AKNV27typXr16KTg4WO7u7goODlbv3r21e/duZ5dm5/z587JarTpw4ECebVarVRaLxW7dggULFB8fX+R1WCwW28PFxUWVKlVSZGSkRowYoZ07d+Zpf/LkSVkslvuuZcmSJZo7d+59PSe/fd15by5dunRffRXkyJEjslqtOnnyZJ5tMTExCgsLK7J9AQBA6AYAlFr/+Mc/1LJlS509e1YzZ87U5s2bNWvWLJ05c0bNmzfXP//5T2eXaHP+/HnFxsbmG7pfeukl7dixw26dWaFbkn7/+99rx44d2rZtm5YtW6ZBgwZp586dioqK0tixY+3aBgcHa8eOHercufN97cOR0O3ovu7XkSNHFBsbm2/onjp1qlatWmXq/gEADxdXZxcAAIAjvv32W40bN06dOnXSqlWr5Or6/3+k9e3bV927d9eoUaP0xBNPqGnTpk6s9N6qVaumatWqFdv+AgMD1bx5c9vys88+q3HjxukPf/iD3n33XdWpU0d//OMfJUkeHh52bc2Qk5Oj27dvF8u+7uWxxx5z6v4BAGUPZ7oBAKXSjBkzZLFYtHDhQrvALUmurq5asGCBrd0dd5s6nN/07vfee0+tW7dWQECAKlSooAYNGmjmzJm6deuWXbu2bduqfv362r17t5566il5eXnp0Ucf1dtvv63c3FxJUmJioi34DxkyxDa922q15rv/sLAwHT58WFu3brW1DQsL07Vr11SxYkWNGDEiz2s4efKkXFxcNGvWrEK+g/ZcXFw0f/58ValSxa6P/KZ8X7x4UX/4wx9UvXp1eXh4qGrVqmrZsqU2b95se0/Wrl2rU6dO2U1n/3V/M2fO1Jtvvqnw8HB5eHgoISGhwKnsZ86cUY8ePeTr6ys/Pz+9+OKLunjxol2bX7+nvxYWFqaYmBhJUnx8vHr16iVJateuna22O/vM79/IzZs3NXnyZIWHh8vd3V2PPPKIRo8erStXruTZT5cuXbR+/Xo1atRInp6eqlOnjj755JN7vPsAgLKMM90AgFInJydHCQkJatKkyV3PEFevXl2NGzfW5s2blZubq3Ll7u//mU+cOKH+/fvbgtbBgwf11ltv6T//+U+eEJWSkqIBAwZowoQJmjZtmlatWqXJkycrJCREgwYNUqNGjRQXF6chQ4boL3/5i2369N1qX7VqlX7/+9/Lz8/P9p8HHh4e8vb21tChQ/XPf/5TM2fOlJ+fn+05CxYskLu7u4YOHXpfr/PXPD099cwzz2jZsmU6e/bsXesbOHCg9u3bp7feeku1atXSlStXtG/fPqWlpdlq+cMf/qATJ07cdar2u+++q1q1aunvf/+7fH19FRERUWBt3bt3V+/evTVy5EgdPnxYU6dO1ZEjR7Rr1y65ubkV+jV27txZ06dP1+uvv6733ntPjRo1knT3M9yGYeiFF17Qli1bNHnyZD311FM6dOiQpk2bph07dmjHjh3y8PCwtT948KAmTJig1157TYGBgfroo480bNgw1axZU61bty50nQCAsoPQDQAodS5duqTr168rPDy8wHbh4eH67rvv9PP
PP6tKlSr3tY85c+bYfs7NzdVTTz0lf39/DRkyRLNnz1alSpVs29PS0vTll1/qySeflCQ988wzSkxM1JIlSzRo0CD5+vqqfv36kn4Jd/eaQv3EE0/I09NTvr6+edqOGTNG8+bNU1xcnMaNGyfplzOxn3zyifr16yd/f//7ep2/FRoaKumXa9DvFrq//fZbvfTSSxo+fLhtXbdu3Ww/161bVxUrVixwunj58uW1YcMGu8Cc3zXWd/To0UMzZ86UJHXs2FGBgYEaMGCAVqxYoQEDBhT69VWtWtUW8OvWrXvPsdi4caM2bNigmTNn6s9//rMkqUOHDqpevbr69OmjxYsX270Ply5d0rfffqsaNWpIklq3bq0tW7ZoyZIlhG4AeEgxvRwAUGYZhiFJeaaOF8b+/fv1/PPPy9/fXy4uLnJzc9OgQYOUk5OjY8eO2bUNCgqyBe47GjZsqFOnTjle/F08+uij6tKlixYsWGB7fUuWLFFaWprGjBnzwP3f6bMgTz75pOLj4/Xmm29q586deabcF8bzzz9/X2eofxuse/fuLVdXVyUkJNz3vu/HV199JUm26el39OrVSxUqVNCWLVvs1v/ud7+zBW7pl/9cqFWrlin/FgAApQOhGwBQ6lSpUkVeXl5KSkoqsN3Jkyfl6el532d/T58+raeeekrnzp3TvHnz9M0332j37t167733JEk3btywa59f/x4eHnnaFZWxY8fq+PHj2rRpk6Rfrj+PioqyTZV+EHfCYUhIyF3bLF++XIMHD9ZHH32kqKgoVa5cWYMGDVJKSkqh9xMcHHxfdQUFBdktu7q6yt/f3zal3SxpaWlydXVV1apV7dZbLBYFBQXl2X9x/1sAAJR8hG4AQKnj4uKi9u3ba8+ePTp79my+bc6ePau9e/eqffv2tnXly5dXVlZWnra//Q7o1atXKzMzUytXrtSLL76oVq1aqUmTJnJ3dy/aF+Kg9u3bq379+po/f762b9+uffv2afTo0Q/c740bN7R582Y99thjBd5NvUqVKpo7d65OnjypU6dOacaMGVq5cmWes8EFud/ZB78N9Ldv31ZaWppdyPXw8Mh3fB8kmPv7++v27dt5btpmGIZSUlLu+7IFAMDDh9ANACiVXnvtNRmGoVGjRiknJ8duW05Ojv74xz8qJyfH7nunw8LClJqaqgsXLtjWZWdna8OGDXbPvxMIf32DLMMw9OGHHzpc752+CnvG815nR//0pz9p7dq1mjx5sgIDA2135HZUTk6OxowZo7S0NL366quFfl6NGjU0ZswYdejQQfv27St0/ffrs88+s1tesWKFbt++rbZt29rWhYWF6dChQ3btvvrqK127ds1u3f2MxdNPPy1J+vTTT+3W/+///q8yMzNt2wEAuBtupAYAKJVatmypuXPnauzYsWrVqpXGjBmjGjVq6PTp03rvvfe0Y8cOWa1WdejQwfacPn366I033lDfvn315z//WTdv3tS7776bJ7R36NBB7u7u6tevnyZNmqSbN29q4cKFunz5ssP1PvbYY/L09NRnn32mxx9/XN7e3goJCbnrNO4GDRpo2bJlWr58uR599FGVL19eDRo0sG1/8cUXNXnyZH399df6y1/+cl9n4S9cuKCdO3fKMAxdvXpVP/zwgxYvXqyDBw/qlVdesbsx2G+lp6erXbt26t+/v+rUqSMfHx/t3r1b69evV48ePezqX7lypRYuXKjGjRurXLlyatKkSaFr/K2VK1fK1dVVHTp0sN29PDIyUr1797a1GThwoKZOnao33nhDbdq00ZEjRzR//ny7u7xLst3U7p///Kd8fHxUvnx5hYeH5zs1vEOHDnr22Wf16quvKiMjQy1btrTdvfyJJ57QwIEDHX5NAICHhAEAQCm2fft2o2fPnkZgYKBRrlw5Q5JRvnx5Y+3atfm2//LLL43f/e53hqenp/Hoo48a8+fPN6ZNm2b89iPxX//6lxEZGWmUL1/eeOSRR4w///nPxrp
16wxJRkJCgq1dmzZtjHr16uXZz+DBg43Q0FC7dUuXLjXq1KljuLm5GZKMadOmGYZh5Lv/kydPGh07djR8fHwMSXn6MgzDiImJMVxdXY2zZ8/e+436fyTZHuXKlTN8fX2NBg0aGH/4wx+MHTt25GmflJRkSDLi4uIMwzCMmzdvGiNHjjQaNmxo+Pr6Gp6enkbt2rWNadOmGZmZmbbn/fzzz8bvf/97o2LFiobFYrG9vjv9zZo16577+vV7s3fvXqNr166Gt7e34ePjY/Tr18+4cOGC3fOzsrKMSZMmGdWrVzc8PT2NNm3aGAcOHDBCQ0ONwYMH27WdO3euER4ebri4uNjtM79xu3HjhvHqq68aoaGhhpubmxEcHGz88Y9/NC5fvmzXLjQ01OjcuXOe19WmTRujTZs2edYDAB4OFsMoxG1KAQAoJRYvXqzBgwdr0qRJ+tvf/ubsckyTnZ2tsLAwtWrVSitWrHB2OQAA4C6YXg4AKFMGDRqk5ORkvfbaa6pQoYLeeOMNZ5dUpC5evKgff/xRcXFxunDhgl577TVnlwQAAArAmW4AAEqR+Ph4DRkyRMHBwZo2bZpGjBjh7JIAAEABCN0AAAAAAJiErwwDAAAAAMAkhG4AAAAAAExC6AYAAAAAwCRl/u7lubm5On/+vHx8fGSxWJxdDgAAAACgDDAMQ1evXlVISIjKlbv7+ewyH7rPnz+v6tWrO7sMAAAAAEAZdObMGVWrVu2u28t86Pbx8ZH0yxvh6+vr5GoAAAAAAGVBRkaGqlevbsucd1PmQ/edKeW+vr6EbgAAAABAkbrXZczcSA0AAAAAAJMQugEAAAAAMAmhGwAAAAAAk5T5a7oBAAAAoCjk5OTo1q1bzi4DxcTNzU0uLi4P3A+hGwAAAAAKYBiGUlJSdOXKFWeXgmJWsWJFBQUF3fNmaQUhdAMAAABAAe4E7oCAAHl5eT1QAEPpYBiGrl+/rtTUVElScHCww30RugEAAADgLnJycmyB29/f39nloBh5enpKklJTUxUQEODwVHNupAYAAAAAd3HnGm4vLy8nVwJnuDPuD3ItP6EbAAAAAO6BKeUPp6IYd0I3AAAAAAAmIXQDAAAAAJwiJiZGL7zwgrPLMBU3UgMAAACA+2S1luz9paamaurUqVq3bp0uXLigSpUqKTIyUlarVVFRUabU6Czx8fEaN27cPb/SLTk5WRMmTNDevXt1/Phx/elPf9LcuXNNr4/QDQAAAABlTM+ePXXr1i0tWrRIjz76qC5cuKAtW7bo559/dnZpTpOVlaWqVatqypQpeuedd4ptv0wvBwAAAIAy5MqVK9q2bZv+9re/qV27dgoNDdWTTz6pyZMnq3PnzrZ2FotFH330kbp37y4vLy9FRERozZo1tu05OTkaNmyYwsPD5enpqdq1a2vevHl2+7ozPTw2NlYBAQHy9fXViBEjlJ2dbWvzxRdfqEGDBvL09JS/v7+eeeYZZWZm2vXz97//XcHBwfL399fo0aPt7hZ++fJlDRo0SJUqVZKXl5eio6N1/PhxSVJiYqKGDBmi9PR0WSwWWSwWWe8yLSAsLEzz5s3ToEGD5Ofn5/D7e78I3QAAAABQhnh7e8vb21urV69WVlZWgW1jY2PVu3dvHTp0SJ06ddKAAQNsZ8Nzc3NVrVo1rVixQkeOHNEbb7yh119/XStWrLDrY8uWLTp69KgSEhK0dOlSrVq1SrGxsZJ+mdLdr18/DR06VEePHlViYqJ69OghwzBsz09ISNCJEyeUkJCgRYsWKT4+XvHx8bbtMTEx2rNnj9asWaMdO3bIMAx16tRJt27dUosWLTR37lz5+voqOTlZycnJmjhxYhG9k0WD0A0AAAAAZYirq6vi4+O1aNEiVaxYUS1bttTrr7+uQ4cO5WkbExOjfv36qWbNmpo+fboyMzP13XffSZLc3NwUGxurpk2bKjw8XAMGDFBMTEye0O3u7q5PPvlE9erVU+fOnfXXv/5V7777rnJ
zc5WcnKzbt2+rR48eCgsLU4MGDTRq1Ch5e3vbnl+pUiXNnz9fderUUZcuXdS5c2dt2bJFknT8+HGtWbNGH330kZ566ilFRkbqs88+07lz57R69Wq5u7vLz89PFotFQUFBCgoKsuu7JCB0AwAAAEAZ07NnT50/f15r1qzRs88+q8TERDVq1MjuDLIkNWzY0PZzhQoV5OPjo9TUVNu6999/X02aNFHVqlXl7e2tDz/8UKdPn7brIzIyUl5eXrblqKgoXbt2TWfOnFFkZKSefvppNWjQQL169dKHH36oy5cv2z2/Xr16cnFxsS0HBwfbajh69KhcXV3VrFkz23Z/f3/Vrl1bR48edfwNKkbcSK0kOWQtcHPi1mKpIv99p1kL3ba47+QIAAAAIK/y5curQ4cO6tChg9544w299NJLmjZtmmJiYmxt3Nzc7J5jsViUm5srSVqxYoVeeeUVzZ49W1FRUfLx8dGsWbO0a9euQu3fYrHIxcVFmzZt0vbt27Vx40b94x//0JQpU7Rr1y6Fh4ffs4ZfT0P/NcMwZLFYClWHs3GmGwAAAAAeAnXr1s1zA7OCfPPNN2rRooVGjRqlJ554QjVr1tSJEyfytDt48KBu3LhhW965c6e8vb1VrVo1Sb+E6JYtWyo2Nlb79++Xu7u7Vq1aVeiab9++bRf009LSdOzYMT3++OOSfpnenpOTU+jXVdwI3QAAAABQhqSlpal9+/b69NNPdejQISUlJenzzz/XzJkz1a1bt0L3U7NmTe3Zs0cbNmzQsWPHNHXqVO3evTtPu+zsbA0bNkxHjhzRunXrNG3aNI0ZM0blypXTrl27NH36dO3Zs0enT5/WypUrdfHiRVtgvpeIiAh169ZNw4cP17Zt23Tw4EG9+OKLeuSRR2yvJSwsTNeuXdOWLVt06dIlXb9+/a79HThwQAcOHNC1a9d08eJFHThwQEeOHCn0e+IIppcDAAAAQBni7e2tZs2a6Z133tGJEyd069YtVa9eXcOHD9frr79e6H5GjhypAwcOqE+fPrJYLOrXr59GjRqldevW2bV7+umnFRERodatWysrK0t9+/a1fW2Xr6+vvv76a82dO1cZGRkKDQ3V7NmzFR0dXeg64uLiNHbsWHXp0kXZ2dlq3bq1vvzyS9u09BYtWmjkyJHq06eP0tLSNG3atLt+bdgTTzxh+3nv3r1asmSJQkNDdfLkyULXc78sxt0myZcRGRkZ8vPzU3p6unx9fZ1dTsEOWQvczDXdAAAAQPG6efOmkpKSFB4ervLlyzu7nBInJiZGV65c0erVq51diikKGv/CZk2mlwMAAAAAYBJCNwAAAAAAJuGabgAAAACAQ377vd/IizPdAAAAAACYhNANAAAAAIBJCN0AAAAAAJiE0A0AAAAAgEkI3QAAAAAAmITQDQAAAACASQjdAAAAAACniImJ0QsvvODsMkzF93QDAAAAwP06ZC3e/TW8v/2lpqZq6tSpWrdunS5cuKBKlSopMjJSVqtVUVFR5tToJPHx8Ro3bpyuXLlSYLuVK1dq4cKFOnDggLKyslSvXj1ZrVY9++yzptbHmW4AAAAAKGN69uypgwcPatGiRTp27JjWrFmjtm3b6ueff3Z2aU7z9ddfq0OHDvryyy+1d+9etWvXTl27dtX+/ftN3S+hGwAAAADKkCtXrmjbtm3629/+pnbt2ik0NFRPPvmkJk+erM6dO9vaWSwWffTRR+revbu8vLwUERGhNWvW2Lbn5ORo2LBhCg8Pl6enp2rXrq158+bZ7evO9PDY2FgFBATI19dXI0aMUHZ2tq3NF198oQYNGsjT01P+/v565plnlJmZadfP3//+dwUHB8vf31+jR4/WrVu3bNsuX76sQYMGqVKlSvLy8lJ0dLSOHz8uSUpMTNSQIUOUnp4ui8Uii8Uiq9Wa7/syd+5cTZo0SU2bNlVERISmT5+uiIgI/etf/3L4vS4MQjcAAAAAlCHe3t7y9vbW6tWrlZWVVWD
b2NhY9e7dW4cOHVKnTp00YMAA29nw3NxcVatWTStWrNCRI0f0xhtv6PXXX9eKFSvs+tiyZYuOHj2qhIQELV26VKtWrVJsbKwkKTk5Wf369dPQoUN19OhRJSYmqkePHjIMw/b8hIQEnThxQgkJCVq0aJHi4+MVHx9v2x4TE6M9e/ZozZo12rFjhwzDUKdOnXTr1i21aNFCc+fOla+vr5KTk5WcnKyJEycW6n3Kzc3V1atXVbly5UK1dxShGwAAAADKEFdXV8XHx2vRokWqWLGiWrZsqddff12HDh3K0zYmJkb9+vVTzZo1NX36dGVmZuq7776TJLm5uSk2NlZNmzZVeHi4BgwYoJiYmDyh293dXZ988onq1aunzp07669//aveffdd5ebmKjk5Wbdv31aPHj0UFhamBg0aaNSoUfL29rY9v1KlSpo/f77q1KmjLl26qHPnztqyZYsk6fjx41qzZo0++ugjPfXUU4qMjNRnn32mc+fOafXq1XJ3d5efn58sFouCgoIUFBRk13dBZs+erczMTPXu3dvRt7pQCN0AAAAAUMb07NlT58+f15o1a/Tss88qMTFRjRo1sjuDLEkNGza0/VyhQgX5+PgoNTXVtu79999XkyZNVLVqVXl7e+vDDz/U6dOn7fqIjIyUl5eXbTkqKkrXrl3TmTNnFBkZqaeffloNGjRQr1699OGHH+ry5ct2z69Xr55cXFxsy8HBwbYajh49KldXVzVr1sy23d/fX7Vr19bRo0cdfn+WLl0qq9Wq5cuXKyAgwOF+CoPQDQAAAABlUPny5dWhQwe98cYb2r59u2JiYjRt2jS7Nm5ubnbLFotFubm5kqQVK1bolVde0dChQ7Vx40YdOHBAQ4YMsbteuyAWi0UuLi7atGmT1q1bp7p16+of//iHateuraSkpELV8Otp6L9mGIYsFkuh6vit5cuXa9iwYVqxYoWeeeYZh/q4H4RuAAAAAHgI1K1bN88NzAryzTffqEWLFho1apSeeOIJ1axZUydOnMjT7uDBg7px44ZteefOnfL29la1atUk/RKiW7ZsqdjYWO3fv1/u7u5atWpVoWu+ffu2du3aZVuXlpamY8eO6fHHH5f0y/T2nJycQvW3dOlSxcTEaMmSJXY3lTMToRsAAAAAypC0tDS1b99en376qQ4dOqSkpCR9/vnnmjlzprp161bofmrWrKk9e/Zow4YNOnbsmKZOnardu3fnaZedna1hw4bpyJEjWrdunaZNm6YxY8aoXLly2rVrl6ZPn649e/bo9OnTWrlypS5evGgLzPcSERGhbt26afjw4dq2bZsOHjyoF198UY888ojttYSFhenatWvasmWLLl26pOvXr+fb19KlSzVo0CDNnj1bzZs3V0pKilJSUpSenl7o98QRhG4AAAAAKEO8vb3VrFkzvfPOO2rdurXq16+vqVOnavjw4Zo/f36h+xk5cqR69OihPn36qFmzZkpLS9OoUaPytHv66acVERGh1q1bq3fv3uratavta7t8fX319ddfq1OnTqpVq5b+8pe/aPbs2YqOji50HXFxcWrcuLG6dOmiqKgoGYahL7/80jYtvUWLFho5cqT69OmjqlWraubMmfn288EHH+j27dsaPXq0goODbY+xY8cWuhZHWIy7TZIvIzIyMuTn56f09HT5+vo6u5yCHbIWuDlxa7FUkf++06yFbnuXr8UDAAAASp2bN28qKSlJ4eHhKl++vLPLKXFiYmJ05coVrV692tmlmKKg8S9s1uRMNwAAAAAAJiF0AwAAAABgEldnFwAAAAAAKJ1++73fyIsz3QAAAAAAmMSpoXvhwoVq2LChfH195evrq6ioKK1bt8623TAMWa1WhYSEyNPTU23bttXhw4edWDEAAAAAAIXn1NBdrVo1vf3229qzZ4/27Nmj9u3bq1u3brZgPXPmTM2ZM0fz58/X7t27FRQUpA4dOujq1avOLBsAAADAQyY3N9fZJcAJimLcnXpNd9e
uXe2W33rrLS1cuFA7d+5U3bp1NXfuXE2ZMkU9evSQJC1atEiBgYFasmSJRowY4YySAQAAADxE3N3dVa5cOZ0/f15Vq1aVu7u7LBaLs8uCyQzDUHZ2ti5evKhy5crJ3d3d4b5KzI3UcnJy9PnnnyszM1NRUVFKSkpSSkqKOnbsaGvj4eGhNm3aaPv27YRuAAAAAKYrV66cwsPDlZycrPPnzzu7HBQzLy8v1ahRQ+XKOT5J3Omh+/vvv1dUVJRu3rwpb29vrVq1SnXr1tX27dslSYGBgXbtAwMDderUqbv2l5WVpaysLNtyRkaGOYUDAAAAeCi4u7urRo0aun37tnJycpxdDoqJi4uLXF1dH3hmg9NDd+3atXXgwAFduXJF//u//6vBgwdr69attu2/fYGGYRT4omfMmKHY2FjT6gUAAADw8LFYLHJzc5Obm5uzS0Ep4/SvDHN3d1fNmjXVpEkTzZgxQ5GRkZo3b56CgoIkSSkpKXbtU1NT85z9/rXJkycrPT3d9jhz5oyp9QMAAAAAcDdOD92/ZRiGsrKyFB4erqCgIG3atMm2LTs7W1u3blWLFi3u+nwPDw/bV5DdeQAAAAAA4AxOnV7++uuvKzo6WtWrV9fVq1e1bNkyJSYmav369bJYLBo3bpymT5+uiIgIRUREaPr06fLy8lL//v2dWTYAAAAAAIXi1NB94cIFDRw4UMnJyfLz81PDhg21fv16dejQQZI0adIk3bhxQ6NGjdLly5fVrFkzbdy4UT4+Ps4sGwAAAACAQnFq6P74448L3G6xWGS1WmW1WounIAAAAAAAilCJu6YbAAAAAICygtANAAAAAIBJCN0AAAAAAJiE0A0AAAAAgEkI3QAAAAAAmITQDQAAAACASQjdAAAAAACYhNANAAAAAIBJCN0AAAAAAJiE0A0AAAAAgEkI3QAAAAAAmITQDQAAAACASQjdAAAAAACYhNANAAAAAIBJCN0AAAAAAJiE0A0AAAAAgEkI3QAAAAAAmITQDQAAAACASQjdAAAAAACYhNANAAAAAIBJCN0AAAAAAJiE0A0AAAAAgEkI3QAAAAAAmITQDQAAAACASQjdAAAAAACYhNANAAAAAIBJCN0AAAAAAJiE0A0AAAAAgEkI3QAAAAAAmITQDQAAAACASQjdAAAAAACYhNANAAAAAIBJCN0AAAAAAJiE0A0AAAAAgEkI3QAAAAAAmITQDQAAAACASQjdAAAAAACYhNANAAAAAIBJCN0AAAAAAJiE0A0AAAAAgEkI3QAAAAAAmITQDQAAAACASQjdAAAAAACYhNANAAAAAIBJCN0AAAAAAJiE0A0AAAAAgEkI3QAAAAAAmITQDQAAAACASZwaumfMmKGmTZvKx8dHAQEBeuGFF/Tjjz/atYmJiZHFYrF7NG/e3EkVAwAAAABQeE4N3Vu3btXo0aO1c+dObdq0Sbdv31bHjh2VmZlp1+65555TcnKy7fHll186qWIAAAAAAArP1Zk7X79+vd1yXFycAgICtHfvXrVu3dq23sPDQ0FBQcVdHgAAAAAAD6REXdOdnp4uSapcubLd+sTERAUEBKhWrVoaPny4UlNTnVEeAAAAAAD3xalnun/NMAyNHz9erVq1Uv369W3ro6Oj1atXL4WGhiopKUlTp05V+/bttXfvXnl4eOTpJysrS1lZWbbljIyMYqkfAAAAAIDfKjGhe8yYMTp06JC2bdtmt75Pnz62n+vXr68mTZooNDRUa9euVY8ePfL0M2PGDMXGxppeLwAAAAAA91Iippe//PLLWrNmjRISElStWrUC2wYHBys0NFTHjx/Pd/vkyZOVnp5ue5w5c8aMkgEAAAAAuCennuk2DEMvv/yyVq1apcTERIWHh9/zOWlpaTpz5oyCg4Pz3e7h4ZHvtHMAAAAAAIqbU890jx49Wp9++qmWLFkiHx8fpaSkKCUlRTdu3JAkXbt2TRMnTtSOHTt08uRJJSYmqmvXrqpSpYq6d+/uzNI
BAAAAALgnp57pXrhwoSSpbdu2duvj4uIUExMjFxcXff/991q8eLGuXLmi4OBgtWvXTsuXL5ePj48TKgYAAAAAoPCcPr28IJ6entqwYUMxVQMAAAAAQNEqETdSAwAAAACgLCJ0AwAAAABgEkI3AAAAAAAmIXQDAAAAAGASQjcAAAAAACYhdAMAAAAAYBJCNwAAAAAAJiF0AwAAAABgEkI3AAAAAAAmIXQDAAAAAGASQjcAAAAAACYhdAMAAAAAYBJCNwAAAAAAJiF0AwAAAABgEkI3AAAAAAAmIXQDAAAAAGASQjcAAAAAACYhdAMAAAAAYBJCNwAAAAAAJiF0AwAAAABgEkI3AAAAAAAmIXQDAAAAAGASQjcAAAAAACYhdAMAAAAAYBJCNwAAAAAAJiF0AwAAAABgEkI3AAAAAAAmIXQDAAAAAGASQjcAAAAAACYhdAMAAAAAYBJCNwAAAAAAJiF0AwAAAABgEkI3AAAAAAAmIXQDAAAAAGASQjcAAAAAACYhdAMAAAAAYBJCNwAAAAAAJiF0AwAAAABgEkI3AAAAAAAmIXQDAAAAAGASQjcAAAAAACYhdAMAAAAAYBJCNwAAAAAAJiF0AwAAAABgEkI3AAAAAAAmIXQDAAAAAGASQjcAAAAAACYhdAMAAAAAYBKnhu4ZM2aoadOm8vHxUUBAgF544QX9+OOPdm0Mw5DValVISIg8PT3Vtm1bHT582EkVAwAAAABQeE4N3Vu3btXo0aO1c+dObdq0Sbdv31bHjh2VmZlpazNz5kzNmTNH8+fP1+7duxUUFKQOHTro6tWrTqwcAAAAAIB7c3XkSUlJSQoPD3/gna9fv95uOS4uTgEBAdq7d69at24twzA0d+5cTZkyRT169JAkLVq0SIGBgVqyZIlGjBjxwDUAAAAAAGAWh85016xZU+3atdOnn36qmzdvFlkx6enpkqTKlStL+iXcp6SkqGPHjrY2Hh4eatOmjbZv355vH1lZWcrIyLB7AAAAAADgDA6F7oMHD+qJJ57QhAkTFBQUpBEjRui77757oEIMw9D48ePVqlUr1a9fX5KUkpIiSQoMDLRrGxgYaNv2WzNmzJCfn5/tUb169QeqCwAAAAAARzkUuuvXr685c+bo3LlziouLU0pKilq1aqV69eppzpw5unjx4n33OWbMGB06dEhLly7Ns81isdgtG4aRZ90dkydPVnp6uu1x5syZ+64FAAAAAICi8EA3UnN1dVX37t21YsUK/e1vf9OJEyc0ceJEVatWTYMGDVJycnKh+nn55Ze1Zs0aJSQkqFq1arb1QUFBkpTnrHZqamqes993eHh4yNfX1+4BAAAAAIAzPFDo3rNnj0aNGqXg4GDNmTNHEydO1IkTJ/TVV1/p3Llz6tatW4HPNwxDY8aM0cqVK/XVV1/luTlbeHi4goKCtGnTJtu67Oxsbd26VS1atHiQ0gEAAAAAMJ1Ddy+fM2eO4uLi9OOPP6pTp05avHixOnXqpHLlfsnw4eHh+uCDD1SnTp0C+xk9erSWLFmi//t//698fHxsZ7T9/Pzk6ekpi8WicePGafr06YqIiFBERISmT58uLy8v9e/f35HSAQAAAAAoNg6F7oULF2ro0KEaMmSIbQr4b9WoUUMff/zxPfuRpLZt29qtj4uLU0xMjCRp0qRJunHjhkaNGqXLly+rWbNm2rhxo3x8fBwpHQAAAACAYmMxDMNwdhFmysjIkJ+fn9LT00v+9d2HrAVuTtxaLFXkv+80a6HbWgvfFAAAAABKpcJmTYeu6Y6Li9Pnn3+eZ/3nn3+uRYsWOdIlAAAAAABljkOh++2331aVKlXyrA8ICND06dMfuCgAAAAAAMoCh0L3qVOn8txpXJJCQ0N1+vTpBy4KAAAAAICywKHQHRAQoEOHDuVZf/DgQfn7+z9wUQAAAAAAlAUOhe6+ffvqT3/6kxISEpSTk6OcnBx99dVXGjt2rPr27VvUNQIAAAAAUCo59JVhb77
5pk6dOqWnn35arq6/dJGbm6tBgwZxTTcAAAAAAP+PQ6Hb3d1dy5cv1//5P/9HBw8elKenpxo0aKDQ0NCirg8AAAAAgFLLodB9R61atVSrVq2iqgUAAAAAgDLFodCdk5Oj+Ph4bdmyRampqcrNzbXb/tVXXxVJcQAAAAAAlGYOhe6xY8cqPj5enTt3Vv369WWxWIq6LgAAAAAASj2HQveyZcu0YsUKderUqajrAQAAAACgzHDoK8Pc3d1Vs2bNoq4FAAAAAIAyxaHQPWHCBM2bN0+GYRR1PQAAAAAAlBkOTS/ftm2bEhIStG7dOtWrV09ubm5221euXFkkxQEAAAAAUJo5FLorVqyo7t27F3UtAAAAAACUKQ6F7ri4uKKuAwAAAACAMseha7ol6fbt29q8ebM++OADXb16VZJ0/vx5Xbt2rciKAwAAAACgNHPoTPepU6f03HPP6fTp08rKylKHDh3k4+OjmTNn6ubNm3r//feLuk4AAAAAAEodh850jx07Vk2aNNHly5fl6elpW9+9e3dt2bKlyIoDAAAAAKA0c/ju5d9++63c3d3t1oeGhurcuXNFUhgAAAAAAKWdQ2e6c3NzlZOTk2f92bNn5ePj88BFAQAAAABQFjgUujt06KC5c+fali0Wi65du6Zp06apU6dORVUbAAAAAAClmkPTy9955x21a9dOdevW1c2bN9W/f38dP35cVapU0dKlS4u6RgAAAAAASiWHQndISIgOHDigpUuXat++fcrNzdWwYcM0YMAAuxurAQAAAADwMHModEuSp6enhg4dqqFDhxZlPQAAAAAAlBkOhe7FixcXuH3QoEEOFQMAAAAAQFniUOgeO3as3fKtW7d0/fp1ubu7y8vLi9ANAAAAAIAcvHv55cuX7R7Xrl3Tjz/+qFatWnEjNQAAAAAA/h+HQnd+IiIi9Pbbb+c5Cw4AAAAAwMOqyEK3JLm4uOj8+fNF2SUAAAAAAKWWQ9d0r1mzxm7ZMAwlJydr/vz5atmyZZEUBgAAAABAaedQ6H7hhRfsli0Wi6pWrar27dtr9uzZRVEXAAAAAAClnkOhOzc3t6jrAAAAAACgzCnSa7oBAAAAAMD/z6Ez3ePHjy902zlz5jiyCwAAAAAASj2HQvf+/fu1b98+3b59W7Vr15YkHTt2TC4uLmrUqJGtncViKZoqAQAAAAAohRwK3V27dpWPj48WLVqkSpUqSZIuX76sIUOG6KmnntKECROKtEgAAAAAAEojh67pnj17tmbMmGEL3JJUqVIlvfnmm9y9HAAAAACA/8eh0J2RkaELFy7kWZ+amqqrV68+cFEAAAAAAJQFDoXu7t27a8iQIfriiy909uxZnT17Vl988YWGDRumHj16FHWNAAAAAACUSg5d0/3+++9r4sSJevHFF3Xr1q1fOnJ11bBhwzRr1qwiLRClj9Xq7ArurTTUCAAAAKD0cyh0e3l5acGCBZo1a5ZOnDghwzBUs2ZNVahQoajrAwAAAACg1HJoevkdycnJSk5OVq1atVShQgUZhlFUdQEAAAAAUOo5FLrT0tL09NNPq1atWurUqZOSk5MlSS+99BJfFwYAAAAAwP/jUOh+5ZVX5ObmptOnT8vLy8u2vk+fPlq/fn2RFQcAAAAAQGnm0DXdGzdu1IYNG1StWjW79RERETp16lSRFAYAAAAAQGnn0JnuzMxMuzPcd1y6dEkeHh4PXBQAAAAAAGWBQ6G7devWWrx4sW3ZYrEoNzdXs2bNUrt27Qrdz9dff62uXbsqJCREFotFq1evttseExMji8Vi92jevLkjJQMAAAAAUOwcml4+a9YstW3bVnv27FF2drYmTZqkw4cP6+eff9a3335b6H4yMzMVGRmpIUOGqGfPnvm2ee655xQXF2dbdnd3d6RkAAAAAACKnUOhu27dujp06JAWLlwoFxcXZWZmqkePHho9erSCg4ML3U90dLSio6MLbOPh4aGgoCBHygQAAAAAwKnuO3TfunV
LHTt21AcffKDY2FgzarKTmJiogIAAVaxYUW3atNFbb72lgIAA0/cLAAAAAMCDuu/Q7ebmph9++EEWi8WMeuxER0erV69eCg0NVVJSkqZOnar27dtr7969d71hW1ZWlrKysmzLGRkZptcJAAAAAEB+HJpePmjQIH388cd6++23i7oeO3369LH9XL9+fTVp0kShoaFau3atevToke9zZsyYUSxn4B82bf2tzi7hrhLTrM4uAQAAAADy5VDozs7O1kcffaRNmzapSZMmqlChgt32OXPmFElxvxUcHKzQ0FAdP378rm0mT56s8ePH25YzMjJUvXp1U+oBAAAAAKAg9xW6f/rpJ4WFhemHH35Qo0aNJEnHjh2za2PmtPO0tDSdOXOmwJu1eXh48F3hAAAAAIAS4b5Cd0REhJKTk5WQkCDpl+nf7777rgIDAx3a+bVr1/Tf//7XtpyUlKQDBw6ocuXKqly5sqxWq3r27Kng4GCdPHlSr7/+uqpUqaLu3bs7tD8AAAAAAIrTfYVuwzDsltetW6fMzEyHd75nzx61a9fOtnxnWvjgwYO1cOFCff/991q8eLGuXLmi4OBgtWvXTsuXL5ePj4/D+wQAAAAAoLg4dE33Hb8N4ferbdu2BfaxYcOGB+ofAAAAAABnKnc/jS0WS55rtovjq8MAAAAAACiN7nt6eUxMjO1GZTdv3tTIkSPz3L185cqVRVchAAAAAACl1H2F7sGDB9stv/jii0VaDAAAAAAAZcl9he64uDiz6gAAAAAAoMy5r2u6AQAAAABA4RG6AQAAAAAwCaEbAAAAAACTELoBAAAAADAJoRsAAAAAAJMQugEAAAAAMAmhGwAAAAAAkxC6AQAAAAAwCaEbAAAAAACTELoBAAAAADAJoRsAAAAAAJMQugEAAAAAMAmhGwAAAAAAkxC6AQAAAAAwCaEbAAAAAACTELoBAAAAADAJoRsAAAAAAJMQugEAAAAAMAmhGwAAAAAAkxC6AQAAAAAwCaEbAAAAAACTELoBAAAAADAJoRsAAAAAAJMQugEAAAAAMAmhGwAAAAAAkxC6AQAAAAAwCaEbAAAAAACTELoBAAAAADAJoRsAAAAAAJMQugEAAAAAMAmhGwAAAAAAkxC6AQAAAAAwCaEbAAAAAACTELoBAAAAADAJoRsAAAAAAJMQugEAAAAAMAmhGwAAAAAAkxC6AQAAAAAwCaEbAAAAAACTELoBAAAAADAJoRsAAAAAAJMQugEAAAAAMAmhGwAAAAAAkxC6AQAAAAAwiVND99dff62uXbsqJCREFotFq1evtttuGIasVqtCQkLk6emptm3b6vDhw84pFgAAAACA++TU0J2ZmanIyEjNnz8/3+0zZ87UnDlzNH/+fO3evVtBQUHq0KGDrl69WsyVAgAAAABw/1ydufPo6GhFR0fnu80wDM2dO1dTpkxRjx49JEmLFi1SYGCglixZohEjRhRnqQAAAAAA3LcSe013UlKSUlJS1LFjR9s6Dw8PtWnTRtu3b7/r87KyspSRkWH3AAAAAADAGUps6E5JSZEkBQYG2q0PDAy0bcvPjBkz5OfnZ3tUr17d1DoBAAAAALibEhu677BYLHbLhmHkWfdrkydPVnp6uu1x5swZs0sEAAAAACBfTr2muyBBQUGSfjnjHRwcbFufmpqa5+z3r3l4eMjDw8P0+gAAAAAAuJcSe6Y7PDxcQUFB2rRpk21ddna2tm7dqhYtWjixMgAAAAAACsepZ7qvXbum//73v7blpKQkHThwQJUrV1aNGjU0btw4TZ8+XREREYqIiND06dPl5eWl/v37O7FqAAAAAAAKx6mhe8+ePWrXrp1tefz48ZKkwYMHKz4+XpMmTdKNGzc0atQoXb58Wc2aNdPGjRvl4+PjrJIBAAAAACg0p4butm3byjCMu263WCyyWq2yWq3FVxQAAAAAAEWkxF7TDQAAAABAaUfoBgAAAADAJIRuAAAAAABMQugGAAAAAMAkhG4
AAAAAAEzi1LuXA0Whrb/1/p90qMjLyKuhtRh2AgAAAKAk40w3AAAAAAAmIXQDAAAAAGASQjcAAAAAACYhdAMAAAAAYBJCNwAAAAAAJiF0AwAAAABgEkI3AAAAAAAmIXQDAAAAAGASQjcAAAAAACYhdAMAAAAAYBJCNwAAAAAAJnF1dgGAMyRuLYZ9rHzwPqzWB+8DAAAAgPNwphsAAAAAAJMQugEAAAAAMAmhGwAAAAAAkxC6AQAAAAAwCaEbAAAAAACTELoBAAAAADAJoRsAAAAAAJMQugEAAAAAMAmhGwAAAAAAkxC6AQAAAAAwCaEbAAAAAACTELoBAAAAADAJoRsAAAAAAJMQugEAAAAAMAmhGwAAAAAAkxC6AQAAAAAwCaEbAAAAAACTELoBAAAAADAJoRsAAAAAAJMQugEAAAAAMAmhGwAAAAAAkxC6AQAAAAAwiauzCwDKqrb+1gfuI/EfD15Hvv2mWYusL2vRdQUAAACUOZzpBgAAAADAJIRuAAAAAABMQugGAAAAAMAkhG4AAAAAAExC6AYAAAAAwCSEbgAAAAAATELoBgAAAADAJCU6dFutVlksFrtHUFCQs8sCAAAAAKBQXJ1dwL3Uq1dPmzdvti27uLg4sRoAAAAAAAqvxIduV1dXzm4DAAAAAEqlEj29XJKOHz+ukJAQhYeHq2/fvvrpp58KbJ+VlaWMjAy7BwAAAAAAzlCiz3Q3a9ZMixcvVq1atXThwgW9+eabatGihQ4fPix/f/98nzNjxgzFxsYWc6VA6dLW31p0nR0quq4kSQ2tRdwhAAAA4DwWwzAMZxdRWJmZmXrsscc0adIkjR8/Pt82WVlZysrKsi1nZGSoevXqSk9Pl6+vb3GV6phD1gI3J24tliqA+9K2TRF3SOgGAABAKZCRkSE/P797Zs0Sfab7typUqKAGDRro+PHjd23j4eEhDw+PYqwKAAAAAID8lfhrun8tKytLR48eVXBwsLNLAQAAAADgnkp06J44caK2bt2qpKQk7dq1S7///e+VkZGhwYMHO7s0AAAAAADuqURPLz979qz69eunS5cuqWrVqmrevLl27typ0NBQZ5cGAAAAAMA9lejQvWzZMmeXAAAAAACAw0r09HIAAAAAAEozQjcAAAAAACYhdAMAAAAAYBJCNwAAAAAAJiF0AwAAAABgkhJ993IAQMljtTq7gsIpLXUCAICyjTPdAAAAAACYhNANAAAAAIBJCN0AAAAAAJiE0A0AAAAAgEkI3QAAAAAAmITQDQAAAACASQjdAAAAAACYhNANAAAAAIBJCN0AAAAAAJiE0A0AAAAAgEkI3QAAAAAAmMTV2QUAKN0StxZxfyuLtr87rFZz+kXB2vpbnbfzQ/fY3tBaHFUAAICHHGe6AQAAAAAwCaEbAAAAAACTELoBAAAAADAJoRsAAAAAAJMQugEAAAAAMAmhGwAAAAAAkxC6AQAAAAAwCaEbAAAAAACTELoBAAAAADAJoRsAAAAAAJO4OrsAAPi1tv5WU/pN/Icp3Ra5tm0cfGJDa1GWAQAAgCLCmW4AAAAAAExC6AYAAAAAwCSEbgAAAAAATELoBgAAAADAJIRuAAAAAABMQugGAAAAAMAkhG4AAAAAAExC6AYAAAAAwCSEbgAAAAAATELoBgAAAADAJIRuAAAAAABM4ursAgAAReCQtdh21da/2HZV5lmt+a9v63+XDU7Sts2vFhpanVUGHjaHrAVuTtxaLFXkv+80a6Ha3e0YRwlzyOrsCu7OpN+5peHfZmmosbA40w0AAAAAgEkI3QAAAAAAmITQDQAAAACASQjdAAAAAACYhNANAAAAAIBJCN0AAAAAAJikVITuBQsWKDw8XOXLl1fjxo31zTffOLskAAAAAADuqcSH7uXLl2vcuHGaMmWK9u/fr6eeekrR0dE6ffq0s0sDAAAAAKBAJT50z5kzR8OGDdNLL72kxx9/XHPnzlX16tW1cOFCZ5cGAAAAAECBSnT
ozs7O1t69e9WxY0e79R07dtT27dudVBUAAAAAAIXj6uwCCnLp0iXl5OQoMDDQbn1gYKBSUlLyfU5WVpaysrJsy+np6ZKkjIwM8wotKteyCtyceaOY6gDgNBnXnF3BvZWW30X3fC9LwOdC1l1+7WfeKPjzoLjZvZcl4H3DQ6IE/12UlVW444DDpZS4x781pzLpH9HdPn9KktJw/NzJmIZhFNiuRIfuOywWi92yYRh51t0xY8YMxcbG5llfvXp1U2oDAJRWbzu7gFKK9w0o7HHwNocLHtjD+4+oNB0/V69elZ+f3123l+jQXaVKFbm4uOQ5q52amprn7PcdkydP1vjx423Lubm5+vnnn+Xv73/XoF6SZGRkqHr16jpz5ox8fX2dXQ7ywRiVbIxPyccYlXyMUcnG+JR8jFHJxviUfKVljAzD0NWrVxUSElJguxIdut3d3dW4cWNt2rRJ3bt3t63ftGmTunXrlu9zPDw85OHhYbeuYsWKZpZpCl9f3xL9DwyMUUnH+JR8jFHJxxiVbIxPyccYlWyMT8lXGsaooDPcd5To0C1J48eP18CBA9WkSRNFRUXpn//8p06fPq2RI0c6uzQAAAAAAApU4kN3nz59lJaWpr/+9a9KTk5W/fr19eWXXyo0NNTZpQEAAAAAUKASH7oladSoURo1apSzyygWHh4emjZtWp4p8ig5GKOSjfEp+Rijko8xKtkYn5KPMSrZGJ+Sr6yNkcW41/3NAQAAAACAQ8o5uwAAAAAAAMoqQjcAAAAAACYhdAMAAAAAYBJCdwmzYMEChYeHq3z58mrcuLG++eYbZ5dU5s2YMUNNmzaVj4+PAgIC9MILL+jHH3+0axMTEyOLxWL3aN68uV2brKwsvfzyy6pSpYoqVKig559/XmfPni3Ol1JmWa3WPO9/UFCQbbthGLJarQoJCZGnp6fatm2rw4cP2/XB+JgrLCwszxhZLBaNHj1aEsdQcfv666/VtWtXhYSEyGKxaPXq1Xbbi+qYuXz5sgYOHCg/Pz/5+flp4MCBunLlismvrmwoaIxu3bqlV199VQ0aNFCFChUUEhKiQYMG6fz583Z9tG3bNs9x1bdvX7s2jJHj7nUcFdXvNcbIMfcan/w+kywWi2bNmmVrwzFknsL8ff0wfRYRukuQ5cuXa9y4cZoyZYr279+vp556StHR0Tp9+rSzSyvTtm7dqtGjR2vnzp3atGmTbt++rY4dOyozM9Ou3XPPPafk5GTb48svv7TbPm7cOK1atUrLli3Ttm3bdO3aNXXp0kU5OTnF+XLKrHr16tm9/99//71t28yZMzVnzhzNnz9fu3fvVlBQkDp06KCrV6/a2jA+5tq9e7fd+GzatEmS1KtXL1sbjqHik5mZqcjISM2fPz/f7UV1zPTv318HDhzQ+vXrtX79eh04cEADBw40/fWVBQWN0fXr17Vv3z5NnTpV+/bt08qVK3Xs2DE9//zzedoOHz7c7rj64IMP7LYzRo6713EkFc3vNcbIMfcan1+PS3Jysj755BNZLBb17NnTrh3HkDkK8/f1Q/VZZKDEePLJJ42RI0faratTp47x2muvOamih1Nqaqohydi6datt3eDBg41u3brd9TlXrlwx3NzcjGXLltnWnTt3zihXrpyxfv16M8t9KEybNs2IjIzMd1tubq4RFBRkvP3227Z1N2/eNPz8/Iz333/fMAzGxxnGjh1rPPbYY0Zubq5hGBxDziTJWLVqlW25qI6ZI0eOGJKMnTt32trs2LHDkGT85z//MflVlS2/HaP8fPfdd4Yk49SpU7Z1bdq0McaOHXvX5zBGRSe/MSqK32uMUdEozDHUrVs3o3379nbrOIaKz2//vn7YPos4011CZGdna+/everYsaPd+o4dO2r79u1OqurhlJ6eLkmqXLmy3frExEQFBASoVq1aGj58uFJTU23b9u7dq1u3btmNX0hIiOrXr8/4FZHjx48rJCRE4eHh6tu3r3766SdJUlJSklJ
SUuzeew8PD7Vp08b23jM+xSs7O1uffvqphg4dKovFYlvPMVQyFNUxs2PHDvn5+alZs2a2Ns2bN5efnx9jZoL09HRZLBZVrFjRbv1nn32mKlWqqF69epo4caLdGSLGyHwP+nuNMSoeFy5c0Nq1azVs2LA82ziGisdv/75+2D6LXJ1dAH5x6dIl5eTkKDAw0G59YGCgUlJSnFTVw8cwDI0fP16tWrVS/fr1beujo6PVq1cvhYaGKikpSVOnTlX79u21d+9eeXh4KCUlRe7u7qpUqZJdf4xf0WjWrJkWL16sWrVq6cKFC3rzzTfVokULHT582Pb+5nfsnDp1SpIYn2K2evVqXblyRTExMbZ1HEMlR1EdMykpKQoICMjTf0BAAGNWxG7evKnXXntN/fv3l6+vr239gAEDFB4erqCgIP3www+aPHmyDh48aLu8gzEyV1H8XmOMiseiRYvk4+OjHj162K3nGCoe+f19/bB9FhG6S5hfnxWSfvlH+tt1MM+YMWN06NAhbdu2zW59nz59bD/Xr19fTZo0UWhoqNauXZvnF/ivMX5FIzo62vZzgwYNFBUVpccee0yLFi2y3bTGkWOH8THHxx9/rOjoaIWEhNjWcQyVPEVxzOTXnjErWrdu3VLfvn2Vm5urBQsW2G0bPny47ef69esrIiJCTZo00b59+9SoUSNJjJGZiur3GmNkvk8++UQDBgxQ+fLl7dZzDBWPu/19LT08n0VMLy8hqlSpIhcXlzz/I5Oamprnf4Bgjpdffllr1qxRQkKCqlWrVmDb4OBghYaG6vjx45KkoKAgZWdn6/Lly3btGD9zVKhQQQ0aNNDx48dtdzEv6NhhfIrPqVOntHnzZr300ksFtuMYcp6iOmaCgoJ04cKFPP1fvHiRMSsit27dUu/evZWUlKRNmzbZneXOT6NGjeTm5mZ3XDFGxceR32uMkfm++eYb/fjjj/f8XJI4hsxwt7+vH7bPIkJ3CeHu7q7GjRvbprPcsWnTJrVo0cJJVT0cDMPQmDFjtHLlSn311VcKDw+/53PS0tJ05swZBQcHS5IaN24sNzc3u/FLTk7WDz/8wPiZICsrS0ePHlVwcLBtWtiv3/vs7Gxt3brV9t4zPsUnLi5OAQEB6ty5c4HtOIacp6iOmaioKKWnp+u7776ztdm1a5fS09MZsyJwJ3AfP35cmzdvlr+//z2fc/jwYd26dct2XDFGxcuR32uMkfk+/vhjNW7cWJGRkfdsyzFUdO719/VD91lUzDduQwGWLVtmuLm5GR9//LFx5MgRY9y4cUaFChWMkydPOru0Mu2Pf/yj4efnZyQmJhrJycm2x/Xr1w3DMIyrV68aEyZMMLZv324kJSUZCQkJRlRUlPHII48YGRkZtn5GjhxpVKtWzdi8ebOxb98+o3379kZkZKRx+/ZtZ720MmPChAlGYmKi8dNPPxk7d+40unTpYvj4+NiOjbffftvw8/MzVq5caXz//fdGv379jODgYManmOXk5Bg1atQwXn31Vbv1HEPF7+rVq8b+/fuN/fv3G5KMOXPmGPv377fd+bqojpnnnnvOaNiwobFjxw5jx44dRoMGDYwuXboU++stjQoao1u3bhnPP/+8Ua1aNePAgQN2n01ZWVmGYRjGf//7XyM2NtbYvXu3kZSUZKxdu9aoU6eO8cQTTzBGRaSgMSrK32uMkWPu9XvOMAwjPT3d8PLyMhYuXJjn+RxD5rrX39eG8XB9FhG6S5j33nvPCA0NNdzd3Y1GjRrZfW0VzCEp30dcXJxhGIZx/fp1o2PHjkbVqlUNNzc3o0aNGsbgwYON06dP2/Vz48YNY8yYMUblypUNT09Po0uXLnnawDF9+vQxgoODDTc3NyMkJMTo0aOHcfjwYdv23NxcY9q0aUZQUJDh4eFhtG7d2vj+++/t+mB8zLdhwwZDkvHjjz/arecYKn4JCQn5/l4bPHiwYRhFd8ykpaUZAwYMMHx8fAwfHx9jwIABxuXLl4v
pVZZuBY1RUlLSXT+bEhISDMMwjNOnTxutW7c2KleubLi7uxuPPfaY8ac//clIS0uz2w9j5LiCxqgof68xRo651+85wzCMDz74wPD09DSuXLmS5/kcQ+a619/XhvFwfRZZDMMwTDqJDgAAAADAQ41rugEAAAAAMAmhGwAAAAAAkxC6AQAAAAAwCaEbAAAAAACTELoBAAAAADAJoRsAAAAAAJMQugEAAAAAMAmhGwAAAAAAkxC6AQDAA0lMTJTFYtGVK1ecXQoAACUOoRsAgBLuzJkzGjZsmEJCQuTu7q7Q0FCNHTtWaWlpxV5L27ZtNW7cOLt1LVq0UHJysvz8/CRJ8fHxqlixYrHXBgBASUToBgCgBPvpp5/UpEkTHTt2TEuXLtV///tfvf/++9qyZYuioqL0888/O7tEubu7KygoSBaLxdmlAABQ4hC6AQAowUaPHi13d3dt3LhRbdq0UY0aNRQdHa3Nmzfr3LlzmjJliiTJYrFo9erVds+tWLGi4uPjbcuvvvqqatWqJS8vLz366KOaOnWqbt26ZdtutVr1u9/9Tv/zP/+jsLAw+fn5qW/fvrp69aokKSYmRlu3btW8efNksVhksVh08uRJu+nliYmJGjJkiNLT021trFar/vrXv6pBgwZ5Xl/jxo31xhtvFP0bBwBACUHoBgCghPr555+1YcMGjRo1Sp6ennbbgoKCNGDAAC1fvlyGYRSqPx8fH8XHx+vIkSOaN2+ePvzwQ73zzjt2bU6cOKHVq1fr3//+t/79739r69atevvttyVJ8+bNU1RUlIYPH67k5GQlJyerevXqds9v0aKF5s6dK19fX1ubiRMnaujQoTpy5Ih2795ta3vo0CHt379fMTExDrw7AACUDq7OLgAAAOTv+PHjMgxDjz/+eL7bH3/8cV2+fFkXL14sVH9/+ctfbD+HhYVpwoQJWr58uSZNmmRbn5ubq/j4ePn4+EiSBg4cqC1btuitt96Sn5+f3N3d5eXlpaCgoHz34e7uLj8/P1ksFrs23t7eevbZZxUXF6emTZtKkuLi4tSmTRs9+uijhaofAIDSiDPdAACUUnfOcLu7uxeq/RdffKFWrVopKChI3t7emjp1qk6fPm3XJiwszBa4JSk4OFipqalFUu/w4cO1dOlS3bx5U7du3dJnn32moUOHFknfAACUVIRuAABKqJo1a8pisejIkSP5bv/Pf/6jqlWrqmLFirJYLHmmmf/6eu2dO3eqb9++io6O1r///W/t379fU6ZMUXZ2tt1z3Nzc7JYtFotyc3OL5PV07dpVHh4eWrVqlf71r38pKytLPXv2LJK+AQAoqZheDgBACeXv768OHTpowYIFeuWVV+yu605JSdFnn32m0aNHS5KqVq2q5ORk2/bjx4/r+vXrtuVvv/1WoaGhthuvSdKpU6fuuyZ3d3fl5OQ41MbV1VWDBw9WXFycPDw81LdvX3l5ed13DQAAlCaEbgAASrD58+erRYsWevbZZ/Xmm28qPDxchw8f1p///GfVqlXLdufv9u3ba/78+WrevLlyc3P16quv2p21rlmzpk6fPq1ly5apadOmWrt2rVatWnXf9YSFhWnXrl06efKkvL29Vbly5XzbXLt2TVu2bFFkZKS8vLxs4fqll16yXaP+7bffOvKWAABQqjC9HACAEiwiIkK7d+/Wo48+qt69eys0NFTR0dGqVauWvv32W3l7e0uSZs+ererVq6t169bq37+/Jk6caHcWuVu3bnrllVc0ZswY/e53v9P27ds1derU+65n4sSJcnFxUd26dVW1atU814RLv9zBfOTIkerTp4+qVq2qmTNn2r2eFi1aqHbt2mrWrJkD7wgAAKWLxSjs94wAAIASYdq0aZozZ442btyoqKgoZ5dzXwzDUJ06dTRixAiNHz/e2eUAAGA6ppcDAFDKxMbG2qZ5N2vWTOXKlY6Ja6mpqfqf//kfnTt3TkOGDHF2OQAAFAvOdAMAgGJhsVhUpUoVzZs
3T/3793d2OQAAFAvOdAMAgGLB//MDAB5GpWM+GgAAAAAApRChGwAAAAAAkxC6AQAAAAAwCaEbAAAAAACTELoBAAAAADAJoRsAAAAAAJMQugEAAAAAMAmhGwAAAAAAkxC6AQAAAAAwyf8HVVhSt1SDKPUAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(10, 5))\n", + "plt.hist(before_data['quantity'], bins=20, alpha=0.5, label='Snapshot 1', color='blue')\n", + "plt.hist(pd.to_numeric(after_data['qty'], errors='coerce'), bins=20, alpha=0.5, label='Snapshot 2', color='orange')\n", + "plt.title('Quantity Distribution')\n", + "plt.xlabel('Quantity')\n", + "plt.ylabel('Frequency')\n", + "plt.legend()\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "49408398", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
skuproduct_nameqtywarehouseupdated_at
52SKU-045Multimeter Pro-5.0Warehouse B2024-01-15
\n", + "
" + ], + "text/plain": [ + " sku product_name qty warehouse updated_at\n", + "52 SKU-045 Multimeter Pro -5.0 Warehouse B 2024-01-15" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "after_data[after_data['qty']<=0]" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "c994a04a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Out of stock in Snapshot 1: 0\n", + "Empty DataFrame\n", + "Columns: [sku, name, quantity, location]\n", + "Index: []\n", + "\n", + "Out of stock in Snapshot 2: 0\n", + "Empty DataFrame\n", + "Columns: [sku, product_name, qty, warehouse]\n", + "Index: []\n", + "\n", + "Newly out of stock (had stock before, zero now): 0\n", + "Empty DataFrame\n", + "Columns: [sku, name, quantity, qty]\n", + "Index: []\n", + "\n", + "Restocked (was zero, has stock now): 0\n", + "Empty DataFrame\n", + "Columns: [sku, name, quantity, qty]\n", + "Index: []\n" + ] + } + ], + "source": [ + "# Flag items with zero quantity as out_of_stock\n", + "out_of_stock_before = before_data[before_data['quantity'] == 0]\n", + "out_of_stock_after = after_data[after_data['qty'] == 0]\n", + "\n", + "print(f\"Out of stock in Snapshot 1: {len(out_of_stock_before)}\")\n", + "print(out_of_stock_before[['sku', 'name', 'quantity', 'location']])\n", + "\n", + "print(f\"\\nOut of stock in Snapshot 2: {len(out_of_stock_after)}\")\n", + "print(out_of_stock_after[['sku', 'product_name', 'qty', 'warehouse']])\n", + "\n", + "# Compare — items that went TO zero (were in stock before, out of stock now)\n", + "merged = before_data.merge(after_data, on='sku', how='inner')\n", + "newly_out_of_stock = merged[(merged['quantity'] > 0) & (merged['qty'] == 0)]\n", + "print(f\"\\nNewly out of stock (had stock before, zero now): {len(newly_out_of_stock)}\")\n", + "print(newly_out_of_stock[['sku', 'name', 'quantity', 'qty']])\n", + "\n", + "# Compare — items that came BACK in 
stock (zero before, has stock now)\n", + "restocked = merged[(merged['quantity'] == 0) & (merged['qty'] > 0)]\n", + "print(f\"\\nRestocked (was zero, has stock now): {len(restocked)}\")\n", + "print(restocked[['sku', 'name', 'quantity', 'qty']])" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "c389d0ca", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Items that changed location: 1\n", + " sku name location warehouse\n", + "40 SKU-045 Multimeter Pro Warehouse A Warehouse B\n" + ] + } + ], + "source": [ + "location_changed = merged[\n", + " merged['location'].str.strip().str.lower() != merged['warehouse'].str.strip().str.lower()\n", + "]\n", + "\n", + "print(f\"Items that changed location: {len(location_changed)}\")\n", + "print(location_changed[['sku', 'name', 'location', 'warehouse']])" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "7bd7c9fa", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
skuproduct_nameqtywarehouseupdated_at
42SKU-045Multimeter Professional23.0Warehouse A2024-01-15
52SKU-045Multimeter Pro-5.0Warehouse B2024-01-15
\n", + "
" + ], + "text/plain": [ + " sku product_name qty warehouse updated_at\n", + "42 SKU-045 Multimeter Professional 23.0 Warehouse A 2024-01-15\n", + "52 SKU-045 Multimeter Pro -5.0 Warehouse B 2024-01-15" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "after_data[after_data['sku']=='SKU-045']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fcf957f1", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/inventory-reconciliation/NOTES.md b/inventory-reconciliation/NOTES.md new file mode 100644 index 0000000..be7ece0 --- /dev/null +++ b/inventory-reconciliation/NOTES.md @@ -0,0 +1,77 @@ +# Inventory Reconciliation — Notes + +## Approach + +I started with exploratory data analysis in a Jupyter notebook (`ExploratoryDataAnalysis.ipynb`) before writing any reconciliation logic. The goal was to understand the shape, quality, and quirks of both snapshots so the reconciliation script could handle real-world messiness rather than assuming clean inputs. Every data quality issue discovered in EDA directly informed a cleaning step or validation check in `reconcile.py`. + +## EDA Process + +The EDA notebook walked through the data systematically: + +1. **Schema inspection** — Loaded both CSVs and immediately noticed the column names don't match. Snapshot 1 uses `name`, `quantity`, `location`, `last_counted` while snapshot 2 uses `product_name`, `qty`, `warehouse`, `updated_at`. This would silently break any merge without a column mapping step. + +2. 
**Data types** — Snapshot 1's `quantity` is `int64`, snapshot 2's `qty` is `float64` (values like `70.0`, `80.00`). The date columns loaded as `object` (strings) in both. These mismatches needed normalization before comparison. + +3. **SKU validation** — Checked length consistency, case consistency, whitespace, hidden characters, uniqueness, and format pattern across both datasets. + +4. **Null and disguised null checks** — Scanned for actual nulls and string placeholders like `"N/A"`, `"None"`, `"-"`. + +5. **Duplicate detection** — Found a key duplicate in snapshot 2 (same SKU, different data). + +6. **Date validation** — Checked parseability and found mixed date formats in snapshot 2. + +7. **Cross-snapshot consistency** — Merged on SKU to compare product names between snapshots and found a naming discrepancy. + +8. **Basic statistics and distributions** — Used `describe()` and histograms to spot outliers, including a negative quantity value. + +## Data Quality Issues Found + +| Issue | Count | Case | +|---|---|---| +| Column name mismatch between snapshots | 4 | `name` vs `product_name`, `quantity` vs `qty`, `location` vs `warehouse`, `last_counted` vs `updated_at` (snapshot 1 vs snapshot 2) | +| Non-standard SKU format (missing dash) | 2 | `SKU005` → `SKU-005`, `SKU018` → `SKU-018` | +| Case inconsistency in SKU | 1 | `sku-008` → `SKU-008` | +| Leading/trailing whitespace in product name | 5 | `" Widget B"`, `"Mounting Bracket Large "`, `" HDMI Cable 3ft "`, `" Compressed Air Can"` | +| Negative quantity | 1 | SKU-045 has qty `-5` (second duplicate row) | +| Non-ISO date format | 1 | `01/15/2024` on SKU-035 in snapshot 2 | +| Duplicate SKU | 1 | SKU-045 appears twice in snapshot 2 with different names and quantities | +| Product name mismatch (same SKU, different name) | 1 | `Multimeter Pro` → `Multimeter Professional` | +| Quantity dtype mismatch | — | `int64` in snapshot 1, `float64` in snapshot 2 | +| SKU whitespace | 0 | Clean in both datasets | +| SKU hidden characters | 0 | Clean in both datasets | + +## How Issues Were Fixed (and Why) + 
+**Column mapping** — Created a `COLUMN_MAPPING` dictionary that maps both schemas to a unified set of names. This is explicit, maintainable, and easy to update if future snapshots introduce new column names. + +**SKU normalization** — Applied a multi-step cleaning pipeline: strip whitespace, remove hidden characters, uppercase everything, then regex-fix formatting issues like missing hyphens (`SKU005` → `SKU-005`). The order matters — whitespace and case must be handled before format validation, otherwise the regex won't match. + +**Duplicate resolution** — For SKU-045 appearing twice in snapshot 2, we keep the row with the higher quantity as a safe default assumption. However, this is not necessarily the correct business rule. In production, the right approach is to understand the business context and consult the data owner — the duplicate could represent a correction, a return, a multi-location split, or a data entry error, and each scenario calls for a different resolution strategy. The decision is logged in the issues list so it can be revisited. + +**Product name mapping** — Built a dynamic SKU-to-name mapping from both snapshots rather than hardcoding names, since new products may appear in snapshot 2 that don't exist in snapshot 1 (and vice versa for removed items). The mapping combines both datasets so it covers old and new products alike. For conflicts like `Multimeter Pro` vs `Multimeter Professional`, we default to preferring snapshot 1's name as a safe assumption — it's the established record. In production, this preference is easily configurable and should be aligned with the business rule (e.g., always use the latest name, or always defer to a master product catalog). + +**Date parsing** — Used a multi-pass approach: let pandas guess first, then try explicit format strings (`%Y-%m-%d`, `%m/%d/%Y`, etc.) for anything still unparsed. This handles any mixed format issue without hardcoding assumptions about which rows use which format. 
+ +**Quantity normalization** — Coerced all quantities to numeric with `pd.to_numeric(errors='coerce')`, filling unparseable values with 0. This unifies the `int64`/`float64` mismatch and handles any stray strings. + +## Reconciliation Logic + +After cleaning both snapshots, the core reconciliation is a full outer merge on SKU. This ensures every item from both snapshots is represented — items only in snapshot 1, items only in snapshot 2, and items in both. Each row is then classified into one of six categories: + +- **removed** — exists in snapshot 1 but not in snapshot 2 (item no longer appears in the inventory snapshot at all) +- **out_of_stock** — exists in snapshot 2 with a quantity of 0 (checked before the added/increased/decreased/unchanged labels) +- **added** — exists in snapshot 2 but not in snapshot 1 (new item introduced) +- **increased** — exists in both, quantity went up (restocking) +- **decreased** — exists in both, quantity went down (consumption or sales) +- **unchanged** — exists in both, same quantity + +A `quantity_diff` column captures the numeric delta (after minus before), which is NaN for added/removed items since only one side has a value. + +For consolidated fields (product name, location, date), the report prefers the most current or authoritative value: snapshot 1's name (established record), snapshot 2's location (current state), and snapshot 2's date (most recent). The final report is sorted by change type severity (removed first, unchanged last) for quick scanning. + +## Benefits of the EDA Notebook + +The notebook serves as a living audit trail. Every issue discovered is documented with the code that found it, making the analysis reproducible and reviewable. It also decouples exploration from production logic — the notebook is for understanding the data, `reconcile.py` is for processing it. This separation means the script's cleaning steps aren't guesswork; each one traces back to a specific finding in EDA. + +## AI Tooling + +I used Claude as a thought partner throughout this project. 
During EDA, I discussed what checks to run for SKU validation, how to detect case inconsistencies, and how to handle mixed date formats. For the reconciliation script, I worked through design decisions and how to structure the merge and classification logic. For testing, Claude helped me think through edge cases like empty DataFrames and verify that my test coverage matched the actual function behavior. The code and decisions are my own, but the iterative back-and-forth helped me move faster and catch things I might have missed. \ No newline at end of file diff --git a/inventory-reconciliation/output/reconciliation_report.csv b/inventory-reconciliation/output/reconciliation_report.csv new file mode 100644 index 0000000..a5f0a8d --- /dev/null +++ b/inventory-reconciliation/output/reconciliation_report.csv @@ -0,0 +1,81 @@ +sku,product_name,location,quantity_before,quantity_after,quantity_diff,last_counted,change_type +SKU-025,VGA Cable,Warehouse B,50.0,,,2024-01-08,removed +SKU-026,DVI Cable,Warehouse B,35.0,,,2024-01-08,removed +SKU-001,Widget A,Warehouse A,150.0,145.0,-5.0,2024-01-15,decreased +SKU-002,Widget B,Warehouse A,75.0,70.0,-5.0,2024-01-15,decreased +SKU-003,Gadget Pro,Warehouse B,200.0,185.0,-15.0,2024-01-15,decreased +SKU-004,Gadget Lite,Warehouse A,50.0,48.0,-2.0,2024-01-15,decreased +SKU-005,Connector Cable 6ft,Warehouse C,500.0,480.0,-20.0,2024-01-15,decreased +SKU-008,Power Supply Unit Pro,Warehouse A,45.0,42.0,-3.0,2024-01-15,decreased +SKU-009,Mounting Bracket Small,Warehouse B,1000.0,975.0,-25.0,2024-01-15,decreased +SKU-010,Mounting Bracket Large,Warehouse B,750.0,720.0,-30.0,2024-01-15,decreased +SKU-011,LED Panel 12x12,Warehouse A,120.0,115.0,-5.0,2024-01-15,decreased +SKU-012,LED Panel 24x24,Warehouse A,90.0,85.0,-5.0,2024-01-15,decreased +SKU-013,Thermal Paste Tube,Warehouse C,2000.0,1850.0,-150.0,2024-01-15,decreased +SKU-014,Cooling Fan 80mm,Warehouse B,300.0,290.0,-10.0,2024-01-15,decreased +SKU-015,Cooling Fan 120mm,Warehouse 
B,250.0,245.0,-5.0,2024-01-15,decreased +SKU-016,USB Hub 4-Port,Warehouse A,180.0,165.0,-15.0,2024-01-15,decreased +SKU-017,USB Hub 7-Port,Warehouse A,95.0,88.0,-7.0,2024-01-15,decreased +SKU-018,Ethernet Cable Cat5,Warehouse C,800.0,750.0,-50.0,2024-01-15,decreased +SKU-019,Ethernet Cable Cat6,Warehouse C,600.0,580.0,-20.0,2024-01-15,decreased +SKU-020,Ethernet Cable Cat6a,Warehouse C,400.0,390.0,-10.0,2024-01-15,decreased +SKU-021,HDMI Cable 3ft,Warehouse A,450.0,425.0,-25.0,2024-01-15,decreased +SKU-022,HDMI Cable 6ft,Warehouse A,380.0,365.0,-15.0,2024-01-15,decreased +SKU-023,HDMI Cable 10ft,Warehouse A,220.0,210.0,-10.0,2024-01-15,decreased +SKU-024,DisplayPort Cable,Warehouse A,175.0,170.0,-5.0,2024-01-15,decreased +SKU-027,Audio Cable 3.5mm,Warehouse C,600.0,575.0,-25.0,2024-01-15,decreased +SKU-028,Audio Cable RCA,Warehouse C,400.0,385.0,-15.0,2024-01-15,decreased +SKU-029,Optical Audio Cable,Warehouse C,150.0,145.0,-5.0,2024-01-15,decreased +SKU-030,Surge Protector 6-Outlet,Warehouse A,200.0,188.0,-12.0,2024-01-15,decreased +SKU-031,Surge Protector 12-Outlet,Warehouse A,120.0,112.0,-8.0,2024-01-15,decreased +SKU-032,Extension Cord 10ft,Warehouse B,300.0,285.0,-15.0,2024-01-15,decreased +SKU-033,Extension Cord 25ft,Warehouse B,180.0,172.0,-8.0,2024-01-15,decreased +SKU-034,Power Strip,Warehouse A,250.0,240.0,-10.0,2024-01-15,decreased +SKU-035,Cable Ties 100pk,Warehouse C,1500.0,1420.0,-80.0,2024-01-15,decreased +SKU-036,Cable Ties 500pk,Warehouse C,400.0,385.0,-15.0,2024-01-15,decreased +SKU-037,Velcro Straps 50pk,Warehouse C,800.0,765.0,-35.0,2024-01-15,decreased +SKU-038,Label Maker,Warehouse A,25.0,22.0,-3.0,2024-01-15,decreased +SKU-039,Label Tape,Warehouse A,200.0,185.0,-15.0,2024-01-15,decreased +SKU-040,Screwdriver Set,Warehouse B,150.0,142.0,-8.0,2024-01-15,decreased +SKU-041,Precision Screwdriver Set,Warehouse B,100.0,95.0,-5.0,2024-01-15,decreased +SKU-042,Wire Stripper,Warehouse B,75.0,70.0,-5.0,2024-01-15,decreased +SKU-043,Crimping 
Tool,Warehouse B,60.0,58.0,-2.0,2024-01-15,decreased +SKU-044,Multimeter Basic,Warehouse A,40.0,35.0,-5.0,2024-01-15,decreased +SKU-045,Multimeter Pro,Warehouse A,25.0,23.0,-2.0,2024-01-15,decreased +SKU-046,Soldering Iron,Warehouse B,35.0,32.0,-3.0,2024-01-15,decreased +SKU-047,Solder Wire,Warehouse B,300.0,280.0,-20.0,2024-01-15,decreased +SKU-048,Heat Shrink Tubing,Warehouse C,500.0,475.0,-25.0,2024-01-15,decreased +SKU-049,Electrical Tape,Warehouse C,800.0,760.0,-40.0,2024-01-15,decreased +SKU-050,Anti-Static Wrist Strap,Warehouse A,200.0,190.0,-10.0,2024-01-15,decreased +SKU-051,Anti-Static Mat,Warehouse A,50.0,48.0,-2.0,2024-01-15,decreased +SKU-052,Compressed Air Can,Warehouse C,400.0,375.0,-25.0,2024-01-15,decreased +SKU-053,Isopropyl Alcohol 99%,Warehouse C,150.0,140.0,-10.0,2024-01-15,decreased +SKU-054,Microfiber Cloth 10pk,Warehouse C,300.0,285.0,-15.0,2024-01-15,decreased +SKU-055,Screen Cleaner,Warehouse C,250.0,235.0,-15.0,2024-01-15,decreased +SKU-056,Keyboard Cleaner Gel,Warehouse C,180.0,168.0,-12.0,2024-01-15,decreased +SKU-057,Monitor Stand,Warehouse A,45.0,42.0,-3.0,2024-01-15,decreased +SKU-058,Laptop Stand,Warehouse A,60.0,58.0,-2.0,2024-01-15,decreased +SKU-059,Tablet Stand,Warehouse A,80.0,75.0,-5.0,2024-01-15,decreased +SKU-060,Phone Stand,Warehouse A,120.0,115.0,-5.0,2024-01-15,decreased +SKU-061,Desk Organizer,Warehouse B,90.0,85.0,-5.0,2024-01-15,decreased +SKU-062,Cable Management Box,Warehouse B,110.0,105.0,-5.0,2024-01-15,decreased +SKU-063,Headphone Hook,Warehouse B,200.0,192.0,-8.0,2024-01-15,decreased +SKU-064,Webcam Mount,Warehouse A,75.0,72.0,-3.0,2024-01-15,decreased +SKU-065,Ring Light 10in,Warehouse A,40.0,38.0,-2.0,2024-01-15,decreased +SKU-066,Ring Light 18in,Warehouse A,25.0,22.0,-3.0,2024-01-15,decreased +SKU-067,Tripod Small,Warehouse B,55.0,52.0,-3.0,2024-01-15,decreased +SKU-068,Tripod Large,Warehouse B,35.0,33.0,-2.0,2024-01-15,decreased +SKU-069,Green Screen,Warehouse A,20.0,18.0,-2.0,2024-01-15,decreased 
+SKU-070,Backdrop Stand,Warehouse A,15.0,12.0,-3.0,2024-01-15,decreased +SKU-071,USB Microphone,Warehouse A,30.0,28.0,-2.0,2024-01-15,decreased +SKU-072,XLR Microphone,Warehouse A,20.0,18.0,-2.0,2024-01-15,decreased +SKU-073,Pop Filter,Warehouse B,100.0,95.0,-5.0,2024-01-15,decreased +SKU-074,Boom Arm,Warehouse B,45.0,42.0,-3.0,2024-01-15,decreased +SKU-075,Shock Mount,Warehouse B,40.0,38.0,-2.0,2024-01-15,decreased +SKU-076,Stream Deck Mini,Warehouse A,,15.0,,2024-01-15,added +SKU-077,Stream Deck XL,Warehouse A,,8.0,,2024-01-15,added +SKU-078,Capture Card,Warehouse A,,12.0,,2024-01-15,added +SKU-079,USB-C Hub,Warehouse A,,45.0,,2024-01-15,added +SKU-080,Thunderbolt Cable,Warehouse A,,30.0,,2024-01-15,added +SKU-006,Connector Cable 10ft,Warehouse C,350.0,350.0,0.0,2024-01-15,unchanged +SKU-007,Power Supply Unit,Warehouse A,80.0,80.0,0.0,2024-01-15,unchanged diff --git a/inventory-reconciliation/output/reconciliation_report.json b/inventory-reconciliation/output/reconciliation_report.json new file mode 100644 index 0000000..04e71b0 --- /dev/null +++ b/inventory-reconciliation/output/reconciliation_report.json @@ -0,0 +1,802 @@ +[ + { + "sku":"SKU-025", + "product_name":"VGA Cable", + "location":"Warehouse B", + "quantity_before":50.0, + "quantity_after":null, + "quantity_diff":null, + "last_counted":"2024-01-08", + "change_type":"removed" + }, + { + "sku":"SKU-026", + "product_name":"DVI Cable", + "location":"Warehouse B", + "quantity_before":35.0, + "quantity_after":null, + "quantity_diff":null, + "last_counted":"2024-01-08", + "change_type":"removed" + }, + { + "sku":"SKU-001", + "product_name":"Widget A", + "location":"Warehouse A", + "quantity_before":150.0, + "quantity_after":145.0, + "quantity_diff":-5.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-002", + "product_name":"Widget B", + "location":"Warehouse A", + "quantity_before":75.0, + "quantity_after":70.0, + "quantity_diff":-5.0, + "last_counted":"2024-01-15", + 
"change_type":"decreased" + }, + { + "sku":"SKU-003", + "product_name":"Gadget Pro", + "location":"Warehouse B", + "quantity_before":200.0, + "quantity_after":185.0, + "quantity_diff":-15.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-004", + "product_name":"Gadget Lite", + "location":"Warehouse A", + "quantity_before":50.0, + "quantity_after":48.0, + "quantity_diff":-2.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-005", + "product_name":"Connector Cable 6ft", + "location":"Warehouse C", + "quantity_before":500.0, + "quantity_after":480.0, + "quantity_diff":-20.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-008", + "product_name":"Power Supply Unit Pro", + "location":"Warehouse A", + "quantity_before":45.0, + "quantity_after":42.0, + "quantity_diff":-3.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-009", + "product_name":"Mounting Bracket Small", + "location":"Warehouse B", + "quantity_before":1000.0, + "quantity_after":975.0, + "quantity_diff":-25.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-010", + "product_name":"Mounting Bracket Large", + "location":"Warehouse B", + "quantity_before":750.0, + "quantity_after":720.0, + "quantity_diff":-30.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-011", + "product_name":"LED Panel 12x12", + "location":"Warehouse A", + "quantity_before":120.0, + "quantity_after":115.0, + "quantity_diff":-5.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-012", + "product_name":"LED Panel 24x24", + "location":"Warehouse A", + "quantity_before":90.0, + "quantity_after":85.0, + "quantity_diff":-5.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-013", + "product_name":"Thermal Paste Tube", + "location":"Warehouse C", + "quantity_before":2000.0, + 
"quantity_after":1850.0, + "quantity_diff":-150.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-014", + "product_name":"Cooling Fan 80mm", + "location":"Warehouse B", + "quantity_before":300.0, + "quantity_after":290.0, + "quantity_diff":-10.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-015", + "product_name":"Cooling Fan 120mm", + "location":"Warehouse B", + "quantity_before":250.0, + "quantity_after":245.0, + "quantity_diff":-5.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-016", + "product_name":"USB Hub 4-Port", + "location":"Warehouse A", + "quantity_before":180.0, + "quantity_after":165.0, + "quantity_diff":-15.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-017", + "product_name":"USB Hub 7-Port", + "location":"Warehouse A", + "quantity_before":95.0, + "quantity_after":88.0, + "quantity_diff":-7.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-018", + "product_name":"Ethernet Cable Cat5", + "location":"Warehouse C", + "quantity_before":800.0, + "quantity_after":750.0, + "quantity_diff":-50.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-019", + "product_name":"Ethernet Cable Cat6", + "location":"Warehouse C", + "quantity_before":600.0, + "quantity_after":580.0, + "quantity_diff":-20.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-020", + "product_name":"Ethernet Cable Cat6a", + "location":"Warehouse C", + "quantity_before":400.0, + "quantity_after":390.0, + "quantity_diff":-10.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-021", + "product_name":"HDMI Cable 3ft", + "location":"Warehouse A", + "quantity_before":450.0, + "quantity_after":425.0, + "quantity_diff":-25.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-022", + 
"product_name":"HDMI Cable 6ft", + "location":"Warehouse A", + "quantity_before":380.0, + "quantity_after":365.0, + "quantity_diff":-15.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-023", + "product_name":"HDMI Cable 10ft", + "location":"Warehouse A", + "quantity_before":220.0, + "quantity_after":210.0, + "quantity_diff":-10.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-024", + "product_name":"DisplayPort Cable", + "location":"Warehouse A", + "quantity_before":175.0, + "quantity_after":170.0, + "quantity_diff":-5.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-027", + "product_name":"Audio Cable 3.5mm", + "location":"Warehouse C", + "quantity_before":600.0, + "quantity_after":575.0, + "quantity_diff":-25.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-028", + "product_name":"Audio Cable RCA", + "location":"Warehouse C", + "quantity_before":400.0, + "quantity_after":385.0, + "quantity_diff":-15.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-029", + "product_name":"Optical Audio Cable", + "location":"Warehouse C", + "quantity_before":150.0, + "quantity_after":145.0, + "quantity_diff":-5.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-030", + "product_name":"Surge Protector 6-Outlet", + "location":"Warehouse A", + "quantity_before":200.0, + "quantity_after":188.0, + "quantity_diff":-12.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-031", + "product_name":"Surge Protector 12-Outlet", + "location":"Warehouse A", + "quantity_before":120.0, + "quantity_after":112.0, + "quantity_diff":-8.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-032", + "product_name":"Extension Cord 10ft", + "location":"Warehouse B", + "quantity_before":300.0, + "quantity_after":285.0, + "quantity_diff":-15.0, 
+ "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-033", + "product_name":"Extension Cord 25ft", + "location":"Warehouse B", + "quantity_before":180.0, + "quantity_after":172.0, + "quantity_diff":-8.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-034", + "product_name":"Power Strip", + "location":"Warehouse A", + "quantity_before":250.0, + "quantity_after":240.0, + "quantity_diff":-10.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-035", + "product_name":"Cable Ties 100pk", + "location":"Warehouse C", + "quantity_before":1500.0, + "quantity_after":1420.0, + "quantity_diff":-80.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-036", + "product_name":"Cable Ties 500pk", + "location":"Warehouse C", + "quantity_before":400.0, + "quantity_after":385.0, + "quantity_diff":-15.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-037", + "product_name":"Velcro Straps 50pk", + "location":"Warehouse C", + "quantity_before":800.0, + "quantity_after":765.0, + "quantity_diff":-35.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-038", + "product_name":"Label Maker", + "location":"Warehouse A", + "quantity_before":25.0, + "quantity_after":22.0, + "quantity_diff":-3.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-039", + "product_name":"Label Tape", + "location":"Warehouse A", + "quantity_before":200.0, + "quantity_after":185.0, + "quantity_diff":-15.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-040", + "product_name":"Screwdriver Set", + "location":"Warehouse B", + "quantity_before":150.0, + "quantity_after":142.0, + "quantity_diff":-8.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-041", + "product_name":"Precision Screwdriver Set", + "location":"Warehouse B", + 
"quantity_before":100.0, + "quantity_after":95.0, + "quantity_diff":-5.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-042", + "product_name":"Wire Stripper", + "location":"Warehouse B", + "quantity_before":75.0, + "quantity_after":70.0, + "quantity_diff":-5.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-043", + "product_name":"Crimping Tool", + "location":"Warehouse B", + "quantity_before":60.0, + "quantity_after":58.0, + "quantity_diff":-2.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-044", + "product_name":"Multimeter Basic", + "location":"Warehouse A", + "quantity_before":40.0, + "quantity_after":35.0, + "quantity_diff":-5.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-045", + "product_name":"Multimeter Pro", + "location":"Warehouse A", + "quantity_before":25.0, + "quantity_after":23.0, + "quantity_diff":-2.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-046", + "product_name":"Soldering Iron", + "location":"Warehouse B", + "quantity_before":35.0, + "quantity_after":32.0, + "quantity_diff":-3.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-047", + "product_name":"Solder Wire", + "location":"Warehouse B", + "quantity_before":300.0, + "quantity_after":280.0, + "quantity_diff":-20.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-048", + "product_name":"Heat Shrink Tubing", + "location":"Warehouse C", + "quantity_before":500.0, + "quantity_after":475.0, + "quantity_diff":-25.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-049", + "product_name":"Electrical Tape", + "location":"Warehouse C", + "quantity_before":800.0, + "quantity_after":760.0, + "quantity_diff":-40.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-050", + 
"product_name":"Anti-Static Wrist Strap", + "location":"Warehouse A", + "quantity_before":200.0, + "quantity_after":190.0, + "quantity_diff":-10.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-051", + "product_name":"Anti-Static Mat", + "location":"Warehouse A", + "quantity_before":50.0, + "quantity_after":48.0, + "quantity_diff":-2.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-052", + "product_name":"Compressed Air Can", + "location":"Warehouse C", + "quantity_before":400.0, + "quantity_after":375.0, + "quantity_diff":-25.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-053", + "product_name":"Isopropyl Alcohol 99%", + "location":"Warehouse C", + "quantity_before":150.0, + "quantity_after":140.0, + "quantity_diff":-10.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-054", + "product_name":"Microfiber Cloth 10pk", + "location":"Warehouse C", + "quantity_before":300.0, + "quantity_after":285.0, + "quantity_diff":-15.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-055", + "product_name":"Screen Cleaner", + "location":"Warehouse C", + "quantity_before":250.0, + "quantity_after":235.0, + "quantity_diff":-15.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-056", + "product_name":"Keyboard Cleaner Gel", + "location":"Warehouse C", + "quantity_before":180.0, + "quantity_after":168.0, + "quantity_diff":-12.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-057", + "product_name":"Monitor Stand", + "location":"Warehouse A", + "quantity_before":45.0, + "quantity_after":42.0, + "quantity_diff":-3.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-058", + "product_name":"Laptop Stand", + "location":"Warehouse A", + "quantity_before":60.0, + "quantity_after":58.0, + "quantity_diff":-2.0, + 
"last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-059", + "product_name":"Tablet Stand", + "location":"Warehouse A", + "quantity_before":80.0, + "quantity_after":75.0, + "quantity_diff":-5.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-060", + "product_name":"Phone Stand", + "location":"Warehouse A", + "quantity_before":120.0, + "quantity_after":115.0, + "quantity_diff":-5.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-061", + "product_name":"Desk Organizer", + "location":"Warehouse B", + "quantity_before":90.0, + "quantity_after":85.0, + "quantity_diff":-5.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-062", + "product_name":"Cable Management Box", + "location":"Warehouse B", + "quantity_before":110.0, + "quantity_after":105.0, + "quantity_diff":-5.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-063", + "product_name":"Headphone Hook", + "location":"Warehouse B", + "quantity_before":200.0, + "quantity_after":192.0, + "quantity_diff":-8.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-064", + "product_name":"Webcam Mount", + "location":"Warehouse A", + "quantity_before":75.0, + "quantity_after":72.0, + "quantity_diff":-3.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-065", + "product_name":"Ring Light 10in", + "location":"Warehouse A", + "quantity_before":40.0, + "quantity_after":38.0, + "quantity_diff":-2.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-066", + "product_name":"Ring Light 18in", + "location":"Warehouse A", + "quantity_before":25.0, + "quantity_after":22.0, + "quantity_diff":-3.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-067", + "product_name":"Tripod Small", + "location":"Warehouse B", + "quantity_before":55.0, + 
"quantity_after":52.0, + "quantity_diff":-3.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-068", + "product_name":"Tripod Large", + "location":"Warehouse B", + "quantity_before":35.0, + "quantity_after":33.0, + "quantity_diff":-2.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-069", + "product_name":"Green Screen", + "location":"Warehouse A", + "quantity_before":20.0, + "quantity_after":18.0, + "quantity_diff":-2.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-070", + "product_name":"Backdrop Stand", + "location":"Warehouse A", + "quantity_before":15.0, + "quantity_after":12.0, + "quantity_diff":-3.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-071", + "product_name":"USB Microphone", + "location":"Warehouse A", + "quantity_before":30.0, + "quantity_after":28.0, + "quantity_diff":-2.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-072", + "product_name":"XLR Microphone", + "location":"Warehouse A", + "quantity_before":20.0, + "quantity_after":18.0, + "quantity_diff":-2.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-073", + "product_name":"Pop Filter", + "location":"Warehouse B", + "quantity_before":100.0, + "quantity_after":95.0, + "quantity_diff":-5.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-074", + "product_name":"Boom Arm", + "location":"Warehouse B", + "quantity_before":45.0, + "quantity_after":42.0, + "quantity_diff":-3.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-075", + "product_name":"Shock Mount", + "location":"Warehouse B", + "quantity_before":40.0, + "quantity_after":38.0, + "quantity_diff":-2.0, + "last_counted":"2024-01-15", + "change_type":"decreased" + }, + { + "sku":"SKU-076", + "product_name":"Stream Deck Mini", + "location":"Warehouse A", + 
"quantity_before":null, + "quantity_after":15.0, + "quantity_diff":null, + "last_counted":"2024-01-15", + "change_type":"added" + }, + { + "sku":"SKU-077", + "product_name":"Stream Deck XL", + "location":"Warehouse A", + "quantity_before":null, + "quantity_after":8.0, + "quantity_diff":null, + "last_counted":"2024-01-15", + "change_type":"added" + }, + { + "sku":"SKU-078", + "product_name":"Capture Card", + "location":"Warehouse A", + "quantity_before":null, + "quantity_after":12.0, + "quantity_diff":null, + "last_counted":"2024-01-15", + "change_type":"added" + }, + { + "sku":"SKU-079", + "product_name":"USB-C Hub", + "location":"Warehouse A", + "quantity_before":null, + "quantity_after":45.0, + "quantity_diff":null, + "last_counted":"2024-01-15", + "change_type":"added" + }, + { + "sku":"SKU-080", + "product_name":"Thunderbolt Cable", + "location":"Warehouse A", + "quantity_before":null, + "quantity_after":30.0, + "quantity_diff":null, + "last_counted":"2024-01-15", + "change_type":"added" + }, + { + "sku":"SKU-006", + "product_name":"Connector Cable 10ft", + "location":"Warehouse C", + "quantity_before":350.0, + "quantity_after":350.0, + "quantity_diff":0.0, + "last_counted":"2024-01-15", + "change_type":"unchanged" + }, + { + "sku":"SKU-007", + "product_name":"Power Supply Unit", + "location":"Warehouse A", + "quantity_before":80.0, + "quantity_after":80.0, + "quantity_diff":0.0, + "last_counted":"2024-01-15", + "change_type":"unchanged" + } +] \ No newline at end of file diff --git a/inventory-reconciliation/reconcile.py b/inventory-reconciliation/reconcile.py new file mode 100644 index 0000000..c7d61d1 --- /dev/null +++ b/inventory-reconciliation/reconcile.py @@ -0,0 +1,296 @@ +import pandas as pd +import os +import re + +def load_data(file_1_path, file_2_path): + """ + Reads the two dataset snapshots from the data folder. 
+ """ + print(f"Loading {file_1_path}...") + before_data = pd.read_csv(file_1_path) + + print(f"Loading {file_2_path}...") + after_data = pd.read_csv(file_2_path) + + return before_data, after_data + +# Supports both snapshot schemas — maps variant column names to a unified set. +COLUMN_MAPPING = { + 'sku': 'sku', + 'name': 'product_name', + 'quantity': 'quantity', + 'location': 'location', + 'last_counted': 'last_counted', + 'product_name': 'product_name', + 'qty': 'quantity', + 'warehouse': 'location', + 'updated_at': 'last_counted' +} + + +def parse_mixed_dates(series): + """Try multiple formats, fill in progressively.""" + formats = [ + '%Y-%m-%d', # 2024-05-11 + '%m/%d/%Y', # 01/15/2024 + '%d-%m-%Y', # 15-01-2024 + '%Y/%m/%d', # 2024/01/15 + ] + + result = pd.to_datetime(series, errors='coerce') + + # For anything still NaT, try each format explicitly + for fmt in formats: + still_missing = result.isna() & series.notna() + if not still_missing.any(): + break + result[still_missing] = pd.to_datetime(series[still_missing], format=fmt, errors='coerce') + result = result.dt.strftime('%Y-%m-%d') + + return result + + +def normalize_dtypes(dataframe): + """Standardize column types.""" + + # Strings + str_cols = ['sku', 'product_name', 'location'] + for col in str_cols: + if col in dataframe.columns: + dataframe[col] = dataframe[col].astype(str).str.strip() + + # Numeric columns + if 'quantity' in dataframe.columns: + dataframe['quantity'] = pd.to_numeric(dataframe['quantity'], errors='coerce').fillna(0).apply(float) + + # Date + if 'last_counted' in dataframe.columns: + dataframe['last_counted'] = parse_mixed_dates(dataframe['last_counted']) + + return dataframe + +def validate_clean_sku(df, label='dataset'): + """Validate and clean SKU column. 
Returns cleaned df and a list of issues found.""" + issues = [] + + # Whitespace and hidden characters + original = df['sku'].copy() + df['sku'] = df['sku'].str.strip() + whitespace_count = (original != df['sku']).sum() + if whitespace_count > 0: + issues.append(f"{whitespace_count} SKUs had whitespace issues") + + hidden = df['sku'].str.contains(r'[^\x20-\x7E]', regex=True) + if hidden.any(): + issues.append(f"{hidden.sum()} SKUs had hidden characters: {df[hidden]['sku'].apply(repr).tolist()}") + df['sku'] = df['sku'].str.replace(r'[^\x20-\x7E]', '', regex=True) + + # Case consistency + all_upper = (df['sku'] == df['sku'].str.upper()).all() + all_lower = (df['sku'] == df['sku'].str.lower()).all() + if not all_upper and not all_lower: + issues.append(f"Mixed case detected — normalizing to uppercase") + df['sku'] = df['sku'].str.upper() + + # Format validation — must be SKU-NNN + expected_pattern = r'^SKU-\d{3}$' + valid_format = df['sku'].str.match(expected_pattern) + if not valid_format.all(): + bad_skus = df[~valid_format]['sku'].tolist() + issues.append(f"Non-standard format SKUs: {bad_skus}") + + # Fix: extract prefix letters and trailing digits, rebuild as SKU-NNN + def fix_sku(sku): + match = re.match(r'^([A-Z]+)-?(\d+)$', sku) + if match: + prefix, digits = match.groups() + return f"{prefix}-{digits.zfill(3)}" + return sku # leave unchanged if totally unrecognizable + + df['sku'] = df['sku'].apply(fix_sku) + + fixed = df['sku'].str.match(expected_pattern) + still_bad = df[~fixed]['sku'].tolist() + if still_bad: + issues.append(f"Could not fix these SKUs: {still_bad}") + else: + issues.append(f"All SKUs normalized to SKU-NNN format") + + # Length consistency + lengths = df['sku'].str.len() + if lengths.nunique() > 1: + length_counts = lengths.value_counts().to_dict() + issues.append(f"Inconsistent SKU lengths: {length_counts}") + common_len = lengths.mode()[0] + odd_skus = df[lengths != common_len]['sku'].tolist() + issues.append(f"Non-standard length SKUs: 
{odd_skus}") + + # Uniqueness + dupes = df['sku'].duplicated(keep=False) + if dupes.any(): + dupe_skus = df[dupes]['sku'].unique().tolist() + issues.append(f"{len(dupe_skus)} duplicate SKUs found: {dupe_skus}") + + # Keep the row with higher quantity + df = df.sort_values('quantity', ascending=False).drop_duplicates(subset='sku', keep='first').reset_index(drop=True) + issues.append(f"Duplicates resolved — kept highest quantity for each SKU") + + # Summary + if issues: + print(f"\n[{label}] SKU issues found:") + for issue in issues: + print(f" - {issue}") + else: + print(f"\n[{label}] SKUs are clean") + + return df, issues + +def generate_sku_product_mapping(df1, df2, label1='Snapshot 1', label2='Snapshot 2'): + """Build SKU to product_name mapping from both datasets. Flag mismatches.""" + issues = [] + + # Clean names before comparing + df1 = df1.copy() + df2 = df2.copy() + # remove the leading and tailing whitespace first. + df1['product_name'] = df1['product_name'].str.strip() + df2['product_name'] = df2['product_name'].str.strip() + + # Get SKU-name pairs from both + map1 = df1.set_index('sku')['product_name'].to_dict() + map2 = df2.set_index('sku')['product_name'].to_dict() + + # Find mismatches — same SKU, different name + common_skus = set(map1.keys()) & set(map2.keys()) + mismatches = { + sku: (map1[sku], map2[sku]) + for sku in common_skus + if map1[sku] != map2[sku] + } + + if mismatches: + issues.append(f"{len(mismatches)} SKU-name mismatches found:") + for sku, (name1, name2) in mismatches.items(): + issues.append(f" {sku}: '{name1}' ({label1}) vs '{name2}' ({label2})") + + # Build final mapping — prefer df1's name, fallback to df2 + mapping = {**map2, **map1} + + if issues: + print("\nSKU-Name mapping issues:") + for issue in issues: + print(f" - {issue}") + else: + print("\nSKU-Name mapping: all consistent") + + return mapping, mismatches + + +def reconciliation(before, after): + """ + Perform outer merge on SKU and return a single reconciliation 
def reconciliation(before, after):
    """Outer-merge two snapshots on SKU and return one reconciliation report.

    Args:
        before: Earlier snapshot with columns 'sku', 'product_name',
            'location', 'quantity' and 'last_counted'.
        after: Later snapshot with the same columns.

    Returns:
        DataFrame with one row per SKU seen in either snapshot and the columns
        sku, product_name, location, quantity_before, quantity_after,
        quantity_diff, last_counted and change_type. ``change_type`` is one of
        'removed', 'out_of_stock', 'decreased', 'increased', 'added' or
        'unchanged'. Rows are ordered by change type (in that order), then SKU.

    Side effects:
        Prints the input/merged shapes and a per-change-type summary.
    """
    print(f"Before merge, shape of before: {before.shape}")
    print(f"Before merge, shape of after: {after.shape}")

    merged = before.merge(after, how="outer", on='sku', suffixes=('_before', '_after'), indicator=True)
    print(f"After merging, shape: {merged.shape}")

    def classify_change(source, qty_before, qty_after):
        """Label one merged row from its merge indicator and quantities."""
        if source == 'left_only':
            return 'removed'
        # Zero stock in the later snapshot wins over added/decreased.
        if qty_after == 0:
            return 'out_of_stock'
        if source == 'right_only':
            return 'added'
        if qty_before < qty_after:
            return 'increased'
        if qty_before > qty_after:
            return 'decreased'
        return 'unchanged'

    # Iterate only the three columns the classifier needs. Unlike
    # DataFrame.apply(axis=1) this never builds full row Series and yields a
    # plain (possibly empty) list, so an empty merge is handled uniformly.
    merged['change_type'] = [
        classify_change(source, qty_before, qty_after)
        for source, qty_before, qty_after in zip(
            merged['_merge'], merged['quantity_before'], merged['quantity_after']
        )
    ]

    # Compute quantity difference (after - before), NaN for added/removed items
    merged['quantity_diff'] = merged['quantity_after'] - merged['quantity_before']

    # Consolidate product_name: prefer before, fall back to after
    merged['product_name'] = merged['product_name_before'].fillna(merged['product_name_after'])

    # Consolidate location: prefer after (current), fall back to before
    merged['location'] = merged['location_after'].fillna(merged['location_before'])

    # Consolidate last_counted: prefer after (most recent), fall back to before
    merged['last_counted'] = merged['last_counted_after'].fillna(merged['last_counted_before'])

    # Build the clean report — drop suffixed columns and merge indicator
    report = merged[[
        'sku',
        'product_name',
        'location',
        'quantity_before',
        'quantity_after',
        'quantity_diff',
        'last_counted',
        'change_type',
    ]].copy()

    # Every label classify_change can emit needs a rank here; a label missing
    # from this dict maps to NaN and silently sorts to the end of the report.
    type_order = {
        'removed': 0,
        'out_of_stock': 1,
        'decreased': 2,
        'increased': 3,
        'added': 4,
        'unchanged': 5,
    }
    report['_sort'] = report['change_type'].map(type_order)
    report = report.sort_values(['_sort', 'sku']).drop(columns='_sort').reset_index(drop=True)

    counts = report['change_type'].value_counts()
    print(f"\nReconciliation summary:")
    for change_type in ['increased', 'decreased', 'out_of_stock', 'removed', 'added', 'unchanged']:
        count = counts.get(change_type, 0)
        print(f" {change_type}: {count}")

    return report
if __name__ == "__main__":
    # Script entry point: load both snapshots, normalize and validate them,
    # reconcile, and write the report as CSV and JSON next to this file.

    base_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(base_dir, 'output')
    data_dir = os.path.join(base_dir, 'data')

    file_1_path = os.path.join(data_dir, 'snapshot_1.csv')
    file_2_path = os.path.join(data_dir, 'snapshot_2.csv')

    before_data, after_data = load_data(file_1_path, file_2_path)

    print("\nSnapshot 1 Data:")
    print(before_data.head())

    print("\nSnapshot 2 Data:")
    print(after_data.head())

    # Bring both snapshots onto the shared column names before any cleaning.
    before_data = before_data.rename(columns=COLUMN_MAPPING)
    after_data = after_data.rename(columns=COLUMN_MAPPING)

    before_data = normalize_dtypes(before_data)
    after_data = normalize_dtypes(after_data)
    before_data, before_issues = validate_clean_sku(before_data, 'Snapshot 1')
    after_data, after_issues = validate_clean_sku(after_data, 'Snapshot 2')

    # Use one agreed product name per SKU in both frames so the merged report
    # does not carry two competing names for the same item.
    product_map, mismatches = generate_sku_product_mapping(before_data, after_data)
    before_data['product_name'] = before_data['sku'].map(product_map)
    after_data['product_name'] = after_data['sku'].map(product_map)

    report = reconciliation(before_data, after_data)

    # to_csv / to_json do not create missing parent directories; make sure the
    # output folder exists so a fresh checkout does not fail at the last step.
    os.makedirs(output_dir, exist_ok=True)

    csv_path = os.path.join(output_dir, 'reconciliation_report.csv')
    json_path = os.path.join(output_dir, 'reconciliation_report.json')

    report.to_csv(csv_path, index=False)
    report.to_json(json_path, orient='records', indent=2, date_format='iso')

    print(f"\nReport saved to:")
    print(f" CSV: {csv_path}")
    print(f" JSON: {json_path}")
@patch('pandas.read_csv')
def test_load_data(mock_read_csv):
    """load_data reads each path once and returns the frames in call order."""
    first_frame = pd.DataFrame({'sku': ['SKU-001']})
    second_frame = pd.DataFrame({'sku': ['SKU-002']})
    mock_read_csv.side_effect = [first_frame, second_frame]

    loaded = load_data('mock_path_1.csv', 'mock_path_2.csv')
    df1, df2 = loaded

    assert mock_read_csv.call_count == 2
    for expected_path in ('mock_path_1.csv', 'mock_path_2.csv'):
        mock_read_csv.assert_any_call(expected_path)

    assert list(df1.columns) == ['sku']
    assert df1['sku'].iloc[0] == 'SKU-001'
    assert df2['sku'].iloc[0] == 'SKU-002'


def test_parse_mixed_dates():
    """Each supported date layout becomes ISO text; garbage becomes missing."""
    raw_dates = pd.Series(['2024-05-11', '01/15/2024', '15-01-2024', '2024/01/15', 'invalid'])
    parsed = parse_mixed_dates(raw_dates)

    expected_iso = ['2024-05-11', '2024-01-15', '2024-01-15', '2024-01-15']
    for position, iso_text in enumerate(expected_iso):
        assert parsed[position] == iso_text
    assert pd.isna(parsed[4])


def test_normalize_dtypes():
    """Text columns are trimmed, quantity is numeric, dates are ISO text."""
    raw = pd.DataFrame({
        'sku': [' SKU-001 ', 'SKU-002'],
        'product_name': [' Apple ', 'Banana '],
        'location': [' WH1 ', 'WH2'],
        'quantity': ['10', 'not_a_number'],
        'last_counted': ['2024-01-01', '01/02/2024']
    })
    norm_df = normalize_dtypes(raw.copy())

    first_row = norm_df.iloc[0]
    second_row = norm_df.iloc[1]

    # Whitespace trimmed on every text column.
    assert first_row['sku'] == 'SKU-001'
    assert first_row['product_name'] == 'Apple'
    assert first_row['location'] == 'WH1'

    # Unparseable quantity falls back to zero.
    assert first_row['quantity'] == 10.0
    assert second_row['quantity'] == 0.0

    # Both date layouts end up as ISO strings.
    assert first_row['last_counted'] == '2024-01-01'
    assert second_row['last_counted'] == '2024-01-02'
def test_generate_sku_product_mapping():
    """The first snapshot's name wins and the conflicting SKU is reported."""
    first_snapshot = pd.DataFrame({
        'sku': ['SKU-001', 'SKU-002'],
        'product_name': ['Apple', 'Banana Old']
    })
    second_snapshot = pd.DataFrame({
        'sku': ['SKU-002', 'SKU-003'],
        'product_name': ['Banana New', 'Cherry']
    })

    result = generate_sku_product_mapping(first_snapshot, second_snapshot)
    mapping, mismatches = result

    expected_names = {
        'SKU-001': 'Apple',
        'SKU-002': 'Banana Old',
        'SKU-003': 'Cherry',
    }
    for sku, name in expected_names.items():
        assert mapping[sku] == name
    assert 'SKU-002' in mismatches
def test_reconciliation_empty_before():
    """With nothing in the before snapshot, the single after item is 'added'."""
    snapshot_columns = ['sku', 'product_name', 'location', 'quantity', 'last_counted']
    no_rows = pd.DataFrame(columns=snapshot_columns)
    one_item = pd.DataFrame({
        'sku': ['SKU-001'],
        'product_name': ['A'],
        'location': ['L1'],
        'quantity': [100.0],
        'last_counted': ['2024-01-15'],
    })

    report = reconciliation(no_rows, one_item)

    assert len(report) == 1
    first_label = report['change_type'].iloc[0]
    assert first_label == 'added'


def test_reconciliation_empty_after():
    """With nothing in the after snapshot, the single before item is 'removed'."""
    one_item = pd.DataFrame({
        'sku': ['SKU-001'],
        'product_name': ['A'],
        'location': ['L1'],
        'quantity': [100.0],
        'last_counted': ['2024-01-08'],
    })
    snapshot_columns = ['sku', 'product_name', 'location', 'quantity', 'last_counted']
    no_rows = pd.DataFrame(columns=snapshot_columns)

    report = reconciliation(one_item, no_rows)

    assert len(report) == 1
    first_label = report['change_type'].iloc[0]
    assert first_label == 'removed'


def test_reconciliation_both_empty():
    """Two empty snapshots produce a report with no rows."""
    snapshot_columns = ['sku', 'product_name', 'location', 'quantity', 'last_counted']
    empty_before = pd.DataFrame(columns=snapshot_columns)
    empty_after = pd.DataFrame(columns=snapshot_columns)

    report = reconciliation(empty_before, empty_after)

    assert len(report) == 0