"""
upload_to_azure_sql.py — Upload Contoso Bank Excel data to Azure SQL Database
This script reads all sheets from Contoso_Bank_Database.xlsx and creates
corresponding tables in Azure SQL Database with proper data types,
primary keys, and foreign key relationships.
Usage:
python upload_to_azure_sql.py
Prerequisites:
pip install pandas pyodbc python-dotenv openpyxl
Environment:
Create a .env file with your Azure SQL credentials:
AZURE_SQL_SERVER=your-server.database.windows.net
AZURE_SQL_DATABASE=your-database-name
AZURE_SQL_USERNAME=your-username
AZURE_SQL_PASSWORD=your-password
AZURE_SQL_DRIVER=ODBC Driver 18 for SQL Server
"""
import os
import sys
import time
import pandas as pd
import pyodbc
from dotenv import load_dotenv
load_dotenv()
# ── Configuration ──
EXCEL_FILE = "Contoso_Bank_Database.xlsx"
DB_SERVER = os.getenv("AZURE_SQL_SERVER", "")
DB_NAME = os.getenv("AZURE_SQL_DATABASE", "")
DB_USERNAME = os.getenv("AZURE_SQL_USERNAME", "")
DB_PASSWORD = os.getenv("AZURE_SQL_PASSWORD", "")
DB_DRIVER = os.getenv("AZURE_SQL_DRIVER", "ODBC Driver 18 for SQL Server")
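# The value of AZURE_SQL_DRIVER must match an ODBC driver that is actually
# installed on this machine; on Linux/macOS, `odbcinst -q -d` lists the
# installed drivers (assuming unixODBC is in use).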
# ── Table schemas — ordered for foreign key dependencies ──
TABLE_SCHEMAS = {
"Branches": """
CREATE TABLE Branches (
branch_id INT PRIMARY KEY,
branch_name NVARCHAR(100) NOT NULL,
branch_code INT NOT NULL,
city NVARCHAR(50) NOT NULL,
state NVARCHAR(50) NOT NULL,
region NVARCHAR(20) NOT NULL,
address NVARCHAR(255),
pincode INT,
phone NVARCHAR(20),
manager_name NVARCHAR(100),
opened_date DATE,
branch_type NVARCHAR(20),
total_staff INT,
is_active BIT DEFAULT 1
)
""",
"Products": """
CREATE TABLE Products (
product_id INT PRIMARY KEY,
product_name NVARCHAR(100) NOT NULL,
category NVARCHAR(50) NOT NULL,
description NVARCHAR(500),
min_amount DECIMAL(15,2),
max_amount DECIMAL(15,2),
interest_rate DECIMAL(6,4),
processing_fee_pct DECIMAL(6,4),
is_active BIT DEFAULT 1,
launch_date DATE
)
""",
"Employees": """
CREATE TABLE Employees (
employee_id INT PRIMARY KEY,
first_name NVARCHAR(50) NOT NULL,
last_name NVARCHAR(50) NOT NULL,
email NVARCHAR(100),
phone NVARCHAR(20),
designation NVARCHAR(50),
department NVARCHAR(50),
branch_id INT REFERENCES Branches(branch_id),
hire_date DATE,
salary DECIMAL(12,2),
experience_years INT,
gender NVARCHAR(10),
date_of_birth DATE,
is_active BIT DEFAULT 1
)
""",
"Customers": """
CREATE TABLE Customers (
customer_id INT PRIMARY KEY,
first_name NVARCHAR(50) NOT NULL,
last_name NVARCHAR(50) NOT NULL,
email NVARCHAR(100),
phone NVARCHAR(20),
date_of_birth DATE,
gender NVARCHAR(10),
pan_number NVARCHAR(10),
aadhar_last4 NVARCHAR(4),
city NVARCHAR(50),
state NVARCHAR(50),
pincode INT,
occupation NVARCHAR(50),
annual_income DECIMAL(15,2),
customer_since DATE,
kyc_status NVARCHAR(20),
risk_category NVARCHAR(10),
branch_id INT REFERENCES Branches(branch_id)
)
""",
"Accounts": """
CREATE TABLE Accounts (
account_id INT PRIMARY KEY,
account_number NVARCHAR(20) NOT NULL UNIQUE,
customer_id INT REFERENCES Customers(customer_id),
account_type NVARCHAR(20) NOT NULL,
branch_id INT REFERENCES Branches(branch_id),
ifsc_code NVARCHAR(15),
opening_date DATE,
current_balance DECIMAL(15,2),
minimum_balance DECIMAL(15,2),
interest_rate DECIMAL(6,4),
status NVARCHAR(20) DEFAULT 'Active',
nominee_name NVARCHAR(100),
last_transaction_date DATE
)
""",
"Transactions": """
CREATE TABLE Transactions (
transaction_id INT PRIMARY KEY,
account_id INT REFERENCES Accounts(account_id),
transaction_date DATE NOT NULL,
transaction_type NVARCHAR(10) NOT NULL,
category NVARCHAR(30),
amount DECIMAL(15,2) NOT NULL,
balance_after DECIMAL(15,2),
description NVARCHAR(200),
reference_number NVARCHAR(30),
channel NVARCHAR(20),
status NVARCHAR(20) DEFAULT 'Completed'
)
""",
"Loans": """
CREATE TABLE Loans (
loan_id INT PRIMARY KEY,
customer_id INT REFERENCES Customers(customer_id),
loan_type NVARCHAR(30) NOT NULL,
principal_amount DECIMAL(15,2) NOT NULL,
interest_rate DECIMAL(6,4),
tenure_months INT,
emi_amount DECIMAL(12,2),
disbursement_date DATE,
maturity_date DATE,
outstanding_balance DECIMAL(15,2),
total_paid DECIMAL(15,2),
payments_made INT,
payments_remaining INT,
status NVARCHAR(20) DEFAULT 'Active',
collateral_type NVARCHAR(30),
collateral_value DECIMAL(15,2),
branch_id INT REFERENCES Branches(branch_id)
)
""",
"Customer_Products": """
CREATE TABLE Customer_Products (
enrollment_id INT PRIMARY KEY,
customer_id INT REFERENCES Customers(customer_id),
product_id INT REFERENCES Products(product_id),
enrollment_date DATE,
status NVARCHAR(20),
monthly_value DECIMAL(12,2),
channel NVARCHAR(30)
)
""",
"Complaints": """
CREATE TABLE Complaints (
complaint_id INT PRIMARY KEY,
customer_id INT REFERENCES Customers(customer_id),
branch_id INT REFERENCES Branches(branch_id),
category NVARCHAR(50),
priority NVARCHAR(10),
description NVARCHAR(500),
filed_date DATE,
resolved_date DATE,
resolution_days INT,
status NVARCHAR(20),
assigned_to NVARCHAR(100)
)
"""
}
# Order matters — parent tables first
TABLE_ORDER = [
"Branches", "Products", "Employees", "Customers",
"Accounts", "Transactions", "Loans", "Customer_Products", "Complaints"
]
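# Optional sanity check: TABLE_ORDER and TABLE_SCHEMAS should name the same
# tables, with parents listed before the children that reference them.
assert set(TABLE_ORDER) == set(TABLE_SCHEMAS), "TABLE_ORDER and TABLE_SCHEMAS are out of sync"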
def get_connection():
"""Connect to Azure SQL Database."""
conn_str = (
f"Driver={{{DB_DRIVER}}};"
f"Server=tcp:{DB_SERVER},1433;"
f"Database={DB_NAME};"
f"Uid={DB_USERNAME};"
f"Pwd={DB_PASSWORD};"
f"Encrypt=yes;"
f"TrustServerCertificate=no;"
f"Connection Timeout=30;"
)
return pyodbc.connect(conn_str)
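# Azure SQL serverless tiers can auto-pause; the first connection after a pause
# may fail while the database resumes. If that turns out to be an issue, a
# minimal retry wrapper (a sketch, not wired into main) could look like:
#
#   def get_connection_with_retry(attempts=3, delay=10):
#       for attempt in range(1, attempts + 1):
#           try:
#               return get_connection()
#           except pyodbc.Error:
#               if attempt == attempts:
#                   raise
#               time.sleep(delay)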
def drop_tables(cursor):
"""Drop all tables in reverse order (child tables first)."""
print("\n── Dropping existing tables ──")
for table in reversed(TABLE_ORDER):
try:
cursor.execute(f"DROP TABLE IF EXISTS [{table}]")
print(f" ✓ Dropped {table}")
except Exception as e:
print(f" ✗ Error dropping {table}: {e}")
def create_tables(cursor):
"""Create all tables with proper schemas."""
print("\n── Creating tables ──")
for table in TABLE_ORDER:
try:
cursor.execute(TABLE_SCHEMAS[table])
print(f" ✓ Created {table}")
except Exception as e:
print(f" ✗ Error creating {table}: {e}")
raise
def upload_data(cursor, conn):
"""Read Excel and upload data to each table."""
print(f"\n── Reading {EXCEL_FILE} ──")
all_sheets = pd.read_excel(EXCEL_FILE, sheet_name=None, engine='openpyxl')
print(f" Found {len(all_sheets)} sheets: {list(all_sheets.keys())}")
print("\n── Uploading data ──")
total_rows = 0
for table in TABLE_ORDER:
if table not in all_sheets:
print(f" ⚠ Sheet '{table}' not found in Excel, skipping")
continue
df = all_sheets[table]
        # Convert boolean columns to 0/1 for the BIT columns, preserving missing values
        for col in df.columns:
            if df[col].dtype == bool or col == 'is_active':
                df[col] = df[col].apply(lambda x: None if pd.isna(x) else (1 if x else 0))
        df = df.astype(object).where(pd.notnull(df), None)  # Replace NaN/NaT with None for SQL NULL
row_count = len(df)
col_count = len(df.columns)
placeholders = ', '.join(['?' for _ in df.columns])
col_names = ', '.join([f'[{c}]' for c in df.columns])
insert_sql = f"INSERT INTO [{table}] ({col_names}) VALUES ({placeholders})"
# Upload in batches
batch_size = 100
errors = 0
start = time.time()
for i in range(0, row_count, batch_size):
batch = df.iloc[i:i + batch_size]
for _, row in batch.iterrows():
try:
values = []
for v in row.values:
                        if pd.isna(v):  # catches NaN, NaT, and None
                            values.append(None)
                        elif isinstance(v, pd.Timestamp):
                            values.append(v.to_pydatetime())
                        else:
                            values.append(v)
cursor.execute(insert_sql, tuple(values))
except Exception as e:
errors += 1
if errors <= 3:
print(f" ⚠ Row error in {table}: {e}")
conn.commit()
elapsed = time.time() - start
total_rows += row_count
status = "✓" if errors == 0 else "⚠"
print(f" {status} {table}: {row_count} rows, {col_count} cols ({elapsed:.1f}s)"
+ (f" [{errors} errors]" if errors else ""))
return total_rows
def verify_upload(cursor):
"""Verify row counts in all tables."""
print("\n── Verification ──")
print(f" {'Table':<25} {'Rows':>8}")
print(f" {'─' * 25} {'─' * 8}")
total = 0
for table in TABLE_ORDER:
try:
cursor.execute(f"SELECT COUNT(*) FROM [{table}]")
count = cursor.fetchone()[0]
total += count
print(f" {table:<25} {count:>8,}")
except Exception as e:
print(f" {table:<25} ERROR: {e}")
print(f" {'─' * 25} {'─' * 8}")
print(f" {'TOTAL':<25} {total:>8,}")
return total
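# For deeper validation, orphaned foreign keys can be spot-checked with queries
# along these lines (expected count: 0), e.g. transactions without an account:
#   SELECT COUNT(*) FROM Transactions t
#   LEFT JOIN Accounts a ON a.account_id = t.account_id
#   WHERE a.account_id IS NULL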
def create_indexes(cursor):
"""Create indexes for common query patterns."""
print("\n── Creating indexes ──")
indexes = [
("IX_Employees_BranchID", "Employees", "branch_id"),
("IX_Customers_City", "Customers", "city"),
("IX_Customers_BranchID", "Customers", "branch_id"),
("IX_Accounts_CustomerID", "Accounts", "customer_id"),
("IX_Accounts_BranchID", "Accounts", "branch_id"),
("IX_Accounts_Status", "Accounts", "status"),
("IX_Transactions_AccountID", "Transactions", "account_id"),
("IX_Transactions_Date", "Transactions", "transaction_date"),
("IX_Transactions_Category", "Transactions", "category"),
("IX_Loans_CustomerID", "Loans", "customer_id"),
("IX_Loans_Status", "Loans", "status"),
("IX_Loans_Type", "Loans", "loan_type"),
("IX_CustomerProducts_CustomerID", "Customer_Products", "customer_id"),
("IX_Complaints_CustomerID", "Complaints", "customer_id"),
("IX_Complaints_Status", "Complaints", "status"),
]
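    # Single-column nonclustered indexes on the common join/filter keys; composite
    # indexes can be added later if specific query patterns call for them.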
for idx_name, table, col in indexes:
try:
cursor.execute(f"CREATE NONCLUSTERED INDEX [{idx_name}] ON [{table}] ([{col}])")
print(f" ✓ {idx_name}")
except Exception as e:
if "already exists" in str(e):
print(f" ⚠ {idx_name} already exists")
else:
print(f" ✗ {idx_name}: {e}")
def main():
    """Validate configuration, rebuild all tables, upload the Excel data, and verify."""
# Validate config
if not all([DB_SERVER, DB_NAME, DB_USERNAME, DB_PASSWORD]):
print("ERROR: Missing database credentials in .env file")
print("Required: AZURE_SQL_SERVER, AZURE_SQL_DATABASE, AZURE_SQL_USERNAME, AZURE_SQL_PASSWORD")
sys.exit(1)
if not os.path.exists(EXCEL_FILE):
print(f"ERROR: {EXCEL_FILE} not found in current directory")
sys.exit(1)
print("=" * 60)
print(" CONTOSO BANK — Excel to Azure SQL Upload")
print("=" * 60)
print(f"\n Server: {DB_SERVER}")
print(f" Database: {DB_NAME}")
print(f" File: {EXCEL_FILE}")
start_time = time.time()
try:
print("\n── Connecting to Azure SQL ──")
conn = get_connection()
cursor = conn.cursor()
print(" ✓ Connected successfully")
drop_tables(cursor)
conn.commit()
create_tables(cursor)
conn.commit()
total_rows = upload_data(cursor, conn)
create_indexes(cursor)
conn.commit()
verify_upload(cursor)
elapsed = time.time() - start_time
print(f"\n{'=' * 60}")
print(f" UPLOAD COMPLETE")
print(f" Total rows: {total_rows:,}")
print(f" Tables: {len(TABLE_ORDER)}")
print(f" Time: {elapsed:.1f} seconds")
print(f"{'=' * 60}")
except pyodbc.Error as e:
print(f"\nDATABASE ERROR: {e}")
sys.exit(1)
except Exception as e:
print(f"\nERROR: {e}")
raise
finally:
if 'conn' in locals():
conn.close()
print("\nConnection closed.")
if __name__ == "__main__":
main()