004-optimize-indexes.sql•11.7 kB
-- =====================================================
-- Database Optimization: Indexes & Constraints
-- =====================================================
-- Run this in Supabase SQL Editor after initial import
-- Estimated execution time: 5-10 minutes
-- Storage impact: +150 MB
-- Performance impact: 10x improvement for filtered queries
-- =====================================================
-- Step 1: Add composite indexes for common query patterns
-- =====================================================
-- Index for "Active companies of specific type"
-- Query pattern: WHERE organisationsform = 'AB' AND avregistreringsdatum IS NULL
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_companies_form_active
ON public.companies(organisationsform, organisationsidentitet)
WHERE avregistreringsdatum IS NULL;
-- Index for "Companies registered in date range with status"
-- Query pattern: WHERE registreringsdatum BETWEEN ... AND ... AND avregistreringsdatum IS NULL
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_companies_reg_date_active
ON public.companies(registreringsdatum DESC, organisationsidentitet)
WHERE avregistreringsdatum IS NULL;
-- Index for "Sort by name within company type"
-- Query pattern: WHERE organisationsform = 'AB' ORDER BY organisationsnamn
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_companies_form_namn
ON public.companies(organisationsform, organisationsnamn text_pattern_ops);
-- Index for "Companies by postadress region" (for future postcode queries)
-- Query pattern: WHERE postadress LIKE '11%' (Stockholm region)
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_companies_postadress_prefix
ON public.companies(SUBSTRING(postadress FROM '^\d{3}'))
WHERE postadress IS NOT NULL;
-- Step 2: Optimize full-text search indexes
-- =====================================================
-- Drop existing FTS index if it exists
DROP INDEX CONCURRENTLY IF EXISTS idx_companies_fts;
-- Create separate FTS indexes for better performance
-- Index 1: Company name only (most common search)
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_companies_fts_namn
ON public.companies
USING GIN (to_tsvector('swedish', organisationsnamn));
-- Index 2: Description only (less frequent, partial index)
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_companies_fts_beskrivning
ON public.companies
USING GIN (to_tsvector('swedish', verksamhetsbeskrivning))
WHERE verksamhetsbeskrivning IS NOT NULL AND verksamhetsbeskrivning != '';
-- Index 3: Combined FTS for active companies only (reduces index size by ~50%)
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_companies_fts_active
ON public.companies
USING GIN (
to_tsvector('swedish',
COALESCE(organisationsnamn, '') || ' ' ||
COALESCE(verksamhetsbeskrivning, '')
)
)
WHERE avregistreringsdatum IS NULL;
-- Step 3: Add BRIN indexes for time-series data
-- =====================================================
-- BRIN index for created_at (efficient for large tables)
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_companies_created_brin
ON public.companies
USING BRIN (created_at, updated_at);
-- Step 4: Optimize cache table indexes
-- =====================================================
-- Company details cache
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_cache_details_expires_brin
ON public.company_details_cache
USING BRIN (cache_expires_at, fetched_at);
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_cache_details_active
ON public.company_details_cache(organisationsidentitet)
WHERE cache_expires_at > NOW();
-- Documents cache
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_cache_docs_expires_brin
ON public.company_documents_cache
USING BRIN (cache_expires_at, fetched_at);
-- Financial reports
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_financial_org_year
ON public.financial_reports(organisationsidentitet, report_year DESC);
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_financial_with_metrics
ON public.financial_reports(organisationsidentitet, report_year DESC)
WHERE key_metrics IS NOT NULL;
-- Step 5: Add CHECK constraints for data integrity
-- =====================================================
-- Ensure registration dates are logical
ALTER TABLE public.companies
ADD CONSTRAINT IF NOT EXISTS check_registration_dates
CHECK (
avregistreringsdatum IS NULL OR
avregistreringsdatum >= registreringsdatum
);
-- Ensure organisationsidentitet is exactly 10 digits
ALTER TABLE public.companies
ADD CONSTRAINT IF NOT EXISTS check_org_number_format
CHECK (organisationsidentitet ~ '^\d{10}$');
-- Ensure valid organisationsform values
ALTER TABLE public.companies
ADD CONSTRAINT IF NOT EXISTS check_organisationsform
CHECK (
organisationsform IS NULL OR
organisationsform IN (
'AB', -- Aktiebolag
'HB', -- Handelsbolag
'KB', -- Kommanditbolag
'EK', -- Enskild firma
'BRF', -- Bostadsrättsförening
'FL', -- Förening (ideell)
'SF', -- Stiftelse
'E', -- Ekonomisk förening
'I', -- Ideell förening
'TSF', -- Trossamfund
'EF', -- Europakooperativ
'SE', -- Europabolag
'SCE', -- Europakooperativ
'EEIG', -- Europeisk ekonomisk intressegruppering
'OFB' -- Ömsesidigt försäkringsbolag
)
);
-- Financial reports: validate year range
ALTER TABLE public.financial_reports
ADD CONSTRAINT IF NOT EXISTS check_report_year
CHECK (
report_year BETWEEN 1900 AND EXTRACT(YEAR FROM NOW()) + 1
);
-- Step 6: Create materialized view for financial summary
-- =====================================================
CREATE MATERIALIZED VIEW IF NOT EXISTS public.company_financial_summary AS
SELECT
f.organisationsidentitet,
c.organisationsnamn,
c.organisationsform,
MAX(f.report_year) as latest_report_year,
COUNT(f.id) as total_reports,
jsonb_object_agg(
f.report_year::text,
jsonb_build_object(
'balance_sheet', f.balance_sheet,
'income_statement', f.income_statement,
'key_metrics', f.key_metrics,
'storage_path', f.storage_path
)
) FILTER (WHERE f.key_metrics IS NOT NULL) as yearly_data
FROM public.financial_reports f
INNER JOIN public.companies c ON f.organisationsidentitet = c.organisationsidentitet
WHERE f.key_metrics IS NOT NULL
GROUP BY f.organisationsidentitet, c.organisationsnamn, c.organisationsform;
-- Create unique index on materialized view
CREATE UNIQUE INDEX IF NOT EXISTS idx_financial_summary_org
ON public.company_financial_summary(organisationsidentitet);
-- Create index for sorting
CREATE INDEX IF NOT EXISTS idx_financial_summary_year
ON public.company_financial_summary(latest_report_year DESC);
-- Step 7: Create helper functions for index maintenance
-- =====================================================
-- Function to refresh financial summary (call after importing reports)
CREATE OR REPLACE FUNCTION public.refresh_financial_summary()
RETURNS void
LANGUAGE plpgsql
AS $$
BEGIN
REFRESH MATERIALIZED VIEW CONCURRENTLY public.company_financial_summary;
RAISE NOTICE 'Financial summary refreshed at %', NOW();
END;
$$;
-- Function to analyze and optimize all tables
CREATE OR REPLACE FUNCTION public.optimize_tables()
RETURNS void
LANGUAGE plpgsql
AS $$
BEGIN
-- Analyze tables for query planner
ANALYZE public.companies;
ANALYZE public.company_details_cache;
ANALYZE public.company_documents_cache;
ANALYZE public.financial_reports;
-- Vacuum to reclaim space
VACUUM ANALYZE public.companies;
VACUUM ANALYZE public.company_details_cache;
VACUUM ANALYZE public.company_documents_cache;
VACUUM ANALYZE public.financial_reports;
RAISE NOTICE 'Tables optimized at %', NOW();
END;
$$;
-- Function to get index usage statistics
CREATE OR REPLACE FUNCTION public.get_index_stats()
RETURNS TABLE (
schemaname name,
tablename name,
indexname name,
idx_scan bigint,
idx_tup_read bigint,
idx_tup_fetch bigint,
size_mb numeric
)
LANGUAGE sql
AS $$
SELECT
schemaname,
tablename,
indexname,
idx_scan,
idx_tup_read,
idx_tup_fetch,
pg_size_pretty(pg_relation_size(indexrelid))::numeric as size_mb
FROM pg_stat_user_indexes
WHERE schemaname = 'public'
AND tablename IN ('companies', 'company_details_cache', 'company_documents_cache', 'financial_reports')
ORDER BY idx_scan DESC;
$$;
-- Function to identify unused indexes
CREATE OR REPLACE FUNCTION public.find_unused_indexes()
RETURNS TABLE (
schema_name name,
table_name name,
index_name name,
index_size text,
scans bigint
)
LANGUAGE sql
AS $$
SELECT
schemaname::name,
tablename::name,
indexname::name,
pg_size_pretty(pg_relation_size(indexrelid))::text,
idx_scan
FROM pg_stat_user_indexes
WHERE schemaname = 'public'
AND idx_scan = 0
AND indexname NOT LIKE '%_pkey' -- Keep primary keys
ORDER BY pg_relation_size(indexrelid) DESC;
$$;
-- Step 8: Create automated maintenance schedule (optional)
-- =====================================================
-- Requires pg_cron extension
-- Enable pg_cron if available
DO $$
BEGIN
IF EXISTS (
SELECT 1 FROM pg_available_extensions
WHERE name = 'pg_cron'
) THEN
CREATE EXTENSION IF NOT EXISTS pg_cron;
-- Schedule daily table optimization (2 AM UTC)
PERFORM cron.schedule(
'optimize-tables',
'0 2 * * *',
'SELECT public.optimize_tables()'
);
-- Schedule weekly financial summary refresh (Sunday 3 AM UTC)
PERFORM cron.schedule(
'refresh-financial-summary',
'0 3 * * 0',
'SELECT public.refresh_financial_summary()'
);
RAISE NOTICE 'Automated maintenance scheduled';
ELSE
RAISE NOTICE 'pg_cron extension not available, skipping scheduled jobs';
END IF;
END $$;
-- Step 9: Analyze new indexes
-- =====================================================
-- Gather statistics for all new indexes
ANALYZE public.companies;
ANALYZE public.company_details_cache;
ANALYZE public.company_documents_cache;
ANALYZE public.financial_reports;
-- Step 10: Verification
-- =====================================================
DO $$
DECLARE
index_count INTEGER;
constraint_count INTEGER;
BEGIN
-- Count indexes on companies table
SELECT COUNT(*) INTO index_count
FROM pg_indexes
WHERE schemaname = 'public'
AND tablename = 'companies';
-- Count constraints
SELECT COUNT(*) INTO constraint_count
FROM pg_constraint
WHERE conrelid = 'public.companies'::regclass;
RAISE NOTICE '✅ Optimization complete!';
RAISE NOTICE 'Indexes on companies table: %', index_count;
RAISE NOTICE 'Constraints on companies table: %', constraint_count;
RAISE NOTICE '';
RAISE NOTICE 'Next steps:';
RAISE NOTICE '1. Run: SELECT * FROM public.get_index_stats();';
RAISE NOTICE '2. Monitor query performance for 1 week';
RAISE NOTICE '3. Run: SELECT * FROM public.find_unused_indexes();';
RAISE NOTICE '4. Remove unused indexes if any';
END $$;
-- =====================================================
-- End of optimization migration
-- =====================================================
COMMENT ON FUNCTION public.optimize_tables() IS 'Run ANALYZE and VACUUM on all main tables. Run daily.';
COMMENT ON FUNCTION public.refresh_financial_summary() IS 'Refresh materialized view of financial summaries. Run after importing new reports.';
COMMENT ON FUNCTION public.get_index_stats() IS 'Get statistics on index usage to identify performance issues.';
COMMENT ON FUNCTION public.find_unused_indexes() IS 'Find indexes that are never used and can be dropped to save storage.';