Snowflake Data Governance and Compliance
Data governance in Snowflake provides a structured framework for managing data availability, usability, integrity, and security across the organization.
Governance Framework Components
Data Classification
-- Create classification tags
CREATE TAG governance.data_classification;
CREATE TAG governance.sensitivity_level;
CREATE TAG governance.retention_period;
-- Classify tables
ALTER TABLE customers SET TAG
governance.data_classification = 'PII',
governance.sensitivity_level = 'HIGH',
governance.retention_period = '7_YEARS';
-- Classify columns
ALTER TABLE customers MODIFY COLUMN ssn SET TAG
governance.data_classification = 'PII',
governance.sensitivity_level = 'CRITICAL';
-- Query classified data
SELECT
table_name,
column_name,
TAG:governance::data_classification AS classification,
TAG:governance::sensitivity_level AS sensitivity
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TAG:governance::data_classification IS NOT NULL;
Data Lineage
-- Enable data lineage tracking
ALTER ACCOUNT SET ENABLE_DATA_LINEAGE = TRUE;
-- Query lineage information
SELECT
source_table,
target_table,
transformation_type,
column_mappings
FROM SNOWFLAKE.ACCOUNT_USAGE.DATA_LINEAGE
WHERE source_table = 'CUSTOMERS'
AND start_time >= DATEADD('day', -30, CURRENT_TIMESTAMP());
-- Track column-level lineage
SELECT
source_column,
target_column,
transformation_expression
FROM SNOWFLAKE.ACCOUNT_USAGE.COLUMN_LINEAGE
WHERE source_table = 'RAW_DATA'
AND target_table = 'PROCESSED_DATA';
Data Lifecycle Management
Retention Policies
-- Set table retention
ALTER TABLE orders SET DATA_RETENTION_TIME_IN_DAYS = 90;
-- Set schema retention
ALTER SCHEMA production SET DATA_RETENTION_TIME_IN_DAYS = 365;
-- Set database retention
ALTER DATABASE analytics_db SET DATA_RETENTION_TIME_IN_DAYS = 30;
-- Check retention settings
SELECT
table_name,
data_retention_time_in_days,
created_on,
last_altered
FROM INFORMATION_SCHEMA.TABLES
WHERE table_schema = 'PRODUCTION';
Data Archival
-- Create archival table
CREATE TABLE orders_archive AS
SELECT * FROM orders
WHERE order_date < DATEADD('year', -2, CURRENT_DATE());
-- Move old data to archive
INSERT INTO orders_archive
SELECT * FROM orders
WHERE order_date < DATEADD('year', -2, CURRENT_DATE());
-- Delete archived data from main table
DELETE FROM orders
WHERE order_date < DATEADD('year', -2, CURRENT_DATE());
-- Create external table for long-term storage
CREATE OR REPLACE EXTERNAL TABLE orders_external
WITH LOCATION = @archive_stage
FILE_FORMAT = (TYPE = PARQUET)
AUTO_REFRESH = false;
Secure Data Deletion
-- Securely delete sensitive data
BEGIN TRANSACTION;
-- Archive if needed
INSERT INTO data_backup
SELECT * FROM sensitive_data
WHERE deletion_date <= CURRENT_DATE();
-- Delete data
DELETE FROM sensitive_data
WHERE deletion_date <= CURRENT_DATE();
-- Verify deletion
SELECT COUNT(*) FROM sensitive_data
WHERE deletion_date <= CURRENT_DATE();
COMMIT;
-- Time travel recovery window
-- Data can be recovered within retention period
SELECT * FROM sensitive_data
AT (OFFSET => -86400); -- 24 hours ago
Compliance Frameworks
GDPR Compliance
-- Right to erasure (Right to be forgotten)
BEGIN TRANSACTION;
-- Identify customer data across tables
INSERT INTO customer_data_erasure_log
SELECT customer_id, CURRENT_TIMESTAMP() as erasure_time
FROM customers WHERE email = 'user@example.com';
-- Anonymize personal data
UPDATE customers SET
email = 'anonymized_' || customer_id || '@deleted.com',
name = 'ANONYMIZED',
phone = NULL
WHERE email = 'user@example.com';
-- Verify anonymization
SELECT * FROM customers WHERE customer_id = 'target_customer_id';
COMMIT;
-- Data portability (Right to data portability)
SELECT
customer_id,
name,
email,
order_history
FROM customers c
JOIN orders o ON c.customer_id = o.customer_id
WHERE c.customer_id = 'target_customer_id'
ORDER BY o.order_date;
CCPA Compliance
-- Create CCPA compliance view
CREATE OR REPLACE VIEW ccpa_customer_data AS
SELECT
customer_id,
name,
email,
-- Mask sensitive fields for non-authorized users
CASE
WHEN CURRENT_ROLE() IN ('PRIVACY_OFFICER', 'ADMIN')
THEN phone
ELSE '***-***-' || RIGHT(phone, 4)
END AS phone,
created_at,
last_accessed
FROM customers;
-- Track data access for CCPA
SELECT
customer_id,
query_id,
user_name,
access_time
FROM SNOWFLAKE.ACCOUNT_USAGE.ACCESS_HISTORY
WHERE object_name = 'CUSTOMERS'
AND access_time >= DATEADD('year', -1, CURRENT_TIMESTAMP());
SOX Compliance
-- Implement SOX controls
CREATE OR REPLACE PROCEDURE audit_financial_data()
RETURNS STRING
LANGUAGE SQL
AS
$$
BEGIN
-- Log all financial data access
INSERT INTO audit_log (
table_name,
action,
user_name,
access_time
)
SELECT
'FINANCIAL_DATA',
'READ',
CURRENT_USER(),
CURRENT_TIMESTAMP();
-- Verify data integrity
IF (SELECT COUNT(*) FROM financial_data WHERE amount < 0) > 0 THEN
RETURN 'ERROR: Negative amounts detected';
END IF;
RETURN 'SUCCESS: Audit completed';
END;
$$;
-- Execute audit procedure
CALL audit_financial_data();
Data Quality Governance
-- Create data quality rules
CREATE OR REPLACE PROCEDURE validate_data_quality()
RETURNS STRING
LANGUAGE SQL
AS
$$
DECLARE
error_count INTEGER;
BEGIN
-- Check for null values
SELECT COUNT(*) INTO error_count
FROM customers WHERE email IS NULL;
IF error_count > 0 THEN
RETURN 'ERROR: ' || error_count || ' null emails found';
END IF;
-- Check for duplicates
SELECT COUNT(*) INTO error_count
FROM (
SELECT email, COUNT(*)
FROM customers
GROUP BY email
HAVING COUNT(*) > 1
);
IF error_count > 0 THEN
RETURN 'ERROR: ' || error_count || ' duplicate emails found';
END IF;
RETURN 'SUCCESS: Data quality validated';
END;
$$;
-- Schedule quality checks
CREATE TASK daily_quality_check
SCHEDULE = 'USING CRON 0 2 * * * America/New_York'
AS
CALL validate_data_quality();
Effective data governance requires a combination of technical controls (policies, masks, encryption) and organizational processes (training, auditing, compliance). Regular reviews ensure governance remains effective.
Governance Monitoring Dashboard
-- Create governance monitoring view
CREATE OR REPLACE VIEW governance_dashboard AS
SELECT
'Data Classification' as metric,
COUNT(*) as value,
'Tables' as unit
FROM INFORMATION_SCHEMA.TABLES
WHERE TAG:governance::data_classification IS NOT NULL
UNION ALL
SELECT
'Masked Columns' as metric,
COUNT(*) as value,
'Columns' as unit
FROM INFORMATION_SCHEMA.COLUMNS
WHERE masking_policy_name IS NOT NULL
UNION ALL
SELECT
'Active Policies' as metric,
COUNT(*) as value,
'Policies' as unit
FROM INFORMATION_SCHEMA.ROW_ACCESS_POLICIES
UNION ALL
SELECT
'Retention Compliance' as metric,
COUNT(*) as value,
'Tables' as unit
FROM INFORMATION_SCHEMA.TABLES
WHERE data_retention_time_in_days > 0;
Key Takeaways:
- Data classification enables appropriate protection measures
- Data lineage tracks data movement and transformations
- Lifecycle management balances accessibility with cost
- Compliance frameworks (GDPR, CCPA, SOX) require specific controls
- Regular audits ensure governance effectiveness
- Technical controls must be combined with organizational processes