CW

Snowflake Data Governance and Compliance

Free Lesson

Advertisement

Snowflake Data Governance and Compliance

Data governance in Snowflake provides a structured framework for managing data availability, usability, integrity, and security across the organization.

Governance Framework Components

Data Classification

-- Create classification tags
CREATE TAG governance.data_classification;
CREATE TAG governance.sensitivity_level;
CREATE TAG governance.retention_period;

-- Classify tables
ALTER TABLE customers SET TAG
  governance.data_classification = 'PII',
  governance.sensitivity_level = 'HIGH',
  governance.retention_period = '7_YEARS';

-- Classify columns
ALTER TABLE customers MODIFY COLUMN ssn SET TAG
  governance.data_classification = 'PII',
  governance.sensitivity_level = 'CRITICAL';

-- Query classified data
SELECT
  table_name,
  column_name,
  TAG:governance::data_classification AS classification,
  TAG:governance::sensitivity_level AS sensitivity
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TAG:governance::data_classification IS NOT NULL;

Data Lineage

-- Enable data lineage tracking
ALTER ACCOUNT SET ENABLE_DATA_LINEAGE = TRUE;

-- Query lineage information
SELECT
  source_table,
  target_table,
  transformation_type,
  column_mappings
FROM SNOWFLAKE.ACCOUNT_USAGE.DATA_LINEAGE
WHERE source_table = 'CUSTOMERS'
  AND start_time >= DATEADD('day', -30, CURRENT_TIMESTAMP());

-- Track column-level lineage
SELECT
  source_column,
  target_column,
  transformation_expression
FROM SNOWFLAKE.ACCOUNT_USAGE.COLUMN_LINEAGE
WHERE source_table = 'RAW_DATA'
  AND target_table = 'PROCESSED_DATA';

Data Lifecycle Management

Retention Policies

-- Set table retention
ALTER TABLE orders SET DATA_RETENTION_TIME_IN_DAYS = 90;

-- Set schema retention
ALTER SCHEMA production SET DATA_RETENTION_TIME_IN_DAYS = 365;

-- Set database retention
ALTER DATABASE analytics_db SET DATA_RETENTION_TIME_IN_DAYS = 30;

-- Check retention settings
SELECT
  table_name,
  data_retention_time_in_days,
  created_on,
  last_altered
FROM INFORMATION_SCHEMA.TABLES
WHERE table_schema = 'PRODUCTION';

Data Archival

-- Create archival table
CREATE TABLE orders_archive AS
SELECT * FROM orders
WHERE order_date < DATEADD('year', -2, CURRENT_DATE());

-- Move old data to archive
INSERT INTO orders_archive
SELECT * FROM orders
WHERE order_date < DATEADD('year', -2, CURRENT_DATE());

-- Delete archived data from main table
DELETE FROM orders
WHERE order_date < DATEADD('year', -2, CURRENT_DATE());

-- Create external table for long-term storage
CREATE OR REPLACE EXTERNAL TABLE orders_external
  WITH LOCATION = @archive_stage
  FILE_FORMAT = (TYPE = PARQUET)
  AUTO_REFRESH = false;

Secure Data Deletion

-- Securely delete sensitive data
BEGIN TRANSACTION;
  -- Archive if needed
  INSERT INTO data_backup
  SELECT * FROM sensitive_data
  WHERE deletion_date <= CURRENT_DATE();

  -- Delete data
  DELETE FROM sensitive_data
  WHERE deletion_date <= CURRENT_DATE();

  -- Verify deletion
  SELECT COUNT(*) FROM sensitive_data
  WHERE deletion_date <= CURRENT_DATE();
COMMIT;

-- Time travel recovery window
-- Data can be recovered within retention period
SELECT * FROM sensitive_data
  AT (OFFSET => -86400);  -- 24 hours ago

Compliance Frameworks

GDPR Compliance

-- Right to erasure (Right to be forgotten)
BEGIN TRANSACTION;
  -- Identify customer data across tables
  INSERT INTO customer_data_erasure_log
  SELECT customer_id, CURRENT_TIMESTAMP() as erasure_time
  FROM customers WHERE email = 'user@example.com';

  -- Anonymize personal data
  UPDATE customers SET
    email = 'anonymized_' || customer_id || '@deleted.com',
    name = 'ANONYMIZED',
    phone = NULL
  WHERE email = 'user@example.com';

  -- Verify anonymization
  SELECT * FROM customers WHERE customer_id = 'target_customer_id';
COMMIT;

-- Data portability (Right to data portability)
SELECT
  customer_id,
  name,
  email,
  order_history
FROM customers c
JOIN orders o ON c.customer_id = o.customer_id
WHERE c.customer_id = 'target_customer_id'
ORDER BY o.order_date;

CCPA Compliance

-- Create CCPA compliance view
CREATE OR REPLACE VIEW ccpa_customer_data AS
SELECT
  customer_id,
  name,
  email,
  -- Mask sensitive fields for non-authorized users
  CASE
    WHEN CURRENT_ROLE() IN ('PRIVACY_OFFICER', 'ADMIN')
    THEN phone
    ELSE '***-***-' || RIGHT(phone, 4)
  END AS phone,
  created_at,
  last_accessed
FROM customers;

-- Track data access for CCPA
SELECT
  customer_id,
  query_id,
  user_name,
  access_time
FROM SNOWFLAKE.ACCOUNT_USAGE.ACCESS_HISTORY
WHERE object_name = 'CUSTOMERS'
  AND access_time >= DATEADD('year', -1, CURRENT_TIMESTAMP());

SOX Compliance

-- Implement SOX controls
CREATE OR REPLACE PROCEDURE audit_financial_data()
RETURNS STRING
LANGUAGE SQL
AS
$$
BEGIN
  -- Log all financial data access
  INSERT INTO audit_log (
    table_name,
    action,
    user_name,
    access_time
  )
  SELECT
    'FINANCIAL_DATA',
    'READ',
    CURRENT_USER(),
    CURRENT_TIMESTAMP();

  -- Verify data integrity
  IF (SELECT COUNT(*) FROM financial_data WHERE amount < 0) > 0 THEN
    RETURN 'ERROR: Negative amounts detected';
  END IF;

  RETURN 'SUCCESS: Audit completed';
END;
$$;

-- Execute audit procedure
CALL audit_financial_data();

Data Quality Governance

-- Create data quality rules
CREATE OR REPLACE PROCEDURE validate_data_quality()
RETURNS STRING
LANGUAGE SQL
AS
$$
DECLARE
  error_count INTEGER;
BEGIN
  -- Check for null values
  SELECT COUNT(*) INTO error_count
  FROM customers WHERE email IS NULL;

  IF error_count > 0 THEN
    RETURN 'ERROR: ' || error_count || ' null emails found';
  END IF;

  -- Check for duplicates
  SELECT COUNT(*) INTO error_count
  FROM (
    SELECT email, COUNT(*)
    FROM customers
    GROUP BY email
    HAVING COUNT(*) > 1
  );

  IF error_count > 0 THEN
    RETURN 'ERROR: ' || error_count || ' duplicate emails found';
  END IF;

  RETURN 'SUCCESS: Data quality validated';
END;
$$;

-- Schedule quality checks
CREATE TASK daily_quality_check
  SCHEDULE = 'USING CRON 0 2 * * * America/New_York'
AS
  CALL validate_data_quality();

Effective data governance requires a combination of technical controls (policies, masks, encryption) and organizational processes (training, auditing, compliance). Regular reviews ensure governance remains effective.

Governance Monitoring Dashboard

-- Create governance monitoring view
CREATE OR REPLACE VIEW governance_dashboard AS
SELECT
  'Data Classification' as metric,
  COUNT(*) as value,
  'Tables' as unit
FROM INFORMATION_SCHEMA.TABLES
WHERE TAG:governance::data_classification IS NOT NULL

UNION ALL

SELECT
  'Masked Columns' as metric,
  COUNT(*) as value,
  'Columns' as unit
FROM INFORMATION_SCHEMA.COLUMNS
WHERE masking_policy_name IS NOT NULL

UNION ALL

SELECT
  'Active Policies' as metric,
  COUNT(*) as value,
  'Policies' as unit
FROM INFORMATION_SCHEMA.ROW_ACCESS_POLICIES

UNION ALL

SELECT
  'Retention Compliance' as metric,
  COUNT(*) as value,
  'Tables' as unit
FROM INFORMATION_SCHEMA.TABLES
WHERE data_retention_time_in_days > 0;

Key Takeaways:

  • Data classification enables appropriate protection measures
  • Data lineage tracks data movement and transformations
  • Lifecycle management balances accessibility with cost
  • Compliance frameworks (GDPR, CCPA, SOX) require specific controls
  • Regular audits ensure governance effectiveness
  • Technical controls must be combined with organizational processes

Advertisement

Need Expert Snowflake Help?

Get personalized warehouse optimization, data modeling, or Snowflake platform consulting.

Advertisement