Savvi Studio

Reserved ID Ranges and Seed Data

Overview

This document defines reserved snowflake ID ranges for system objects that are part of seed data, ensuring stable references across environments.

Problem Statement

System objects (predicates, types, core resources) need:

  1. Stable IDs across all environments (dev, staging, prod)
  2. Predictable references for application code
  3. No conflicts with user-generated content
  4. Easy identification in logs and debugging

Snowflake ID Structure

64 bits total:
┌─────────────────────────────────────────┬──────────┬──────────┬────────────────┐
│           timestamp (41 bits)           │ dc (5)   │ worker(5)│  sequence (12) │
└─────────────────────────────────────────┴──────────┴──────────┴────────────────┘

Reserved ID Ranges

Reserve the first N IDs for system objects using util schema functions:

-- System ID range: 1 - 100,000
-- User ID range: 100,001+

-- Sequences defined in db/000_prelude.sql:
-- util.snowflake_system_seq (1-100000)
-- util.snowflake_user_seq (100001+)

-- Use util functions for ID generation
CREATE TABLE graph.resource (
    id BIGINT PRIMARY KEY DEFAULT util.snowflake_id_user(),
    -- rest of columns...
);

-- For seed data, explicitly use system ID function
INSERT INTO graph.resource (id, type_namespace, external_id, data)
VALUES (
    util.snowflake_id_system(),  -- System ID
    'rdf.type',
    'rdf.type',
    '{"label": "type"}'
);

Utility Functions (from db/000_prelude.sql):

  • util.snowflake_id_system() - Generate system ID (1-100000)
  • util.snowflake_id_user() - Generate user ID (100001+)
  • util.is_system_id(id) - Check if ID is in system range
  • util.snowflake_timestamp(id) - Extract timestamp (future)

System ID Allocation

-- ID Range     | Purpose
-- -------------|--------------------------------------------------
-- 1-1000       | Core RDF predicates (rdf.type, rdfs.subClassOf, etc)
-- 1001-2000    | Auth predicates (auth.read, auth.write, auth.admin)
-- 2001-3000    | Org predicates (org.member, org.owns, org.manages)
-- 3001-4000    | Core types (graph.type, graph.predicate)
-- 4001-5000    | Resource types (auth.user, auth.organization, etc)
-- 5001-10000   | System resources (root user, system org, etc)
-- 10001-100000 | Reserved for future system use
-- 100001+      | User-generated content

Seed Data with Stable IDs

Core RDF Predicates (1-1000)

-- Reset system sequence to start of range
SELECT setval('util.snowflake_system_seq', 1, false);

-- Insert with explicit IDs using util function
INSERT INTO graph.resource (id, type_namespace, external_id, data) 
SELECT * FROM (VALUES
    (1, 'rdf.type'::ltree, 'rdf.type', '{"label": "type", "description": "Instance of type"}'::jsonb),
    (2, 'rdfs.subClassOf'::ltree, 'rdfs.subClassOf', '{"label": "subClassOf", "description": "Type inheritance"}'::jsonb),
    (3, 'rdfs.label'::ltree, 'rdfs.label', '{"label": "label", "description": "Human-readable label"}'::jsonb),
    (4, 'rdfs.comment'::ltree, 'rdfs.comment', '{"label": "comment", "description": "Description"}'::jsonb),
    (5, 'rdfs.domain'::ltree, 'rdfs.domain', '{"label": "domain", "description": "Valid subject types"}'::jsonb),
    (6, 'rdfs.range'::ltree, 'rdfs.range', '{"label": "range", "description": "Valid object types"}'::jsonb)
) AS seed_data(id, type_namespace, external_id, data)
ON CONFLICT (id) DO UPDATE
SET data = EXCLUDED.data, updated_at = now();

-- Advance system sequence past this range
SELECT setval('util.snowflake_system_seq', 100);

Auth Predicates (1001-2000)

-- Set system sequence to auth predicate range
SELECT setval('util.snowflake_system_seq', 1001, false);

INSERT INTO graph.resource (id, type_namespace, external_id, data)
SELECT * FROM (VALUES
    (1001, 'auth.read'::ltree, 'auth.read', '{"label": "Read", "transitive": true}'::jsonb),
    (1002, 'auth.write'::ltree, 'auth.write', '{"label": "Write", "transitive": true}'::jsonb),
    (1003, 'auth.admin'::ltree, 'auth.admin', '{"label": "Admin", "transitive": true}'::jsonb),
    (1004, 'auth.execute'::ltree, 'auth.execute', '{"label": "Execute", "transitive": true}'::jsonb)
) AS seed_data(id, type_namespace, external_id, data)
ON CONFLICT (id) DO UPDATE
SET data = EXCLUDED.data, updated_at = now();

-- Advance past this range
SELECT setval('util.snowflake_system_seq', 1100);

Org Predicates (2001-3000)

INSERT INTO graph.resource (id, type_namespace, external_id, data) VALUES
    (2001, 'org.member', 'org.member', '{"label": "Member Of", "transitive": true}'),
    (2002, 'org.owns', 'org.owns', '{"label": "Owns", "transitive": false}'),
    (2003, 'org.manages', 'org.manages', '{"label": "Manages", "transitive": false}'),
    (2004, 'org.hasMember', 'org.hasMember', '{"label": "Has Member", "transitive": false}');

SELECT setval('graph.resource_id_system_seq', 2100);

System Resources (5001-10000)

-- Root/system user
INSERT INTO graph.resource (id, type_namespace, external_id, data, owner_id) VALUES
    (5001, 'auth.user', 'system', '{"name": "System", "system": true}', 5001);

-- System organization
INSERT INTO graph.resource (id, type_namespace, external_id, data, owner_id) VALUES
    (5002, 'auth.organization', 'system', '{"name": "System Organization"}', 5001);

SELECT setval('graph.resource_id_system_seq', 5100);

Application Constants

Export stable IDs as constants:

// src/lib/graph/constants.ts

/**
 * Stable system resource IDs
 * These IDs are reserved in the database and match seed data
 */
export const SYSTEM_RESOURCE_IDS = {
  // RDF Predicates (1-1000)
  RDF_TYPE: 1n,
  RDFS_SUBCLASS_OF: 2n,
  RDFS_LABEL: 3n,
  RDFS_COMMENT: 4n,
  RDFS_DOMAIN: 5n,
  RDFS_RANGE: 6n,
  
  // Auth Predicates (1001-2000)
  AUTH_READ: 1001n,
  AUTH_WRITE: 1002n,
  AUTH_ADMIN: 1003n,
  AUTH_EXECUTE: 1004n,
  
  // Org Predicates (2001-3000)
  ORG_MEMBER: 2001n,
  ORG_OWNS: 2002n,
  ORG_MANAGES: 2003n,
  ORG_HAS_MEMBER: 2004n,
  
  // System Resources (5001-10000)
  SYSTEM_USER: 5001n,
  SYSTEM_ORG: 5002n,
} as const;

/**
 * User ID range starts here
 */
export const USER_ID_START = 100001n;

/**
 * Check if ID is in system range
 */
export function isSystemId(id: bigint): boolean {
  return id < USER_ID_START;
}

/**
 * Check if ID is in user range
 */
export function isUserId(id: bigint): boolean {
  return id >= USER_ID_START;
}

Seed Data Script

-- db/303_graph_seed.sql
-- Seed data with stable system IDs

-- Reset system sequence to start
SELECT setval('util.snowflake_system_seq', 1, false);

-- RDF Core (1-1000)
INSERT INTO graph.resource (id, type_namespace, external_id, data)
SELECT * FROM (VALUES
    (1, 'rdf.type'::ltree, 'rdf.type', '{"label": "type"}'::jsonb),
    (2, 'rdfs.subClassOf'::ltree, 'rdfs.subClassOf', '{"label": "subClassOf"}'::jsonb),
    (3, 'rdfs.label'::ltree, 'rdfs.label', '{"label": "label"}'::jsonb),
    (4, 'rdfs.comment'::ltree, 'rdfs.comment', '{"label": "comment"}'::jsonb)
) AS seed_data(id, type_namespace, external_id, data)
ON CONFLICT (id) DO UPDATE
SET data = EXCLUDED.data,
    updated_at = now();

-- Auth Predicates (1001-2000)
SELECT setval('util.snowflake_system_seq', 1001, false);

INSERT INTO graph.resource (id, type_namespace, external_id, data)
SELECT * FROM (VALUES
    (1001, 'auth.read'::ltree, 'auth.read', '{"label": "Read", "transitive": true}'::jsonb),
    (1002, 'auth.write'::ltree, 'auth.write', '{"label": "Write", "transitive": true}'::jsonb),
    (1003, 'auth.admin'::ltree, 'auth.admin', '{"label": "Admin", "transitive": true}'::jsonb)
) AS seed_data(id, type_namespace, external_id, data)
ON CONFLICT (id) DO UPDATE
SET data = EXCLUDED.data,
    updated_at = now();

-- System Resources (5001-10000)
SELECT setval('util.snowflake_system_seq', 5001, false);

INSERT INTO graph.resource (id, type_namespace, external_id, data, owner_id)
VALUES (5001, 'auth.user'::ltree, 'system', '{"name": "System", "system": true}'::jsonb, 5001)
ON CONFLICT (id) DO UPDATE
SET data = EXCLUDED.data,
    updated_at = now();

-- Ensure user sequence starts after reserved range
SELECT setval('util.snowflake_user_seq', 100001, false);

Migration Safety

Idempotent Seed Data

Use ON CONFLICT to allow re-running:

INSERT INTO graph.resource (id, type_namespace, external_id, data)
VALUES (1, 'rdf.type', 'rdf.type', '{"label": "type"}')
ON CONFLICT (id) DO UPDATE
SET data = EXCLUDED.data,
    updated_at = now();

Verify Reserved Range

-- Check for conflicts
CREATE OR REPLACE FUNCTION graph.verify_reserved_range()
RETURNS TABLE (
    id BIGINT,
    type_namespace ltree,
    external_id text,
    is_conflict boolean
) AS $$
BEGIN
    RETURN QUERY
    SELECT 
        r.id,
        r.type_namespace,
        r.external_id,
        r.id < 100000 AND r.creator_id IS NOT NULL AS is_conflict
    FROM graph.resource r
    WHERE r.id < 100000
      AND NOT EXISTS (
          SELECT 1 FROM graph.resource seed
          WHERE seed.id = r.id
            AND seed.creator_id IS NULL  -- System resources have NULL creator
      );
END;
$$ LANGUAGE plpgsql;

-- Run check
SELECT * FROM graph.verify_reserved_range();

Testing

// tests/integration/graph/seed-data.test.ts

describe('Seed Data', () => {
  it('should have stable system resource IDs', async () => {
    // Use generated function from @db/graph
    const resource = await getResource(client, {
      p_id: SYSTEM_RESOURCE_IDS.RDF_TYPE
    });
    
    expect(BigInt(resource.id)).toBe(SYSTEM_RESOURCE_IDS.RDF_TYPE);
    expect(resource.external_id).toBe('rdf.type');
  });
  
  it('should not allow user IDs in reserved range', async () => {
    // Attempting to use a reserved ID should fail
    await expect(
      insertResource(client, {
        p_type_namespace: 'auth.user',
        p_external_id: 'test',
        p_data: {}
      })
    ).rejects.toThrow(); // ID generation should skip reserved range
  });
  
  it('should start user IDs after reserved range', async () => {
    // Use generated function from @db/graph
    const resourceId = await insertResource(client, {
      p_type_namespace: 'auth.user',
      p_external_id: 'test-user',
      p_data: {}
    });
    
    expect(resourceId).toBeGreaterThanOrEqual(USER_ID_START);
  });
});

Documentation Generation

Auto-generate constant docs:

// scripts/generate-system-constants.ts

import { Client } from 'pg';

async function generateSystemConstants() {
  const client = new Client(/* config */);
  await client.connect();
  
  // Use generated function to list system resources
  const resources = await listResources(client, {
    p_type_namespace: null,
    p_data_contains: null,
    p_limit: 100000,
    p_offset: 0
  });
  
  // Filter to system IDs only
  const systemResources = resources.filter(r => BigInt(r.id) < 100000n);
  
  console.log('// Auto-generated system resource IDs');
  console.log('export const SYSTEM_RESOURCE_IDS = {');
  
  for (const resource of systemResources) {
    const name = resource.external_id.toUpperCase().replace(/\./g, '_');
    console.log(`  ${name}: ${resource.id}n, // ${resource.data.label || resource.external_id}`);
  }
  
  console.log('} as const;');
  
  await client.end();
}

generateSystemConstants();

Best Practices

  1. Never hardcode IDs in application code - use constants
  2. Document ID ranges in migration comments
  3. Verify seed data in CI/CD pipeline
  4. Export constants from seed data automatically
  5. Test ID stability across environments

Implementation

This project uses Reserved Prefix approach:

  • System IDs: 1-100,000 (managed by util.snowflake_id_system())
  • User IDs: 100,001+ (managed by util.snowflake_id_user())
  • Clear range boundaries for easy debugging
  • Full 64-bit space available for user data
  • Simple validation with util.is_system_id()

Next Steps

  • Implement seed data scripts
  • Generate TypeScript constants
  • Add verification tests
  • Document in migration guide