rssd-init.understand notebooks schema (Generative AI Large Language Model Prompt)
Understand the following structure of an SQLite database designed to store code notebooks and execution kernels.
The database comprises three main tables: 'code_notebook_kernel', 'code_notebook_cell', and 'code_notebook_state'.
1. 'code_notebook_kernel': A Notebook is a group of Cells. A kernel is a computational engine that executes the code contained in a notebook cell.
Each notebook is associated with a kernel of a specific programming language or code transformer which can interpret
code and produce a result. For example, a SQL notebook might use a SQLite kernel for running SQL code and an AI Prompt
might prepare AI prompts for LLMs.
2. 'code_notebook_cell': Each Notebook is divided into cells, which are individual units of interpretable code.
Each cell is linked to a kernel in the 'code_notebook_kernel' table via 'notebook_kernel_id'.
The content of Cells depends on the Notebook Kernel and contain the source code to be
executed by the Notebook's Kernel. The output of the code (text, graphics, etc.) can be
stateless or may be stateful and store its results and state transitions in code_notebook_state.
Notebooks can have multiple versions of cells, where the interpretable_code and other metadata
may be updated over time. Code notebook cells are unique for notebook_name, cell_name and
interpretable_code_hash which means there may be "duplicate" cells when interpretable_code
has been edited and updated over time.
3. 'code_notebook_state': Records the state of a notebook's cells' executions, computations, and results for Kernels that are stateful.
For example, a SQL Notebook Cell that creates tables should only be run once (meaning it's stateful).
Other Kernels might store results for functions and output defined in one cell can be used in later cells.
Each record links to a cell in the 'code_notebook_cell' table and includes information about the state transition,
such as the previous and new states, transition reason, and timestamps. Surveilr tracks "migratable" SQL by
looking in a special notebook called "ConstructionSqlNotebook" and any cells in that notebook are "candidates"
for migration. Candidates that do not have a 'EXECUTED' in the state table mean that specific cell has not been
"migrated" yet.
The relationships are as follows:
- Each cell in 'code_notebook_cell' is associated with a kernel in 'code_notebook_kernel'.
- The 'code_notebook_state' table tracks changes in the state of each cell, linking back to the 'code_notebook_cell' table.
Use the following SQLite tables and views to generate SQL queries that interact with these tables and once you understand them let me know so I can ask you for help:
-- code provenance: `RssdInitSqlNotebook.bootstrapDDL` (file:///home/baasit/www.surveilr.com/lib/std/lifecycle.sql.ts)
INSERT INTO "session_state_ephemeral" ("key", "value") VALUES ('current_user', 'baasit') ON CONFLICT DO UPDATE SET value = excluded.value;
INSERT INTO "session_state_ephemeral" ("key", "value") VALUES ('current_user_name', 'UNKNOWN') ON CONFLICT DO UPDATE SET value = excluded.value;
CREATE TABLE IF NOT EXISTS "assurance_schema" (
"assurance_schema_id" VARCHAR PRIMARY KEY NOT NULL,
"assurance_type" TEXT NOT NULL,
"code" TEXT NOT NULL,
"code_json" TEXT CHECK(json_valid(code_json) OR code_json IS NULL),
"governance" TEXT CHECK(json_valid(governance) OR governance IS NULL),
"created_at" TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
"created_by" TEXT DEFAULT 'UNKNOWN',
"updated_at" TIMESTAMPTZ,
"updated_by" TEXT,
"deleted_at" TIMESTAMPTZ,
"deleted_by" TEXT,
"activity_log" TEXT
);
CREATE TABLE IF NOT EXISTS "code_notebook_kernel" (
"code_notebook_kernel_id" VARCHAR PRIMARY KEY NOT NULL,
"kernel_name" TEXT NOT NULL,
"description" TEXT,
"mime_type" TEXT,
"file_extn" TEXT,
"elaboration" TEXT CHECK(json_valid(elaboration) OR elaboration IS NULL),
"governance" TEXT CHECK(json_valid(governance) OR governance IS NULL),
"created_at" TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
"created_by" TEXT DEFAULT 'UNKNOWN',
"updated_at" TIMESTAMPTZ,
"updated_by" TEXT,
"deleted_at" TIMESTAMPTZ,
"deleted_by" TEXT,
"activity_log" TEXT,
UNIQUE("kernel_name")
);
CREATE TABLE IF NOT EXISTS "code_notebook_cell" (
"code_notebook_cell_id" VARCHAR PRIMARY KEY NOT NULL,
"notebook_kernel_id" VARCHAR NOT NULL,
"notebook_name" TEXT NOT NULL,
"cell_name" TEXT NOT NULL,
"cell_governance" TEXT CHECK(json_valid(cell_governance) OR cell_governance IS NULL),
"interpretable_code" TEXT NOT NULL,
"interpretable_code_hash" TEXT NOT NULL,
"description" TEXT,
"arguments" TEXT CHECK(json_valid(arguments) OR arguments IS NULL),
"created_at" TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
"created_by" TEXT DEFAULT 'UNKNOWN',
"updated_at" TIMESTAMPTZ,
"updated_by" TEXT,
"deleted_at" TIMESTAMPTZ,
"deleted_by" TEXT,
"activity_log" TEXT,
FOREIGN KEY("notebook_kernel_id") REFERENCES "code_notebook_kernel"("code_notebook_kernel_id"),
UNIQUE("notebook_name", "cell_name", "interpretable_code_hash")
);
CREATE TABLE IF NOT EXISTS "code_notebook_state" (
"code_notebook_state_id" VARCHAR PRIMARY KEY NOT NULL,
"code_notebook_cell_id" VARCHAR NOT NULL,
"from_state" TEXT NOT NULL,
"to_state" TEXT NOT NULL,
"transition_result" TEXT CHECK(json_valid(transition_result) OR transition_result IS NULL),
"transition_reason" TEXT,
"transitioned_at" TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
"elaboration" TEXT CHECK(json_valid(elaboration) OR elaboration IS NULL),
"created_at" TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
"created_by" TEXT DEFAULT 'UNKNOWN',
"updated_at" TIMESTAMPTZ,
"updated_by" TEXT,
"deleted_at" TIMESTAMPTZ,
"deleted_by" TEXT,
"activity_log" TEXT,
FOREIGN KEY("code_notebook_cell_id") REFERENCES "code_notebook_cell"("code_notebook_cell_id"),
UNIQUE("code_notebook_cell_id", "from_state", "to_state")
);
DROP VIEW IF EXISTS "code_notebook_cell_versions";
CREATE VIEW IF NOT EXISTS "code_notebook_cell_versions" AS
-- code provenance: `RssdInitSqlNotebook.notebookBusinessLogicViews` (file:///home/baasit/www.surveilr.com/lib/std/lifecycle.sql.ts)
-- All cells and how many different versions of each cell are available
SELECT notebook_name,
notebook_kernel_id,
cell_name,
COUNT(*) OVER(PARTITION BY notebook_name, cell_name) AS versions,
code_notebook_cell_id
FROM code_notebook_cell
ORDER BY notebook_name, cell_name;
DROP VIEW IF EXISTS "code_notebook_cell_latest";
CREATE VIEW IF NOT EXISTS "code_notebook_cell_latest" AS
-- code provenance: `RssdInitSqlNotebook.notebookBusinessLogicViews` (file:///home/baasit/www.surveilr.com/lib/std/lifecycle.sql.ts)
-- Retrieve the latest version of each code_notebook_cell.
-- Notebooks can have multiple versions of cells, where the interpretable_code and other metadata may be updated over time.
-- The latest record is determined by the most recent COALESCE(updated_at, created_at) timestamp.
SELECT
c.code_notebook_cell_id, -- Selects the unique ID of the notebook cell
c.notebook_kernel_id, -- References the kernel associated with this cell
c.notebook_name, -- The name of the notebook containing this cell
c.cell_name, -- The name of the cell within the notebook
c.interpretable_code, -- The latest interpretable code associated with the cell
c.interpretable_code_hash, -- Hash of the latest interpretable code
c.description, -- Description of the cell's purpose or content
c.cell_governance, -- Governance details for the cell (if any)
c.arguments, -- Arguments or parameters related to the cell's execution
c.activity_log, -- Log of activities related to this cell
COALESCE(c.updated_at, c.created_at) AS version_timestamp -- The latest timestamp (updated or created)
FROM (
SELECT
code_notebook_cell_id,
notebook_kernel_id,
notebook_name,
cell_name,
interpretable_code,
interpretable_code_hash,
description,
cell_governance,
arguments,
activity_log,
updated_at,
created_at,
ROW_NUMBER() OVER (
PARTITION BY code_notebook_cell_id
ORDER BY COALESCE(updated_at, created_at) DESC -- Orders by the latest timestamp
) AS rn
FROM
code_notebook_cell
) c WHERE c.rn = 1;
DROP VIEW IF EXISTS "code_notebook_sql_cell_migratable_version";
CREATE VIEW IF NOT EXISTS "code_notebook_sql_cell_migratable_version" AS
-- code provenance: `RssdInitSqlNotebook.notebookBusinessLogicViews` (file:///home/baasit/www.surveilr.com/lib/std/lifecycle.sql.ts)
-- All cells that are candidates for migration (including duplicates)
SELECT c.code_notebook_cell_id,
c.notebook_name,
c.cell_name,
c.interpretable_code,
c.interpretable_code_hash,
CASE WHEN c.cell_name LIKE '%_once_%' THEN FALSE ELSE TRUE END AS is_idempotent,
COALESCE(c.updated_at, c.created_at) version_timestamp
FROM code_notebook_cell c
WHERE c.notebook_name = 'ConstructionSqlNotebook'
ORDER BY c.cell_name;
DROP VIEW IF EXISTS "code_notebook_sql_cell_migratable";
CREATE VIEW IF NOT EXISTS "code_notebook_sql_cell_migratable" AS
-- code provenance: `RssdInitSqlNotebook.notebookBusinessLogicViews` (file:///home/baasit/www.surveilr.com/lib/std/lifecycle.sql.ts)
-- All cells that are candidates for migration (latest only)
SELECT c.*,
CASE WHEN c.cell_name LIKE '%_once_%' THEN FALSE ELSE TRUE END AS is_idempotent
FROM code_notebook_cell_latest c
WHERE c.notebook_name = 'ConstructionSqlNotebook'
ORDER BY c.cell_name;
DROP VIEW IF EXISTS "code_notebook_sql_cell_migratable_state";
CREATE VIEW IF NOT EXISTS "code_notebook_sql_cell_migratable_state" AS
-- code provenance: `RssdInitSqlNotebook.notebookBusinessLogicViews` (file:///home/baasit/www.surveilr.com/lib/std/lifecycle.sql.ts)
-- All cells that are candidates for migration (latest only)
SELECT
c.*, -- All columns from the code_notebook_sql_cell_migratable view
s.from_state, -- The state the cell transitioned from
s.to_state, -- The state the cell transitioned to
s.transition_reason, -- The reason for the state transition
s.transition_result, -- The result of the state transition
s.transitioned_at -- The timestamp of the state transition
FROM
code_notebook_sql_cell_migratable c
JOIN
code_notebook_state s
ON c.code_notebook_cell_id = s.code_notebook_cell_id
ORDER BY c.cell_name;
DROP VIEW IF EXISTS "code_notebook_sql_cell_migratable_not_executed";
CREATE VIEW IF NOT EXISTS "code_notebook_sql_cell_migratable_not_executed" AS
-- code provenance: `RssdInitSqlNotebook.notebookBusinessLogicViews` (file:///home/baasit/www.surveilr.com/lib/std/lifecycle.sql.ts)
-- All latest migratable cells that have not yet been "executed" (based on the code_notebook_state table)
SELECT c.*
FROM code_notebook_sql_cell_migratable c
LEFT JOIN code_notebook_state s
ON c.code_notebook_cell_id = s.code_notebook_cell_id AND s.to_state = 'EXECUTED'
WHERE s.code_notebook_cell_id IS NULL
ORDER BY c.cell_name;
DROP VIEW IF EXISTS "code_notebook_migration_sql";
CREATE VIEW IF NOT EXISTS "code_notebook_migration_sql" AS
-- code provenance: `RssdInitSqlNotebook.notebookBusinessLogicViews` (file:///home/baasit/www.surveilr.com/lib/std/lifecycle.sql.ts)
-- Creates a dynamic migration script by concatenating all interpretable_code for cells that should be migrated.
-- Excludes cells with names containing '_once_' if they have already been executed.
-- Includes comments before each block and special comments for excluded cells.
-- Wraps everything in a single transaction
SELECT
'BEGIN TRANSACTION;
'||
'CREATE TEMP TABLE IF NOT EXISTS "session_state_ephemeral" (
"key" TEXT PRIMARY KEY NOT NULL,
"value" TEXT NOT NULL
);
' ||
GROUP_CONCAT(
CASE
-- Case 1: Non-idempotent and already executed
WHEN c.is_idempotent = FALSE AND s.code_notebook_cell_id IS NOT NULL THEN
'-- ' || c.notebook_name || '.' || c.cell_name || ' not included because it is non-idempotent and was already executed on ' || s.transitioned_at || '
'
-- Case 2: Idempotent and not yet executed, idempotent and being reapplied, or non-idempotent and being run for the first time
ELSE
'-- ' || c.notebook_name || '.' || c.cell_name || '
' ||
CASE
-- First execution (non-idempotent or idempotent)
WHEN s.code_notebook_cell_id IS NULL THEN
'-- Executing for the first time.
'
-- Reapplying execution (idempotent)
ELSE
'-- Reapplying execution. Last executed on ' || s.transitioned_at || '
'
END ||
c.interpretable_code || '
' ||
'INSERT INTO code_notebook_state (code_notebook_state_id, code_notebook_cell_id, from_state, to_state, transition_reason, created_at) ' ||
'VALUES (' ||
'''' || c.code_notebook_cell_id || '__' || strftime('%Y%m%d%H%M%S', 'now') || '''' || ', ' ||
'''' || c.code_notebook_cell_id || '''' || ', ' ||
'''MIGRATION_CANDIDATE''' || ', ' ||
'''EXECUTED''' || ', ' ||
CASE
WHEN s.code_notebook_cell_id IS NULL THEN '''Migration'''
ELSE '''Reapplication'''
END || ', ' ||
'CURRENT_TIMESTAMP' || ')' || '
' ||
'ON CONFLICT(code_notebook_cell_id, from_state, to_state) DO UPDATE SET updated_at = CURRENT_TIMESTAMP, ' ||
'transition_reason = ''Reapplied ' || datetime('now', 'localtime') || ''';' || '
'
END,
'
'
) || '
COMMIT;' AS migration_sql
FROM
code_notebook_sql_cell_migratable c
LEFT JOIN
code_notebook_state s
ON c.code_notebook_cell_id = s.code_notebook_cell_id AND s.to_state = 'EXECUTED'
ORDER BY
c.cell_name;
DROP VIEW IF EXISTS "plm_graph";
CREATE VIEW IF NOT EXISTS "plm_graph" AS
SELECT
ure.graph_name,
ure.nature,
ur.uniform_resource_id,
ur.uri,
ur_ingest_plm.ur_ingest_session_plm_acct_project_issue_id,
ur_ingest_plm.issue_id,
ur_ingest_plm.ur_ingest_session_plm_acct_project_id as project_id,
ur_ingest_plm.title,
ur_ingest_plm.body
FROM
uniform_resource_edge ure
JOIN
uniform_resource ur ON ure.uniform_resource_id = ur.uniform_resource_id
JOIN
ur_ingest_session_plm_acct_project_issue ur_ingest_plm ON ure.node_id = ur_ingest_plm.ur_ingest_session_plm_acct_project_issue_id
WHERE
ure.graph_name = 'plm';
;
DROP VIEW IF EXISTS "imap_graph";
CREATE VIEW IF NOT EXISTS "imap_graph" AS
SELECT
ure.graph_name,
ur.uniform_resource_id,
ur.nature,
ur.uri,
ur.content,
ur_ingest_imap.ur_ingest_session_imap_acct_folder_message_id,
ur_ingest_imap.ingest_imap_acct_folder_id,
ur_ingest_imap.message_id
FROM
uniform_resource_edge ure
JOIN
uniform_resource ur ON ure.uniform_resource_id = ur.uniform_resource_id
JOIN
ur_ingest_session_imap_acct_folder_message ur_ingest_imap ON ure.node_id = ur_ingest_imap.ur_ingest_session_imap_acct_folder_message_id
WHERE
ure.graph_name = 'imap';
;
DROP VIEW IF EXISTS "filesystem_graph";
CREATE VIEW IF NOT EXISTS "filesystem_graph" AS
SELECT
ure.graph_name,
ur.uniform_resource_id,
ur.nature,
ur.uri,
ur_ingest_fs_path.ur_ingest_session_fs_path_id,
ur_ingest_fs_path.root_path
FROM
uniform_resource_edge ure
JOIN
uniform_resource ur ON ure.uniform_resource_id = ur.uniform_resource_id
JOIN
ur_ingest_session_fs_path ur_ingest_fs_path ON ure.node_id = ur_ingest_fs_path.ur_ingest_session_fs_path_id
WHERE
ure.graph_name = 'filesystem';
;
DROP TABLE IF EXISTS surveilr_function_doc;
CREATE TABLE IF NOT EXISTS surveilr_function_doc (
name TEXT PRIMARY KEY,
description TEXT,
parameters JSON,
return_type TEXT,
version TEXT
);
INSERT INTO surveilr_function_doc (name, description, parameters, return_type, version)
SELECT name, description, parameters, return_type, version
FROM surveilr_function_docs();
;