Create Schema Script
This page contains a helper script to create a kdb Insights schema from a kdb+ database.
Options
The following options are accepted:
-db <path>
Path to database. Defaults to current directory.-out <path>
Output path. If not specified, outputs tostdout
, or current directory in the case of-fmt package
.-fmt (yaml|json|package)
Output format. Ifyaml
, outputs in the documented assembly format. Ifjson
, output is intended for use in the 'view code' feature of the kdb+ Insights UI assembly editor. Ifpackage
, the output is split into one file for each table,<tablename>.yaml
, in the format used in kdb+ Insights Enterprise packages, and the files are placed in the directory specified by-out
.
Script
// @desc Base names for types used by kdb+ Insights assembly files
typeNames:(.Q.t except" ")!key each(.Q.t except" ")$\:()
// @desc Nested columns are simple plurals, except 'chars' which becomes 'string'
typeNames:@[;"C";:;`string]typeNames,{(upper key x)!`$(string value x),'"s"}typeNames
// @desc Names for attributes used by assembly files
attrNames:`g`u`p`s!`grouped`unique`parted`sorted
// @desc Wraps `meta`, stripping out virtual partition columns.
// @param tbl Table to be described
// @return Table of metadata same as from `meta`
realMeta:{[tbl]m:meta tbl;if[1b~.Q.qp tbl;m:delete from m where c=.Q.pf];m}
// @desc Given a table, produces a data structure representing the assembly configuration for its columns.
// @param tbl Table (as value, not name)
// @return List of dicts of parameters for each column
describeCols:{[tbl] {if[`~x`type;x[`anymap]:`true];if[`=x`attrDisk;x:`attrDisk _x];x}each `name`type`attrDisk xcol `c`t`a#update typeNames t,attrNames a from 0!realMeta tbl}
// @desc Gets name, storage type and value of each table loaded.
// @return Table of name/type/value of tables
getTbls:{`name`type`val!/:{(x;((1b;0b;0)!`partitioned`splayed`basic) .Q.qp v;v:get x)}each tables[]}
// @desc Combines output of `getTbls` and `describeCols` into an assembly configuration.
// @param tblInfo Table output from `getTbls`
// @return Dict of parameters for each table, including parameters for their columns
describeTbls:{[tblInfo] (tblInfo`name)!{if[`<>s:first x[`columns;;`name]where x[`columns;;`attrDisk]in`parted`sorted;x[`sortColsDisk]:enlist s];x} each`type`columns#update columns:describeCols each val from tblInfo}
// @desc Out-of-band token that will be serialized into an invalid form to indicate where users must edit
EDITME:`$"@EDITME@"
// @desc Takes the output of `describeTbls` and annotates it with necessary user edits.
// @param dsc Dict output from `describeTbls`
// @return Pair `(dsc;msg)` of `dsc` with 'edit me' tokens added, and `msg` advising the user on the necessary edits
annotate:{[dsc]
reminder:"\n";
$[`json~args`fmt;
[fn:"JSON";reminder:"# REMINDER: Remove these comments from the JSON file before using it, as JSON does not natively support comments.\n",reminder];
fn:"YAML"];
msg:"# NOTE: This script generates a preliminary ",fn," file for kdb+ insights schema.\n",
"#\n",
"# IMPORTANT: The generated file is NOT a valid kdb+ insights schema yet.\n",
"# Manual review and updates are required for the following:\n",
"# 1. Define the `prtnCol` for each partitioned table.\n",
"# 2. Add necessary attributes for `idb` and `rdb` configurations.\n",
"# 3. Review and update `sort columns` as needed, ensuring extra columns are added where appropriate.\n";
dsc:{if[`partitioned=x`type;x[`prtnCol]:EDITME];x} each dsc;
dsc:{if[99h<>type x;:x];$[`attrDisk in key x;x,`attrOrd`attrMem!(EDITME;EDITME);x]}@/:/:/:dsc;
dsc:{if[`sortColsDisk in key x;x[`sortColsDisk],:EDITME];x}@/:dsc;
if[@[{.Q.pf};`;`date]<>`date;msg,:"# 4. Convert database to be partitioned by date rather than ",(string .Q.pf),".\n"];
if[count bad:where not `attrDisk in/:raze@'key@''dsc[p:where `partitioned=dsc[;`type];`columns];
msg,:"# 5. Add attributes to tables: ",(", "sv string p bad),", or queries on them could be very slow.\n"];
msg,:"#\n",
"# Please address these points before using the schema.\n",
reminder;
(dsc;msg)
}
// @desc Indents every line in a string by two spaces.
// @param x Input string
// @return Input string indented by two spaces
indent:{"\n"sv" ",/:"\n"vs x}
// @desc Serializes an atomic value. We use `.j.j` as JSON and YAML have compatible syntaxes for this. 'Edit me' tokens
// are converted into the invalid unquoted string `@EDITME@` at this stage.
// @param x Atomic value
// @return String representation of `x`
atomToStr:{$[x~EDITME;string EDITME;.j.j x]}
// @desc Outputs JSON with block indentation (unlike .j.j) and with special treatment of 'edit me' tokens.
// @param val Value to be seralised
// @return Serialised possibly-invalid JSON document
mkJSON:{[val]
t:type val;$[
t<0;atomToStr val;
t within 1 19;"[",(", "sv .z.s each val),"]";
t in 0 98h;"[\n",(indent",\n"sv .z.s each val),"\n]";
t=99h;"{\n",(indent",\n"sv": "sv/:flip (atomToStr each key val;.z.s each value val)),"\n}"
]
}
// @desc Outputs YAML in the same format as used in assembly specifications, and with special treatment of 'edit me' tokens.
// @param val Value to be serialised
// @return Serialised possibly-invalid YAML document
mkYAML:{[val]
t:type val;$[
t<0;atomToStr val;
t within 1 19;"[",(", "sv .z.s each val),"]";
t in 0 98h;"\n"sv{@[;0;:;"-"] indent x} each .z.s each val;
t=99h;"\n"sv": "sv/:flip (string key val;{$["\n" in x;"\n",indent x;x]}each .z.s each value val)
]
}
// @desc Converts a dict of dicts into a list by turning each key into a `name` sub-key. YAML assembly files are structured as a dict with
// table names as keys; JSON schemas in the UI and per-table YAML files in packages use a 'name' key for each table. This is used to
// convert from the former to the latter structure.
// @param dsc Dict of dicts output by `annotate`
// @return List of dicts
restructure:{[dsc]
((enlist`name)!/:enlist each key dsc),'value dsc
}
// @desc Writes a dict of dicts from `annotate` to disk by writing each key to disk in a separate file corresponding to the table name.
// @param dir Directory to write files into
// @param dsc Dict of dicts output by `annotate`
// @param msg String output by `annotate`
pkgOut:{[dir;dsc;msg] {.Q.dd[x;` sv(z`name),`yaml]0:enlist y,mkYAML z}[dir;msg]each restructure dsc}
// @desc Parses command-line arguments and calls functions to describe an output dictionary and output the description appropriately.
main:{
args::.Q.opt .z.x;
args::.Q.def[`db`out`fmt!(`.;`;`yaml)]args;
args[`out]:$[null args`out;`;(args`out)like "/*";hsym args`out;.Q.dd[hsym`$system"cd";args`out]];
@[system;"l ",string args`db;{[db;e]1"Cannot load database from ",db," ",e;exit 1}[string args`db]];
v:annotate describeTbls getTbls[];
dsc:v 0;
msg:v 1;
if[`package~args`fmt;pkgOut[args`out;dsc;msg];:()];
$[`~args`out;-1;'[0:[args`out];enlist]] msg,((`json`yaml!('[mkJSON;restructure];mkYAML))args`fmt) dsc
}
main[];exit 0