Skip to content

Create Schema Script

This page contains a helper script to create a kdb Insights schema from a kdb+ database.

Options

The following options are accepted:

  • -db <path> Path to database. Defaults to current directory.
  • -out <path> Output path. If not specified, outputs to stdout, or current directory in the case of -fmt package.
  • -fmt (yaml|json|package) Output format. If yaml, outputs in the documented assembly format. If json, output is intended for use in the 'view code' feature of the kdb+ Insights UI assembly editor. If package, the output is split into one file for each table, <tablename>.yaml, in the format used in kdb+ Insights Enterprise packages, and the files are placed in the directory specified by -out.

Script

// @desc    Base names for types used by kdb+ Insights assembly files
typeNames:(.Q.t except" ")!key each(.Q.t except" ")$\:()
// @desc    Nested columns are simple plurals, except 'chars' which becomes 'string'
typeNames:@[;"C";:;`string]typeNames,{(upper key x)!`$(string value x),'"s"}typeNames
// @desc    Names for attributes used by assembly files
attrNames:`g`u`p`s!`grouped`unique`parted`sorted

// @desc    Wraps `meta`, stripping out virtual partition columns.
// @param   tbl     Table to be described
// @return  Table of metadata same as from `meta`
realMeta:{[tbl]m:meta tbl;if[1b~.Q.qp tbl;m:delete from m where c=.Q.pf];m}

// @desc    Given a table, produces a data structure representing the assembly configuration for its columns.
// @param   tbl     Table (as value, not name)
// @return  List of dicts of parameters for each column
describeCols:{[tbl] {if[`~x`type;x[`anymap]:`true];if[`=x`attrDisk;x:`attrDisk _x];x}each `name`type`attrDisk xcol `c`t`a#update typeNames t,attrNames a from 0!realMeta tbl}

// @desc    Gets name, storage type and value of each table loaded.
// @return  Table of name/type/value of tables
getTbls:{`name`type`val!/:{(x;((1b;0b;0)!`partitioned`splayed`basic) .Q.qp v;v:get x)}each tables[]}

// @desc    Combines output of `getTbls` and `describeCols` into an assembly configuration.
// @param   tblInfo Table output from `getTbls`
// @return  Dict of parameters for each table, including parameters for their columns
describeTbls:{[tblInfo] (tblInfo`name)!{if[`<>s:first x[`columns;;`name]where x[`columns;;`attrDisk]in`parted`sorted;x[`sortColsDisk]:enlist s];x} each`type`columns#update columns:describeCols each val from tblInfo}

// @desc    Out-of-band token that will be serialized into an invalid form to indicate where users must edit
EDITME:`$"@EDITME@"

// @desc    Takes the output of `describeTbls` and annotates it with necessary user edits.
// @param   dsc     Dict output from `describeTbls`
// @return  Pair `(dsc;msg)` of `dsc` with 'edit me' tokens added, and `msg` advising the user on the necessary edits
annotate:{[dsc]
    reminder:"\n";
    $[`json~args`fmt;
        [fn:"JSON";reminder:"# REMINDER: Remove these comments from the JSON file before using it, as JSON does not natively support comments.\n",reminder];
        fn:"YAML"];

    msg:"# NOTE: This script generates a preliminary ",fn," file for kdb+ insights schema.\n",
    "#\n",
    "# IMPORTANT: The generated file is NOT a valid kdb+ insights schema yet.\n",
    "# Manual review and updates are required for the following:\n",
    "# 1. Define the `prtnCol` for each partitioned table.\n",
    "# 2. Add necessary attributes for `idb` and `rdb` configurations.\n",
    "# 3. Review and update `sort columns` as needed, ensuring extra columns are added where appropriate.\n";
    dsc:{if[`partitioned=x`type;x[`prtnCol]:EDITME];x} each dsc;
    dsc:{if[99h<>type x;:x];$[`attrDisk in key x;x,`attrOrd`attrMem!(EDITME;EDITME);x]}@/:/:/:dsc;
    dsc:{if[`sortColsDisk in key x;x[`sortColsDisk],:EDITME];x}@/:dsc;
    if[@[{.Q.pf};`;`date]<>`date;msg,:"# 4. Convert database to be partitioned by date rather than ",(string .Q.pf),".\n"];
    if[count bad:where not `attrDisk in/:raze@'key@''dsc[p:where `partitioned=dsc[;`type];`columns];
        msg,:"# 5. Add attributes to tables: ",(", "sv string p bad),", or queries on them could be very slow.\n"];
    msg,:"#\n",
    "# Please address these points before using the schema.\n",
    reminder;
    (dsc;msg)
 }

// @desc    Indents every line in a string by two spaces.
// @param   x       Input string
// @return  Input string indented by two spaces
indent:{"\n"sv"  ",/:"\n"vs x}

// @desc    Serializes an atomic value. We use `.j.j` as JSON and YAML have compatible syntaxes for this. 'Edit me' tokens
//          are converted into the invalid unquoted string `@EDITME@` at this stage.
// @param   x       Atomic value
// @return  String representation of `x`
atomToStr:{$[x~EDITME;string EDITME;.j.j x]}

// @desc    Outputs JSON with block indentation (unlike .j.j) and with special treatment of 'edit me' tokens.
// @param   val     Value to be seralised
// @return  Serialised possibly-invalid JSON document
mkJSON:{[val]
    t:type val;$[
    t<0;atomToStr val;
    t within 1 19;"[",(", "sv .z.s each val),"]";
    t in 0 98h;"[\n",(indent",\n"sv .z.s each val),"\n]";
    t=99h;"{\n",(indent",\n"sv": "sv/:flip (atomToStr each key val;.z.s each value val)),"\n}"
    ]
 }

// @desc    Outputs YAML in the same format as used in assembly specifications, and with special treatment of 'edit me' tokens.
// @param   val     Value to be serialised
// @return  Serialised possibly-invalid YAML document
mkYAML:{[val]
    t:type val;$[
    t<0;atomToStr val;
    t within 1 19;"[",(", "sv .z.s each val),"]";
    t in 0 98h;"\n"sv{@[;0;:;"-"] indent x} each .z.s each val;
    t=99h;"\n"sv": "sv/:flip (string key val;{$["\n" in x;"\n",indent x;x]}each .z.s each value val)
    ]
 }

// @desc    Converts a dict of dicts into a list by turning each key into a `name` sub-key. YAML assembly files are structured as a dict with
//          table names as keys; JSON schemas in the UI and per-table YAML files in packages use a 'name' key for each table. This is used to
//          convert from the former to the latter structure.
// @param   dsc     Dict of dicts output by `annotate`
// @return  List of dicts 
restructure:{[dsc]
  ((enlist`name)!/:enlist each key dsc),'value dsc
  }

// @desc    Writes a dict of dicts from `annotate` to disk by writing each key to disk in a separate file corresponding to the table name.
// @param   dir     Directory to write files into
// @param   dsc     Dict of dicts output by `annotate`
// @param   msg     String output by `annotate`
pkgOut:{[dir;dsc;msg] {.Q.dd[x;` sv(z`name),`yaml]0:enlist y,mkYAML z}[dir;msg]each restructure dsc}

// @desc    Parses command-line arguments and calls functions to describe an output dictionary and output the description appropriately.
main:{
    args::.Q.opt .z.x;
    args::.Q.def[`db`out`fmt!(`.;`;`yaml)]args;
    args[`out]:$[null args`out;`;(args`out)like "/*";hsym args`out;.Q.dd[hsym`$system"cd";args`out]];
    @[system;"l ",string args`db;{[db;e]1"Cannot load database from ",db," ",e;exit 1}[string args`db]];
    v:annotate describeTbls getTbls[];
    dsc:v 0;
    msg:v 1;
    if[`package~args`fmt;pkgOut[args`out;dsc;msg];:()];
    $[`~args`out;-1;'[0:[args`out];enlist]] msg,((`json`yaml!('[mkJSON;restructure];mkYAML))args`fmt) dsc
 }

main[];exit 0