Database interaction and management functionality
Functionality for the interaction with and management of databases.
Warning
This functionality is provided in it's present form as a BETA
Feature and is subject to change. To enable this functionality
for testing please following configuration instructions
here setting PYKX_BETA_FEATURES='true'
DB
DB(*, path=None)
Bases: _TABLES
Singleton class used for the management of kdb+ Databases
create
create(
table,
table_name,
partition,
*,
by_field=None,
sym_enum=None,
log=True,
compress=None,
encrypt=None
)
Create an on-disk partitioned table within a kdb+ database from a supplied
pykx.Table
object. Once generated this table will be accessible
as an attribute of the DB
class or a sub attribute of DB.table
.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
table |
The |
required | |
table_name |
The name with which the table will be persisted and accessible
once loaded and available as a |
required | |
partition |
The name of the column which is to be used to partition the data if
supplied as a |
required | |
by_field |
A field of the table to be used as a by column, this column will be the second column in the table (the first being the virtual column determined by the partitioning column) |
None
|
|
sym_enum |
The name of the symbol enumeration table to be associated with the table |
None
|
|
log |
Print information about status of partitioned datab |
True
|
|
compress |
|
None
|
|
encrypt |
|
None
|
Returns:
Type | Description |
---|---|
A |
Examples:
Generate a partitioned table from a table containing multiple partitions
>>> import pykx as kx
>>> db = kx.DB(path = '/tmp/newDB')
>>> N = 1000
>>> qtab = kx.Table(data = {
... 'date': kx.q.asc(kx.random.random(N, kx.q('2020.01 2020.02 2020.03m'))),
... 'sym': kx.random.random(N, ['AAPL', 'GOOG', 'MSFT']),
... 'price': kx.random.random(N, 10.0),
... 'size': kx.random.random(N, 100)
... })
>>> db.create(qtab, 'stocks', 'date', by_field = 'sym', sym_enum = 'symbols')
>>> db.tables
['stocks']
>>> db.stocks
pykx.PartitionedTable(pykx.q('
month sym price size
---------------------------
2020.01 AAPL 7.979004 85
2020.01 AAPL 5.931866 55
2020.01 AAPL 5.255477 49
2020.01 AAPL 8.15255 74
2020.01 AAPL 4.771067 80
..
'))
Add a table as a partition to an on-disk database, in the example below we are adding a partition to the table generated above
>>> import pykx as kx
>>> db = kx.DB(path = '/tmp/newDB')
>>> N = 333
>>> qtab = kx.Table(data = {
... 'sym': kx.random.random(N, ['AAPL', 'GOOG', 'MSFT']),
... 'price': kx.random.random(N, 10.0),
... 'size': kx.random.random(N, 100)
... })
>>> db.create(qtab, 'stocks', kx.q('2020.04m'), by_field = 'sym', sym_enum = 'symbols')
>>> db.tables
['stocks']
>>> db.stocks
pykx.PartitionedTable(pykx.q('
month sym price size
---------------------------
2020.01 AAPL 7.979004 85
2020.01 AAPL 5.931866 55
2020.01 AAPL 5.255477 49
2020.01 AAPL 8.15255 74
2020.01 AAPL 4.771067 80
..
'))
Add a table as a partition to an on-disk database, in the example below we are additionally applying gzip compression to the persisted table
>>> import pykx as kx
>>> db = kx.DB(path = '/tmp/newDB')
>>> N = 333
>>> qtab = kx.Table(data = {
... 'sym': kx.random.random(N, ['AAPL', 'GOOG', 'MSFT']),
... 'price': kx.random.random(N, 10.0),
... 'size': kx.random.random(N, 100)
... })
>>> compress = kx.Compress(kx.CompressionAlgorithm.gzip, level=2)
>>> db.create(qtab, 'stocks', kx.q('2020.04m'), compress=compress)
>>> kx.q('{-21!hsym x}', '/tmp/newDB/2020.04/stocks/price')
pykx.Dictionary(pykx.q('
compressedLength | 2064
uncompressedLength| 2680
algorithm | 2i
logicalBlockSize | 17i
zipLevel | 2i
'))
load
load(path, *, overwrite=False, encrypt=None)
Load the tables associated with a kdb+ Database, once loaded a table
is accessible as an attribute of the DB
class or a sub attribute
of DB.table
. Note that can alternatively be called when providing a path
on initialisation of the DB class.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
path |
Union[Path, str]
|
The file system path at which your database is located |
required |
overwrite |
Should loading of the database overwrite any currently loaded databases |
False
|
Returns:
Type | Description |
---|---|
A |
Examples:
Load an on-disk database
>>> import pykx as kx
>>> db = kx.DB()
>>> db.load('testData')
>>> db.tables
['testData']
>>> db.testData
pykx.PartitionedTable(pykx.q('
month sym time price size
---------------------------------------
2020.01 FDP 00:00:00.004 90.94738 12
2020.01 FDP 00:00:00.005 33.81127 15
2020.01 FDP 00:00:00.027 88.89853 16
2020.01 FDP 00:00:00.035 78.33244 9
2020.01 JPM 00:00:00.055 68.65177 1
..
'))
>>> db.table.testData
pykx.PartitionedTable(pykx.q('
month sym time price size
---------------------------------------
2020.01 FDP 00:00:00.004 90.94738 12
2020.01 FDP 00:00:00.005 33.81127 15
2020.01 FDP 00:00:00.027 88.89853 16
2020.01 FDP 00:00:00.035 78.33244 9
2020.01 JPM 00:00:00.055 68.65177 1
..
'))
Load an on-disk database when initialising the class
>>> import pykx as kx
>>> db = kx.DB(path = 'testData')
>>> db.tables
['testData']
>>> db.testData
pykx.PartitionedTable(pykx.q('
month sym time price size
---------------------------------------
2020.01 FDP 00:00:00.004 90.94738 12
2020.01 FDP 00:00:00.005 33.81127 15
2020.01 FDP 00:00:00.027 88.89853 16
2020.01 FDP 00:00:00.035 78.33244 9
2020.01 JPM 00:00:00.055 68.65177 1
..
'))
>>> db.table.testData
pykx.PartitionedTable(pykx.q('
month sym time price size
---------------------------------------
2020.01 FDP 00:00:00.004 90.94738 12
2020.01 FDP 00:00:00.005 33.81127 15
2020.01 FDP 00:00:00.027 88.89853 16
2020.01 FDP 00:00:00.035 78.33244 9
2020.01 JPM 00:00:00.055 68.65177 1
..
'))
rename_column
rename_column(table, original_name, new_name)
Rename a column within a loaded kdb+ Database
Parameters:
Name | Type | Description | Default |
---|---|---|---|
table |
The name of the table within which a column is to be renamed |
required | |
original_name |
Name of the column which is to be renamed |
required | |
new_name |
Column name which will be used as the new column name |
required |
Returns:
Type | Description |
---|---|
A |
Examples:
Rename the column of a table
>>> import pykx as kx
>>> db = kx.DB()
>>> db.load('testDB')
>>> db.tables
['testTable']
>>> db.list_columns('testTable')
['month', 'sym', 'time', 'price', 'size']
>>> db.rename_column('testTable', 'sym', 'symbol')
>>> db.testTable
pykx.PartitionedTable(pykx.q('
month symbol time price size
---------------------------------------
2020.01 FDP 00:00:00.004 90.94738 12
2020.01 FDP 00:00:00.005 33.81127 15
2020.01 FDP 00:00:00.027 88.89853 16
2020.01 FDP 00:00:00.035 78.33244 9
2020.01 JPM 00:00:00.055 68.65177 1
..
'))
delete_column
delete_column(table, column)
Delete the column of a loaded kdb+ Database
Parameters:
Name | Type | Description | Default |
---|---|---|---|
table |
The name of the table within which a column is to be deleted |
required | |
column |
Column which is to be deleted from the database |
required |
Returns:
Type | Description |
---|---|
A |
Examples:
Delete the column of a table
>>> import pykx as kx
>>> db = kx.DB()
>>> db.load('testDB')
>>> db.tables
['testTable']
>>> db.list_columns('testTable')
['month', 'sym', 'time', 'price', 'size']
>>> db.delete_column('testTable', 'size')
>>> db.testTable
pykx.PartitionedTable(pykx.q('
month symbol time price
-------------------------------------
2020.01 FDP 00:00:00.004 90.94738
2020.01 FDP 00:00:00.005 33.81127
2020.01 FDP 00:00:00.027 88.89853
2020.01 FDP 00:00:00.035 78.33244
2020.01 JPM 00:00:00.055 68.65177
..
'))
rename_table
rename_table(original_name, new_name)
Rename a table within a loaded kdb+ Database
Parameters:
Name | Type | Description | Default |
---|---|---|---|
original_name |
The name of the table which is to be renamed |
required | |
new_name |
Updated table name |
required |
Returns:
Type | Description |
---|---|
A |
Examples:
Rename a database table
>>> import pykx as kx
>>> db = kx.DB()
>>> db.load('testDB')
>>> db.tables
['testTable']
>>> db.rename_table('testTable', 'updated_table')
>>> db.tables
['updated_table']
list_columns
list_columns(table)
List the columns of a table within a loaded kdb+ Database
Parameters:
Name | Type | Description | Default |
---|---|---|---|
table |
The name of the table whose columns are listed |
required |
Returns:
Type | Description |
---|---|
A list of strings defining the columns of a table |
Examples:
List the columns of a table in a database
>>> import pykx as kx
>>> db = kx.DB()
>>> db.load('testDB')
>>> db.tables
['testTable']
>>> db.list_columns('testTable')
['month', 'sym', 'time', 'price', 'size']
add_column
add_column(table, column_name, default_value)
Add a column to a table within a loaded kdb+ Database
Parameters:
Name | Type | Description | Default |
---|---|---|---|
table |
The name of the table to which a column is to be added |
required | |
column_name |
Name of the column to be added |
required | |
default_value |
The default value to be used for all existing partitions |
required |
Returns:
Type | Description |
---|---|
A |
Examples:
Add a column to a table within a partitioned database
>>> import pykx as kx
>>> db = kx.DB()
>>> db.load('testDB')
>>> db.tables
['testTable']
>>> db.list_columns('testTable')
['month', 'sym', 'time', 'price', 'size']
>>> db.add_column('testTable', 'test', kx.IntAtom.null)
>>> db.list_columns('testTable')
['month', 'sym', 'time', 'price', 'size']
find_column
find_column(table, column_name)
Functionality for finding a column across partitions within a loaded kdb+ Database
Parameters:
Name | Type | Description | Default |
---|---|---|---|
table |
The name of the table within which columns are to be found |
required | |
column_name |
The name of the column to be found within a table |
required |
Returns:
Type | Description |
---|---|
A |
|
if a column does not exist in a specified partition an error will be raised |
|
and the logs will indicate which columns did not have the specified column. |
Examples:
Find a column that exists
>>> import pykx as kx
>>> db = kx.DB()
>>> db.load('testDB')
>>> db.tables
['testTable']
>>> db.list_columns('testTable')
['month', 'sym', 'time', 'price', 'size']
>>> db.find_column('price')
2023.11.10 16:48:57 column price (type 0) in `:/usr/pykx/db/2015.01.01/testTable
2023.11.10 16:48:57 column price (type 0) in `:/usr/pykx/db/2015.01.02/testTable
Attempt to find a column that does not exist
>>> import pykx as kx
>>> db = kx.DB()
>>> db.load('testDB')
>>> db.tables
['testTable']
>>> db.list_columns('testTable')
['month', 'sym', 'time', 'price', 'size']
>>> db.find_column('side')
2023.11.10 16:49:02 column side *NOT*FOUND* in `:/usr/pykx/db/2015.01.01/testTable
2023.11.10 16:49:02 column side *NOT*FOUND* in `:/usr/pykx/db/2015.01.02/testTable
Traceback (most recent call last):
...
pykx.exceptions.QError: Requested column not found in all partitions, see log output above
reorder_columns
reorder_columns(table, new_order)
Reorder the columns of a persisted kdb+ database
Parameters:
Name | Type | Description | Default |
---|---|---|---|
table |
The name of the table within which columns will be rearranged |
required | |
new_order |
The ordering of the columns following update |
required |
Returns:
Type | Description |
---|---|
A |
Examples:
Update the order of columns for a persisted kdb+ database
>>> import pykx as kx
>>> db = kx.DB()
>>> db.load('testDB')
>>> db.tables
['testTable']
>>> col_list = db.list_columns('testTable')
>>> col_list
['month', 'sym', 'time', 'price', 'size']
>>> col_list.reverse()
>>> col_list
['size', 'price', 'time', 'sym', 'month']
>>> db.reorder_columns('testTable', col_list)
2023.11.13 17:56:17 reordering columns in `:/usr/pykx/2015.01.01/testTable
2023.11.13 17:56:17 reordering columns in `:/usr/pykx/2015.01.02/testTable
['month', 'sym', 'time', 'price', 'size']
set_column_attribute
set_column_attribute(table, column_name, new_attribute)
Set an attribute associated with a column for an on-disk database
Parameters:
Name | Type | Description | Default |
---|---|---|---|
table |
The name of the table within which an attribute will be set |
required | |
column_name |
Name of the column to which the attribute will be applied |
required | |
new_attribute |
The attribute which is to be applied, this can be one of 'sorted'/'u', 'partitioned'/'p', 'unique'/'u' or 'grouped'/'g'. |
required |
Returns:
Type | Description |
---|---|
A |
Examples:
Add an attribute to a column of a persisted database
>>> import pykx as kx
>>> db = kx.DB()
>>> db.load('testDB')
>>> db.tables
['testTable']
>>> kx.q.meta(db.testTable)
pykx.KeyedTable(pykx.q('
c | t f a
----| -----
date| d
test| j
p | f
sym | s
'))
>>> db.set_column_attribute('testTable', 'sym', 'grouped')
>>> kx.q.meta(db.testTable)
pykx.KeyedTable(pykx.q('
c | t f a
----| -----
date| d
test| j
p | f
sym | s g
'))
set_column_type
set_column_type(table, column_name, new_type)
Convert/set the type of a column to a specified type
Parameters:
Name | Type | Description | Default |
---|---|---|---|
table |
The name of the table within which a column is to be converted |
required | |
column_name |
Name of the column which is to be converted |
required | |
new_type |
PyKX type to which a column is to be converted |
required |
Returns:
Type | Description |
---|---|
A |
Examples:
Convert the type of a column within a database table
>>> import pykx as kx
>>> db = kx.DB()
>>> db.load('testDB')
>>> db.tables
['testTable']
>>> kx.q.meta(db.testTable)
pykx.KeyedTable(pykx.q('
c | t f a
----| -----
date| d
test| j
p | f
sym | s
'))
>>> db.set_column_type('testTable', 'test', kx.FloatAtom)
>>> kx.q.meta(db.testTable)
pykx.KeyedTable(pykx.q('
c | t f a
----| -----
date| d
test| f
p | f
sym | s
'))
clear_column_attribute
clear_column_attribute(table, column_name)
Clear an attribute associated with a column of an on-disk database
Parameters:
Name | Type | Description | Default |
---|---|---|---|
table |
The name of the table within which the attribute of a column will be removed |
required | |
column_name |
Name of the column from which an attribute will be removed |
required |
Returns:
Type | Description |
---|---|
A |
Examples:
Remove an attribute of a column of a persisted database
>>> import pykx as kx
>>> db = kx.DB()
>>> db.load('testDB')
>>> db.tables
['testTable']
>>> kx.q.meta(db.testTable)
pykx.KeyedTable(pykx.q('
c | t f a
----| -----
date| d
test| j
p | f
sym | s g
'))
>>> db.clear_column_attribute('testTable', 'sym')
>>> kx.q.meta(db.testTable)
pykx.KeyedTable(pykx.q('
c | t f a
----| -----
date| d
test| j
p | f
sym | s
'))
copy_column
copy_column(table, original_column, new_column)
Create a copy of a column within a table
Parameters:
Name | Type | Description | Default |
---|---|---|---|
table |
Name of the table |
required | |
original_column |
Name of the column to be copied |
required | |
new_column |
Name of the copied column |
required |
Returns:
Type | Description |
---|---|
A |
|
database following column copy |
Examples:
Copy a column within a kdb+ database
>>> import pykx as kx
>>> db = kx.DB()
>>> db.load('testDB')
>>> db.list_columns('testTable')
['month', 'sym', 'time', 'price', 'size']
>>> db.copy_column('testTable', 'size', 'dup_size')
['month', 'sym', 'time', 'price', 'size', 'dup_size']
apply_function
apply_function(table, column_name, function)
Apply a function per partition on a column of a persisted kdb+ database
Parameters:
Name | Type | Description | Default |
---|---|---|---|
table |
Name of the table |
required | |
column_name |
Name of the column on which the function is to be applied |
required | |
function |
Callable function to be applied on a column vector per column |
required |
Returns:
Type | Description |
---|---|
A |
|
and the reloading of the database |
Examples:
Apply a q function to a specified column per partition
>>> import pykx as kx
>>> db = kx.DB()
>>> db.load('testDB')
>>> db.testTable
pykx.PartitionedTable(pykx.q('
month symbol time price
-------------------------------------
2020.01 FDP 00:00:00.004 90.94738
2020.01 FDP 00:00:00.005 33.81127
2020.01 FDP 00:00:00.027 88.89853
2020.01 FDP 00:00:00.035 78.33244
2020.01 JPM 00:00:00.055 68.65177
..
'))
>>> db.apply_function('testTable', 'price', kx.q('2*'))
>>> db.testTable
pykx.PartitionedTable(pykx.q('
month symbol time price
-------------------------------------
2020.01 FDP 00:00:00.004 181.8948
2020.01 FDP 00:00:00.005 67.62254
2020.01 FDP 00:00:00.027 177.7971
2020.01 FDP 00:00:00.035 156.6649
2020.01 JPM 00:00:00.055 137.3035
..
'))
Apply a Python function to the content of a specified column per partition
>>> import pykx as kx
>>> db = kx.DB()
>>> db.load('testDB')
>>> db.testTable
pykx.PartitionedTable(pykx.q('
month symbol time price
-------------------------------------
2020.01 FDP 00:00:00.004 90.94738
2020.01 FDP 00:00:00.005 33.81127
2020.01 FDP 00:00:00.027 88.89853
2020.01 FDP 00:00:00.035 78.33244
2020.01 JPM 00:00:00.055 68.65177
..
'))
>>> db.apply_function('testTable', 'price', lambda x:2*x.np())
>>> db.testTable
pykx.PartitionedTable(pykx.q('
month symbol time price
-------------------------------------
2020.01 FDP 00:00:00.004 181.8948
2020.01 FDP 00:00:00.005 67.62254
2020.01 FDP 00:00:00.027 177.7971
2020.01 FDP 00:00:00.035 156.6649
2020.01 JPM 00:00:00.055 137.3035
..
'))
fill_database
fill_database()
Fill missing tables from partitions within a database using the most recent partition as a template, this will report the partitions but not the tables which are being filled.
Returns:
Type | Description |
---|---|
A |
Examples:
Fill missing tables from a database
>>> import pykx as kx
>>> db = kx.DB(path = 'newDB')
>>> db.fill_database()
Successfully filled missing tables to partition: :/usr/newDB/2020.04
Successfully filled missing tables to partition: :/usr/newDB/2020.03
Successfully filled missing tables to partition: :/usr/newDB/2020.02
Successfully filled missing tables to partition: :/usr/newDB/2020.01
partition_count
partition_count(*, subview=None)
Count the number of rows per partition for the presently loaded database.
Use of the parameter subview
can allow users to count only the rows
in specifies partitions.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
subview |
An optional list of partitions from which to retrieve the per partition count |
None
|
Returns:
Type | Description |
---|---|
A |
Warning
Using this function will result in any specified subview
of the data being reset,
if you require the use of a subview for queries please reset using the database
subview
command.
Examples:
Copy a column within a kdb+ database
>>> import pykx as kx
>>> db = kx.DB(path = 'newDB')
>>> db.partition_count()
pykx.Dictionary(pykx.q('
| trades quotes
-------| -------------
2020.01| 334 0
2020.02| 324 0
2020.03| 342 1000
'))
Copy a column within a kdb+ database
>>> import pykx as kx
>>> db = kx.DB(path = 'newDB')
>>> db.partition_count(sub_view = kx.q('2020.01 2020.02m'))
pykx.Dictionary(pykx.q('
| trades quotes
-------| -------------
2020.01| 334 0
2020.02| 324 0
'))
subview
subview(view=None)
Specify the subview to be used when querying a partitioned table
Parameters:
Name | Type | Description | Default |
---|---|---|---|
view |
A list of partition values which will serve as a filter
for all queries against any partitioned table within the
database. If view is supplied as |
None
|
Returns:
Type | Description |
---|---|
A |
Examples:
Set the subview range to include only 2020.02
and 2020.03
>>> import pykx as kx
>>> db = kx.DB(path = 'newDB')
>>> db.subview(kx.q('2020.02 2020.03m')
>>> kx.q.qsql.select(db.trades, 'month')
pykx.Table(pykx.q('
month
-------
2020.02
2020.03
'))
Reset the database subview to include a fully specified range
>>> import pykx as kx
>>> db = kx.DB(path = 'newDB')
>>> db.subview()
>>> kx.q.qsql.select(db.trades, 'month')
pykx.Table(pykx.q('
month
-------
2020.01
2020.02
2020.03
2020.04
2020.05
'))
enumerate
enumerate(table, *, sym_file=None)
Perform an enumeration on a user specified table against the current sym files associated with the database
Parameters:
Name | Type | Description | Default |
---|---|---|---|
path |
The folder location to which your table will be persisted |
required | |
table |
The |
required | |
sym_file |
The name of the sym file contained in the folder specified by
the |
None
|
Returns:
Type | Description |
---|---|
The supplied table with enumeration applied |
Examples:
Enumerate the symbol columns of a table without specifying the sym
file
>>> import pykx as kx
>>> db = kx.DB(path = 'newDB')
>>> N = 1000
>>> tab = kx.Table(data = {
... 'x': kx.random.random(N, ['a', 'b', 'c']),
... 'x1': kx.random.random(N, 1.0),
... 'x2': kx.random.random(N, 10)
... }
>>> tab = db.enumerate(tab)
>>> tab['x']
pykx.EnumVector(pykx.q('`sym$`a`b`a`c`b..'))
Enumerate the symbol columns of a table specifying the sym
file used
>>> import pykx as kx
>>> db = kx.DB(path = 'newDB')
>>> N = 1000
>>> tab = kx.Table(data = {
... 'x': kx.random.random(N, ['a', 'b', 'c']),
... 'x1': kx.random.random(N, 1.0),
... 'x2': kx.random.random(N, 10)
... }
>>> tab = db.enumerate(tab, sym_file = 'mysym')
>>> tab['x']
pykx.EnumVector(pykx.q('`mysym$`a`b`a`c`b..'))