import os
# While this feature is in beta it must be explicitely enabled with this environment variable
# or by adding the `--pandas-api` flag to the QARGS environment variable
os.environ['PYKX_ENABLE_PANDAS_API'] = 'true'
import pykx as kx
import numpy as np
import pandas as pd
Constructing Tables¶
Table¶
Create a table from a list of rows or by converting a Python dictionary object
Parameters:
Name | Type | Description | Default |
---|---|---|---|
x | Union[list, array] | An array like object containing the contents of each row of the table. | None |
data | dict | A dictionary to be converted into a Table object. | None |
columns | list[str] | A list of column names to use when constructing from an array of rows. | None |
Returns:
Type | Description |
---|---|
Table | The newly constructed table object. |
Examples:
Create a table from a dictionary object.
kx.Table(data={'x': list(range(10)), 'y': [10 - x for x in range(10)]})
pykx.Table(pykx.q(' x y ---- 0 10 1 9 2 8 3 7 4 6 5 5 6 4 7 3 8 2 9 1 '))
Create a Table from an array like object.
kx.Table([[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]])
pykx.Table(pykx.q(' x x1 ---- 0 1 2 3 4 5 6 7 8 9 '))
Create a Table from an array like object and provide names for the columns to use.
kx.Table([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]], columns=['x', 'y', 'z'])
pykx.Table(pykx.q(' x y z ------- 0 1 2 3 4 5 6 7 8 9 10 11 '))
Keyed Table¶
Create a keyed table from a list of rows or by converting a Python dictionary object
Parameters:
Name | Type | Description | Default |
---|---|---|---|
x | Union[list, array] | An array like object containing the contents of each row of the table. | None |
data | dict | A dictionary to be converted into a Table object. | None |
columns | list[str] | A list of column names to use when constructing from an array of rows. | None |
index | list[Any] | An array like object to use as the index column of the table. | None |
Returns:
Type | Description |
---|---|
KeyedTable | The newly constructed keyed table object. |
Examples:
Create a keyed table from a dictionary object.
kx.KeyedTable(data={'x': list(range(10)), 'y': list(10 - x for x in range(10))})
pykx.KeyedTable(pykx.q(' idx| x y ---| ---- 0 | 0 10 1 | 1 9 2 | 2 8 3 | 3 7 4 | 4 6 5 | 5 5 6 | 6 4 7 | 7 3 8 | 8 2 9 | 9 1 '))
Create a keyed table from a list of rows.
kx.KeyedTable([[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]])
pykx.KeyedTable(pykx.q(' idx| x x1 ---| ---- 0 | 0 1 1 | 2 3 2 | 4 5 3 | 6 7 4 | 8 9 '))
Create a keyed table from a list of rows and provide names for the resulting columns.
kx.KeyedTable([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]], columns=['x', 'y', 'z'])
pykx.KeyedTable(pykx.q(' idx| x y z ---| ------- 0 | 0 1 2 1 | 3 4 5 2 | 6 7 8 3 | 9 10 11 '))
Create a keyed table with a specified index column.
kx.KeyedTable(data={'x': list(range(10)), 'y': list(10 - x for x in range(10))}, index=[2 * x for x in range(10)])
pykx.KeyedTable(pykx.q(' idx| x y ---| ---- 0 | 0 10 2 | 1 9 4 | 2 8 6 | 3 7 8 | 4 6 10 | 5 5 12 | 6 4 14 | 7 3 16 | 8 2 18 | 9 1 '))
Metadata¶
kx.q('N: 1000')
tab = kx.q('([] x: til N; y: N?`AAPL`GOOG`MSFT; z: N?500f; w: N?1000; v: N?(0N 0 50 100 200 250))')
tab
pykx.Table(pykx.q(' x y z w v ------------------------ 0 AAPL 280.0322 868 200 1 MSFT 48.24615 18 2 AAPL 66.99184 301 50 3 MSFT 435.7574 951 0 4 AAPL 385.586 365 50 5 AAPL 325.6626 386 0 6 GOOG 68.13659 844 50 7 AAPL 399.9134 972 100 8 AAPL 310.2315 457 50 9 GOOG 486.8679 475 250 10 AAPL 483.0258 28 200 11 MSFT 210.9703 387 250 12 AAPL 49.11884 205 0 13 GOOG 463.9106 628 14 MSFT 144.0166 848 250 15 AAPL 258.0005 832 50 16 MSFT 420.7807 117 200 17 GOOG 327.0546 914 250 18 MSFT 421.2875 470 0 19 MSFT 481.383 446 50 .. '))
Table.columns¶
Get the name of each column in the table
tab.columns
['x', 'y', 'z', 'w', 'v']
Table.dtypes¶
Get the pandas dtype of each column
tab.dtypes
x int64 y object z float64 w int64 v int64 dtype: object
Table.empty¶
Returns True if the table is empty otherwise returns False.
tab.empty
False
Table.ndim¶
Get the nuber of columns within the table.
tab.ndim
2
Table.shape¶
Get the shape of the table as a tuple (number of rows, number of columns).
tab.shape
(1000, 5)
Table.size¶
Get the number of values in the table (rows * cols).
tab.size
5000
Table.mean()¶
Table.mean(axis=0, numeric_only=False)
Get the mean of values across the requested axis.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
axis | int | The axis to calculate mean across 0 is columns, 1 is rows. | 0 |
numeric_only | bool | Include only columns / rows with numeric data. | False |
Returns:
Type | Description |
---|---|
Dictionary | The mean across each row / column with the key corresponding to the row number or column name. |
Examples:
Calculate the mean across the columns of a table
tab = kx.Table(data=
{
'a': [1, 2, 2, 4],
'b': [1, 2, 6, 7],
'c': [7, 8, 9, 10],
'd': [7, 11, 14, 14]
}
)
tab
pykx.Table(pykx.q(' a b c d --------- 1 1 7 7 2 2 8 11 2 6 9 14 4 7 10 14 '))
tab.mean()
pykx.Dictionary(pykx.q(' a| 2.25 b| 4 c| 8.5 d| 11.5 '))
Calculate the mean across the rows of a table
tab.mean(axis=1)
pykx.Dictionary(pykx.q(' 0| 4 1| 5.75 2| 7.75 3| 8.75 '))
Table.median()¶
Table.median(axis=0, numeric_only=False)
Get the median of values across the requested axis.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
axis | int | The axis to calculate median across 0 is columns, 1 is rows. | 0 |
numeric_only | bool | Include only columns / rows with numeric data. | False |
Returns:
Type | Description |
---|---|
Dictionary | The median across each row / column with the key corresponding to the row number or column name. |
Examples:
Calculate the median across the columns of a table
tab = kx.Table(data=
{
'a': [1, 2, 2, 4],
'b': [1, 2, 6, 7],
'c': [7, 8, 9, 10],
'd': [7, 11, 14, 14]
}
)
tab
pykx.Table(pykx.q(' a b c d --------- 1 1 7 7 2 2 8 11 2 6 9 14 4 7 10 14 '))
tab.median()
pykx.Dictionary(pykx.q(' a| 2 b| 4 c| 8.5 d| 12.5 '))
Calculate the median across the rows of a table
tab.median(axis=1)
pykx.Dictionary(pykx.q(' 0| 4 1| 5 2| 7.5 3| 8.5 '))
Table.mode()¶
Table.mode(axis=0, numeric_only=False, dropna=True)
Get the mode of values across the requested axis.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
axis | int | The axis to calculate mode across 0 is columns, 1 is rows. | 0 |
numeric_only | bool | Include only columns / rows with numeric data. | False |
dropna | bool | Remove null values from the data before calculating the mode. | True |
Returns:
Type | Description |
---|---|
Table | The mode across each row / column with the column corresponding to the row number or column name. |
Examples:
Calculate the mode across the columns of a table
tab = kx.Table(data=
{
'a': [1, 2, 2, 4],
'b': [1, 2, 6, 7],
'c': [7, 8, 9, 10],
'd': [7, 11, 14, 14]
}
)
tab
pykx.Table(pykx.q(' a b c d --------- 1 1 7 7 2 2 8 11 2 6 9 14 4 7 10 14 '))
tab.mode()
pykx.Table(pykx.q(' a b c d --------- 2 1 7 14 2 8 6 9 7 10 '))
Calculate the median across the rows of a table
tab.mode(axis=1)
pykx.Table(pykx.q(' idx 0 1 2 3 ------------- 0 1 7 1 2 2 2 6 9 14 3 4 7 10 14 '))
Calculate the mode across columns and keep null values.
tab = kx.Table(data=
{
'x': [0, 1, 2, 3, 4, 5, 6, 7, np.NaN, np.NaN],
'y': [10, 11, 12, 13, 14, 15, 16, 17, 18, np.NaN],
'z': ['a', 'b', 'c', 'd', 'd', 'e', 'e', 'f', 'g', 'h']
}
)
tab
pykx.Table(pykx.q(' x y z ------- 0 10 a 1 11 b 2 12 c 3 13 d 4 14 d 5 15 e 6 16 e 7 17 f 0n 18 g 0n 0n h '))
tab.mode(dropna=False)
pykx.Table(pykx.q(' x y z ------ 10 d 11 e 12 13 14 15 16 17 18 0n '))
Indexing¶
# The examples in this section will use this example table filled with random data
kx.q('N: 1000')
tab = kx.q('([] x: til N; y: N?`AAPL`GOOG`MSFT; z: N?500f; w: N?1000; v: N?(0N 0 50 100 200 250))')
tab
pykx.Table(pykx.q(' x y z w v ------------------------ 0 AAPL 60.54614 184 1 GOOG 292.5522 299 100 2 GOOG 283.0627 411 250 3 MSFT 42.38194 713 0 4 MSFT 453.4634 69 100 5 MSFT 12.28187 127 50 6 MSFT 76.00103 898 200 7 AAPL 17.802 822 0 8 AAPL 68.05945 197 0 9 AAPL 405.7366 56 0 10 MSFT 15.16717 964 50 11 GOOG 164.3905 826 12 MSFT 81.26465 593 250 13 MSFT 218.853 332 200 14 MSFT 181.2571 502 50 15 MSFT 311.1167 705 100 16 GOOG 81.45016 110 17 AAPL 70.85145 722 50 18 AAPL 172.3351 410 100 19 AAPL 9.906452 757 200 .. '))
Table.head()¶
Table.head(n=5)
Get the first n rows from a table.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
n | int | The number of rows to return. | 5 |
Returns:
Type | Description |
---|---|
Table | The first n rows of the table. |
Examples:
Return the first 5 rows of the table.
tab.head()
pykx.Table(pykx.q(' x y z w v ----------------------- 0 AAPL 60.54614 184 1 GOOG 292.5522 299 100 2 GOOG 283.0627 411 250 3 MSFT 42.38194 713 0 4 MSFT 453.4634 69 100 '))
Return the first 10 rows of the table.
tab.head(10)
pykx.Table(pykx.q(' x y z w v ----------------------- 0 AAPL 60.54614 184 1 GOOG 292.5522 299 100 2 GOOG 283.0627 411 250 3 MSFT 42.38194 713 0 4 MSFT 453.4634 69 100 5 MSFT 12.28187 127 50 6 MSFT 76.00103 898 200 7 AAPL 17.802 822 0 8 AAPL 68.05945 197 0 9 AAPL 405.7366 56 0 '))
Table.tail()¶
Table.tail(n=5)
Get the last n rows from a table.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
n | int | The number of rows to return. | 5 |
Returns:
Type | Description |
---|---|
Table | The last n rows of the table. |
Examples:
Return the last 5 rows of the table.
tab.tail()
pykx.Table(pykx.q(' x y z w v ------------------------ 995 AAPL 54.79148 295 996 AAPL 448.0876 779 997 GOOG 173.26 896 0 998 GOOG 350.8906 200 50 999 MSFT 409.2486 807 '))
Return the last 10 rows of the table.
tab.tail(10)
pykx.Table(pykx.q(' x y z w v ------------------------- 990 GOOG 407.4229 546 100 991 AAPL 18.6764 228 50 992 AAPL 408.8369 665 200 993 AAPL 282.1039 166 994 GOOG 69.54563 273 200 995 AAPL 54.79148 295 996 AAPL 448.0876 779 997 GOOG 173.26 896 0 998 GOOG 350.8906 200 50 999 MSFT 409.2486 807 '))
Table.get()¶
Table.get(key, default=None)
Get a column or columns from a table by key, if the key does not exist return the default value.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
key | Union[str, list[str]] | The column name or list of names to get from the table. | required |
default | int | The default value if the key is not found. | None |
Returns:
Type | Description |
---|---|
Union[Table, Any] | A table contatining only the columns requested or the default value. |
Examples:
Get the y
column from the table.
tab.get('y')
pykx.Table(pykx.q(' y ---- AAPL GOOG GOOG MSFT MSFT MSFT MSFT AAPL AAPL AAPL MSFT GOOG MSFT MSFT MSFT MSFT GOOG AAPL AAPL AAPL .. '))
Get the y
and z
columns from the table.
tab.get(['y', 'z'])
pykx.Table(pykx.q(' y z ------------- AAPL 60.54614 GOOG 292.5522 GOOG 283.0627 MSFT 42.38194 MSFT 453.4634 MSFT 12.28187 MSFT 76.00103 AAPL 17.802 AAPL 68.05945 AAPL 405.7366 MSFT 15.16717 GOOG 164.3905 MSFT 81.26465 MSFT 218.853 MSFT 181.2571 MSFT 311.1167 GOOG 81.45016 AAPL 70.85145 AAPL 172.3351 AAPL 9.906452 .. '))
Attempt to get the q
column from the table and recieve none as that column does not exist.
print(tab.get('q'))
None
Attempt to get the q
column from the table and recieve the default value not found
as that column does not exist.
tab.get('q', 'not found')
'not found'
Table.at[]¶
Table.at[row, col]
Access a single value for a row / column pair.
Similar to loc[]
, in that both provide label-based lookups. Use at
if you only need to get or set a single value.
The at
property can be used for both assignment and retrieval of values at a given row and column.
Examples:
Get the value of the z
column in the 997th row.
tab.at[997, 'z']
pykx.FloatAtom(pykx.q('173.26'))
Reassign the value of the z
column in the 997th row to 3.14159
.
tab.at[997, 'z'] = 3.14159
tab.at[997, 'z']
pykx.FloatAtom(pykx.q('3.14159'))
Table.loc[]¶
Table.loc[:, :]
Access a group of rows and columns by label or by BooleanVector.
loc
is a label based form of indexing, but may also be used with a boolean array.
Allowed inputs are:
- A single label, e.g. 5 or 'a', (note that 5 is interpreted as a label of the index, and never as an integer position along the index)
- A list or array of labels, e.g. ['a', 'b', 'c']
- A slice object with labels, e.g. 'a':'f'
- Warning contrary to usual python slices, both the start and the stop are included
- A BooleanVector of the same length as the axis being sliced
- An alignable BooleanVector. The index of the key will be aligned before masking
- An alignable Index. The Index of the returned selection will be the input
- A callable function with one argument (the calling Table like object) and that returns valid output for indexing (e.g. one of the above)
Note:
When the Pandas API is enabled, using []
to index into a table will use Table.loc[]
Returns:
Type | Description |
---|---|
Table | A table contatining only the columns / rows requested. |
Examples:
Get every row in the y
column.
tab[:, 'y']
pykx.Table(pykx.q(' y ---- AAPL GOOG GOOG MSFT MSFT MSFT MSFT AAPL AAPL AAPL MSFT GOOG MSFT MSFT MSFT MSFT GOOG AAPL AAPL AAPL .. '))
Get all rows of the table where the value in the z
column is greater than 250.0
tab[tab['z'] > 250.0]
pykx.Table(pykx.q(' x y z w v ------------------------ 1 GOOG 292.5522 299 100 2 GOOG 283.0627 411 250 4 MSFT 453.4634 69 100 9 AAPL 405.7366 56 0 15 MSFT 311.1167 705 100 22 MSFT 325.2313 425 100 23 GOOG 389.6867 143 200 26 MSFT 374.9333 225 27 AAPL 351.9635 242 200 28 AAPL 292.2184 153 50 30 MSFT 445.9395 900 100 31 AAPL 406.9874 495 250 32 GOOG 484.709 127 37 MSFT 354.8972 965 39 MSFT 323.4569 825 100 40 GOOG 446.6471 161 0 44 GOOG 465.8163 447 0 47 AAPL 251.128 608 250 49 MSFT 315.3486 997 51 GOOG 379.302 16 0 .. '))
Replace all null values in the column v
with the value -100
.
tab.loc[tab['v'] == kx.q('0N'), 'v'] = -100
tab
pykx.Table(pykx.q(' x y z w v ------------------------- 0 AAPL 60.54614 184 -100 1 GOOG 292.5522 299 100 2 GOOG 283.0627 411 250 3 MSFT 42.38194 713 0 4 MSFT 453.4634 69 100 5 MSFT 12.28187 127 50 6 MSFT 76.00103 898 200 7 AAPL 17.802 822 0 8 AAPL 68.05945 197 0 9 AAPL 405.7366 56 0 10 MSFT 15.16717 964 50 11 GOOG 164.3905 826 -100 12 MSFT 81.26465 593 250 13 MSFT 218.853 332 200 14 MSFT 181.2571 502 50 15 MSFT 311.1167 705 100 16 GOOG 81.45016 110 -100 17 AAPL 70.85145 722 50 18 AAPL 172.3351 410 100 19 AAPL 9.906452 757 200 .. '))
Replace all locations in column v
where the value is -100
with a null.
tab[tab['v'] == -100, 'v'] = kx.q('0N')
tab
pykx.Table(pykx.q(' x y z w v ------------------------ 0 AAPL 60.54614 184 1 GOOG 292.5522 299 100 2 GOOG 283.0627 411 250 3 MSFT 42.38194 713 0 4 MSFT 453.4634 69 100 5 MSFT 12.28187 127 50 6 MSFT 76.00103 898 200 7 AAPL 17.802 822 0 8 AAPL 68.05945 197 0 9 AAPL 405.7366 56 0 10 MSFT 15.16717 964 50 11 GOOG 164.3905 826 12 MSFT 81.26465 593 250 13 MSFT 218.853 332 200 14 MSFT 181.2571 502 50 15 MSFT 311.1167 705 100 16 GOOG 81.45016 110 17 AAPL 70.85145 722 50 18 AAPL 172.3351 410 100 19 AAPL 9.906452 757 200 .. '))
Usage of the loc
functionality under the hood additionally allows users to set columns within a table for single or multiple columns. Data passed for this can be q/Python.
tab['new_col'] = kx.q('1000?1f')
tab[['new_col1', 'new_col2']] = [20, kx.q('1000?0Ng')]
Table.iloc[]¶
Table.iloc[:, :]
Purely integer-location based indexing for selection by position.
iloc
is primarily integer position based (from 0 to length-1 of the axis), but may also be used with a BooleanVector.
Allowed inputs are:
- An integer, e.g. 5.
- A list or array of integers, e.g. [4, 3, 0].
- A slice object with ints, e.g. 1:7.
- A BooleanVector.
- A callable function with one argument (the calling Series or DataFrame) and that returns valid output for indexing (one of the above). This is useful in method chains, when you don’t have a reference to the calling object, but would like to base your selection on some value.
- A tuple of row and column indexes. The tuple elements consist of one of the above inputs, e.g. (0, 1).
Returns:
Type | Description |
---|---|
Table | A table contatining only the columns / rows requested. |
Examples:
Get the second row from a table.
tab.iloc[1]
pykx.Table(pykx.q(' x y z w v new_col new_col1 new_col2 ------------------------------------------------------------------------------- 1 GOOG 292.5522 299 100 0.2627833 20 f8695691-f901-256f-ebd0-29b8fa8b83d7 '))
Get the first 5 rows from a table.
tab.iloc[:5]
pykx.Table(pykx.q(' x y z w v new_col new_col1 new_col2 ------------------------------------------------------------------------------- 0 AAPL 60.54614 184 0.8227748 20 298f534d-d51c-5ef0-e267-2ca28b4aa3cb 1 GOOG 292.5522 299 100 0.2627833 20 f8695691-f901-256f-ebd0-29b8fa8b83d7 2 GOOG 283.0627 411 250 0.8676527 20 81539d7c-c284-f7e5-8f9f-9a88261be328 3 MSFT 42.38194 713 0 0.1259831 20 b30b2ac2-65a1-4a5a-c610-e115348e38bc 4 MSFT 453.4634 69 100 0.7169927 20 dbac4fbf-7e0d-9351-dada-3691d4032a1d '))
Get all rows of the table where the y
column is equal to AAPL
.
tab.iloc[tab['y'] == 'AAPL']
pykx.Table(pykx.q(' x y z w v new_col new_col1 new_col2 .. -----------------------------------------------------------------------------.. 0 AAPL 60.54614 184 0.8227748 20 298f534d-d51c-5ef0-e267-2ca28b4a.. 7 AAPL 17.802 822 0 0.9142186 20 0328294d-c97d-b42a-cd4f-5bef9c20.. 8 AAPL 68.05945 197 0 0.974406 20 abee1656-6460-b1f0-cdb8-287989e8.. 9 AAPL 405.7366 56 0 0.9522664 20 91533618-2e03-9cb6-54a9-a4b31fe3.. 17 AAPL 70.85145 722 50 0.3274562 20 0b0e7d70-d397-3df0-cb27-7918231b.. 18 AAPL 172.3351 410 100 0.8824775 20 de176486-d430-ebb9-ce40-32bdc9a1.. 19 AAPL 9.906452 757 200 0.4697741 20 9c96eca4-2fe5-3178-0da2-21390271.. 20 AAPL 199.5309 790 0.5114003 20 220980b5-aa45-eb96-a243-c57cc281.. 25 AAPL 27.35117 145 0 0.1378973 20 1f24d55e-b1cb-d1e8-3b17-3743629c.. 27 AAPL 351.9635 242 200 0.8224545 20 d9d2e29d-5d42-61f5-f964-d88d0ab6.. 28 AAPL 292.2184 153 50 0.6473412 20 dbba3ff9-6f78-6fca-0230-2004b4ad.. 31 AAPL 406.9874 495 250 0.8142842 20 332d646e-2238-b2ff-0ae3-473e8abe.. 35 AAPL 199.1284 252 200 0.1951745 20 868d878a-0d18-1131-ef38-1efe4b2b.. 38 AAPL 192.8447 604 0.2555771 20 1b25c156-1838-3a81-b121-bad1b8fc.. 45 AAPL 241.7971 919 50 0.70451 20 24ba816a-1536-1fa8-b0d7-cba0da95.. 46 AAPL 25.79979 131 50 0.03350763 20 5dabf5c6-3824-e0b3-91d0-1b3742dd.. 47 AAPL 251.128 608 250 0.6016906 20 e86919cf-45c5-121c-a6a0-35180ece.. 54 AAPL 374.937 986 0.4463675 20 c2eba394-27f5-5412-f086-4d18550e.. 57 AAPL 401.5671 996 0.4703428 20 412390fa-15ed-83f6-3ede-12597da8.. 58 AAPL 181.9741 768 0.3323934 20 1a1298db-67f2-5cae-28ef-9bbf13ef.. .. '))
Get all rows of the table where the y
column is equal to AAPL
, and only return the y
, z
and w
columns.
tab.iloc[tab['y'] == 'AAPL', ['y', 'z', 'w']]
pykx.Table(pykx.q(' y z w ----------------- AAPL 60.54614 184 AAPL 17.802 822 AAPL 68.05945 197 AAPL 405.7366 56 AAPL 70.85145 722 AAPL 172.3351 410 AAPL 9.906452 757 AAPL 199.5309 790 AAPL 27.35117 145 AAPL 351.9635 242 AAPL 292.2184 153 AAPL 406.9874 495 AAPL 199.1284 252 AAPL 192.8447 604 AAPL 241.7971 919 AAPL 25.79979 131 AAPL 251.128 608 AAPL 374.937 986 AAPL 401.5671 996 AAPL 181.9741 768 .. '))
Replace all null values in the column v
with the value -100
.
tab.iloc[tab['v'] == kx.q('0N'), 'v'] = -100
tab
pykx.Table(pykx.q(' x y z w v new_col new_col1 new_col2 .. -----------------------------------------------------------------------------.. 0 AAPL 60.54614 184 -100 0.8227748 20 298f534d-d51c-5ef0-e267-2ca28b4a.. 1 GOOG 292.5522 299 100 0.2627833 20 f8695691-f901-256f-ebd0-29b8fa8b.. 2 GOOG 283.0627 411 250 0.8676527 20 81539d7c-c284-f7e5-8f9f-9a88261b.. 3 MSFT 42.38194 713 0 0.1259831 20 b30b2ac2-65a1-4a5a-c610-e115348e.. 4 MSFT 453.4634 69 100 0.7169927 20 dbac4fbf-7e0d-9351-dada-3691d403.. 5 MSFT 12.28187 127 50 0.1039635 20 5c6624e8-cd25-3787-2dfa-82e87186.. 6 MSFT 76.00103 898 200 0.6325627 20 062e80b2-cd2a-38a8-b47a-a9e3019c.. 7 AAPL 17.802 822 0 0.9142186 20 0328294d-c97d-b42a-cd4f-5bef9c20.. 8 AAPL 68.05945 197 0 0.974406 20 abee1656-6460-b1f0-cdb8-287989e8.. 9 AAPL 405.7366 56 0 0.9522664 20 91533618-2e03-9cb6-54a9-a4b31fe3.. 10 MSFT 15.16717 964 50 0.4739534 20 31a20cbf-b153-064c-dcb9-3bfabc53.. 11 GOOG 164.3905 826 -100 0.1147673 20 ccc5c42a-bfae-2859-030c-8829efc2.. 12 MSFT 81.26465 593 250 0.8798201 20 fc473882-4a9c-7f52-e526-8c8f7df0.. 13 MSFT 218.853 332 200 0.631918 20 be5f7603-6d21-2540-5c5a-2121ee6d.. 14 MSFT 181.2571 502 50 0.6595839 20 31872838-81c3-8eb2-7183-eb3b022f.. 15 MSFT 311.1167 705 100 0.2369972 20 ba604955-0923-9121-6183-9584ef9d.. 16 GOOG 81.45016 110 -100 0.4429199 20 c5d8b6f3-f0c6-087c-8906-c2e874b7.. 17 AAPL 70.85145 722 50 0.3274562 20 0b0e7d70-d397-3df0-cb27-7918231b.. 18 AAPL 172.3351 410 100 0.8824775 20 de176486-d430-ebb9-ce40-32bdc9a1.. 19 AAPL 9.906452 757 200 0.4697741 20 9c96eca4-2fe5-3178-0da2-21390271.. .. '))
Table.pop()¶
Table.pop(item)
Remove a column or columns from a table by column name and return the column after it has been removed.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
item | Union[str, list[str]] | The column name or list of names to pop from the table. | required |
Returns:
Type | Description |
---|---|
Table | A table contatining only the columns removed from the input table. |
Examples:
Remove the v
column from the table and return it.
print(f'{tab.head()}\n\n')
print('Pop the `v` column out of the table')
print(f'{tab.pop("v").head()}\n\n')
print('Updated Table')
print(tab.head())
x y z w v new_col new_col1 new_col2 .. -----------------------------------------------------------------------------.. 0 AAPL 60.54614 184 -100 0.8227748 20 298f534d-d51c-5ef0-e267-2ca28b4aa.. 1 GOOG 292.5522 299 100 0.2627833 20 f8695691-f901-256f-ebd0-29b8fa8b8.. 2 GOOG 283.0627 411 250 0.8676527 20 81539d7c-c284-f7e5-8f9f-9a88261be.. 3 MSFT 42.38194 713 0 0.1259831 20 b30b2ac2-65a1-4a5a-c610-e115348e3.. 4 MSFT 453.4634 69 100 0.7169927 20 dbac4fbf-7e0d-9351-dada-3691d4032.. Pop the `v` column out of the table v ---- -100 100 250 0 100 Updated Table x y z w new_col new_col1 new_col2 --------------------------------------------------------------------------- 0 AAPL 60.54614 184 0.8227748 20 298f534d-d51c-5ef0-e267-2ca28b4aa3cb 1 GOOG 292.5522 299 0.2627833 20 f8695691-f901-256f-ebd0-29b8fa8b83d7 2 GOOG 283.0627 411 0.8676527 20 81539d7c-c284-f7e5-8f9f-9a88261be328 3 MSFT 42.38194 713 0.1259831 20 b30b2ac2-65a1-4a5a-c610-e115348e38bc 4 MSFT 453.4634 69 0.7169927 20 dbac4fbf-7e0d-9351-dada-3691d4032a1d
Remove the z
and w
columns from the table and return them.
print(f'{tab.head()}\n\n')
print('Pop the `z` and `w` columns out of the table')
print(f'{tab.pop(["z", "w"]).head()}\n\n')
print('Updated Table')
print(tab.head())
x y z w new_col new_col1 new_col2 --------------------------------------------------------------------------- 0 AAPL 60.54614 184 0.8227748 20 298f534d-d51c-5ef0-e267-2ca28b4aa3cb 1 GOOG 292.5522 299 0.2627833 20 f8695691-f901-256f-ebd0-29b8fa8b83d7 2 GOOG 283.0627 411 0.8676527 20 81539d7c-c284-f7e5-8f9f-9a88261be328 3 MSFT 42.38194 713 0.1259831 20 b30b2ac2-65a1-4a5a-c610-e115348e38bc 4 MSFT 453.4634 69 0.7169927 20 dbac4fbf-7e0d-9351-dada-3691d4032a1d Pop the `z` and `w` columns out of the table z w ------------ 60.54614 184 292.5522 299 283.0627 411 42.38194 713 453.4634 69 Updated Table x y new_col new_col1 new_col2 -------------------------------------------------------------- 0 AAPL 0.8227748 20 298f534d-d51c-5ef0-e267-2ca28b4aa3cb 1 GOOG 0.2627833 20 f8695691-f901-256f-ebd0-29b8fa8b83d7 2 GOOG 0.8676527 20 81539d7c-c284-f7e5-8f9f-9a88261be328 3 MSFT 0.1259831 20 b30b2ac2-65a1-4a5a-c610-e115348e38bc 4 MSFT 0.7169927 20 dbac4fbf-7e0d-9351-dada-3691d4032a1d
Reindexing¶
# The examples in this section will use this example table filled with random data
kx.q('N: 1000')
tab = kx.q('([] x: til N; y: N?`AAPL`GOOG`MSFT; z: N?500f; w: N?1000; v: N?(0N 0 50 100 200 250))')
tab.head()
pykx.Table(pykx.q(' x y z w v ----------------------- 0 MSFT 430.2492 949 250 1 AAPL 305.8041 558 250 2 AAPL 414.3448 674 250 3 AAPL 84.95412 710 0 4 GOOG 372.5827 348 '))
Table.drop()¶
Table.drop(item, axis=0)
Remove either columns or rows from a table and return the resulting Table object.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
item | Union[str, list[str]] | The column name(s) or row number(s) to drop from the table. | required |
axis | int | The column name or list of names to pop from the table. | 0 |
Returns:
Type | Description |
---|---|
Table | A table with the given column(s) / row(s) removed. |
Examples:
Drop rows from a table.
tab.drop([0, 2, 4, 6, 8, 10]).head()
pykx.Table(pykx.q(' x y z w v ----------------------- 1 AAPL 305.8041 558 250 3 AAPL 84.95412 710 0 5 GOOG 12.72708 48 0 7 AAPL 2.78757 833 0 9 MSFT 167.0789 260 50 '))
Drop columns from a table.
tab.drop('y', axis=1).head()
pykx.Table(pykx.q(' x z w v ------------------ 0 430.2492 949 250 1 305.8041 558 250 2 414.3448 674 250 3 84.95412 710 0 4 372.5827 348 '))
Table.drop_duplicates()¶
Table.drop_duplicates()
Remove either columns or rows from a table and return the resulting Table object.
Returns:
Type | Description |
---|---|
Table | A table with all duplicate rows removed. |
Examples:
Create a table with duplicates for the example
tab2 = kx.q('([] 100?`AAPL`GOOG`MSFT; 100?3)')
tab2
pykx.Table(pykx.q(' x x1 ------- AAPL 0 AAPL 1 MSFT 1 AAPL 0 AAPL 0 GOOG 2 GOOG 0 MSFT 2 MSFT 2 MSFT 2 AAPL 1 AAPL 0 AAPL 1 GOOG 0 GOOG 0 MSFT 0 AAPL 2 GOOG 1 AAPL 1 AAPL 1 .. '))
Drop all duplicate rows from the table.
tab2.drop_duplicates()
pykx.Table(pykx.q(' x x1 ------- AAPL 0 AAPL 1 MSFT 1 GOOG 2 GOOG 0 MSFT 2 MSFT 0 AAPL 2 GOOG 1 '))
Table.rename()¶
Table.rename(columns)
Rename columns in a table and return the resulting Table object.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
item | dict[str, str] | A dictonary of column name to new column name to use when renaming. | required |
Returns:
Type | Description |
---|---|
Table | A table with the given column(s) renamed. |
Examples:
The inital table we will be renaming columns on.
tab.head()
pykx.Table(pykx.q(' x y z w v ----------------------- 0 MSFT 430.2492 949 250 1 AAPL 305.8041 558 250 2 AAPL 414.3448 674 250 3 AAPL 84.95412 710 0 4 GOOG 372.5827 348 '))
Rename column y
to symbol
and z
to price
.
tab.rename(columns={'y': 'symbol', 'z': 'price'}).head()
pykx.Table(pykx.q(' x symbol price w v ------------------------- 0 MSFT 430.2492 949 250 1 AAPL 305.8041 558 250 2 AAPL 414.3448 674 250 3 AAPL 84.95412 710 0 4 GOOG 372.5827 348 '))
Table.sample()¶
Table.sample(n, frac, replace, weights, random_state, axis, ignore_index)
Sample random data from the table.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
n | int | Number of rows to return. Cannot be used with frac . Default is 1 if frac is None. |
None |
frac | float | Fraction of the rows to return. Cannot be used with n . |
None |
replace | bool | Whether or not it should be possible to sample the same row twice. | False |
weights | None | Not yet implemented. | None |
random_state | None | Not yet implemented. | None |
axis | None | Not yet implemented. | None |
ignore_index | bool | Not yet implemented. | False |
Returns:
Type | Description |
---|---|
Table | A table with the given column(s) renamed. |
Examples:
Sample 10 Rows.
tab.sample(n=10)
pykx.Table(pykx.q(' x y z w v ------------------------- 694 MSFT 176.7876 657 50 730 AAPL 139.1981 119 100 907 AAPL 475.5725 270 100 35 MSFT 364.8574 31 100 374 AAPL 290.6377 709 0 839 GOOG 70.42376 567 0 620 GOOG 457.2576 927 100 779 GOOG 384.5403 343 200 805 AAPL 474.8613 835 0 217 AAPL 220.8823 477 250 '))
Sample 10% of the rows.
tab.sample(frac=0.1)
pykx.Table(pykx.q(' x y z w v ------------------------- 51 GOOG 192.5514 768 100 959 MSFT 258.0994 8 250 273 AAPL 211.6923 890 50 996 AAPL 126.8514 647 250 893 AAPL 70.56788 471 250 747 GOOG 139.1643 843 200 750 AAPL 65.00057 842 100 821 MSFT 211.0226 785 782 MSFT 105.5827 832 100 69 MSFT 146.7083 165 572 MSFT 8.196531 845 946 MSFT 99.4288 676 0 721 MSFT 259.7821 762 50 391 MSFT 79.41776 571 250 390 GOOG 140.954 68 0 775 MSFT 481.119 681 0 963 MSFT 252.8108 985 445 MSFT 61.92275 926 50 847 MSFT 217.0418 762 250 843 AAPL 104.0953 304 100 .. '))
Sample 10% of the rows and allow the same row to be sampled twice.
tab.sample(frac=0.1, replace=True)
pykx.Table(pykx.q(' x y z w v ------------------------- 847 MSFT 217.0418 762 250 113 MSFT 153.6541 927 568 AAPL 474.214 925 200 985 MSFT 276.398 536 50 562 GOOG 122.4301 218 0 824 GOOG 472.5462 639 250 459 MSFT 176.35 846 0 818 GOOG 217.7887 183 693 GOOG 207.8291 117 250 123 AAPL 457.6728 514 100 793 MSFT 39.68875 661 200 563 GOOG 297.4374 822 100 656 MSFT 330.0581 298 250 93 GOOG 496.3905 578 250 308 MSFT 151.5628 990 50 717 MSFT 491.4528 980 200 115 AAPL 16.74892 432 50 195 AAPL 201.6598 861 48 GOOG 450.0731 88 200 447 GOOG 90.86316 760 0 .. '))
Table.select_dtypes()¶
Table.select_dtypes(include=None, exclude=None)
Return a subset of the DataFrame’s columns based on the column dtypes.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
include | Union[List, str] | A selection of dtypes or strings to be included. | None |
exclude | Union[List, str] | A selection of dtypes or strings to be excluded. | None |
At least one of these parameters must be supplied.
Returns:
Type | Description |
---|---|
Dataframe | The subset of the frame including the dtypes in include and excluding the dtypes in exclude . |
Examples:
The examples in the section will use the example table.
df = kx.q('([] c1:`a`b`c; c2:1 2 3h; c3:1 2 3j; c4:1 2 3i)')
Exclude columns contatining symbols
df.select_dtypes(exclude = kx.SymbolVector)
pykx.Table(pykx.q(' c2 c3 c4 -------- 1 1 1 2 2 2 3 3 3 '))
Include a list of column types
df.select_dtypes(include = [kx.ShortVector, kx.LongVector])
pykx.Table(pykx.q(' c2 c3 ----- 1 1 2 2 3 3 '))
Table.astype()¶
Table.astype(dtype, copy=True, errors='raise')
Cast a column/columns of the Dataframes object to a specified dtype
.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dtype | data type, or dict of column name -> data type | Use a PyKx wrapper data type or Python type to cast all columns to the same type. Alternatively, use {col: dtype, …}, where col is a column label and dtype is PyKx wrapper data type to cast one or more of the DataFrame’s columns to column-specific types. | |
copy | Boolean | Default of True, False not implemented | True |
errors | {‘raise’, ‘ignore’} | If passed anything other than 'raise', it will return the dataframe | 'raise' |
Returns:
Type | Description |
---|---|
Dataframe | The dataframe with columns casted according to passed dtypes |
Examples:
The examples in the section will use the example table.
df = kx.q('([] c1:1 2 3i; c2:1 2 3j; c3:1 2 3h; c4:1 2 3i)')
Cast all columns to dtype LongVector
df.astype(kx.LongVector)
pykx.Table(pykx.q(' c1 c2 c3 c4 ----------- 1 1 1 1 2 2 2 2 3 3 3 3 '))
Casting as specified in the dcitionary supplied with given dtype per column
df.astype({'c1':kx.LongVector, 'c2':'kx.ShortVector'})
pykx.Table(pykx.q(' c1 c2 c3 c4 ----------- 1 1 1 1 2 2 2 2 3 3 3 3 '))
The next example will use this table
df = kx.q('([] c1:3#.z.p; c2:`abc`def`ghi; c3:1 2 3j; c4:("abc";"def";"ghi");c5:"abc";c6:(1 2 3;4 5 6;7 8 9))')
Casting char and string columns to symbol columns
df.astype({'c4':kx.SymbolVector, 'c5':kx.SymbolVector})
pykx.Table(pykx.q(' c1 c2 c3 c4 c5 c6 ------------------------------------------------- 2023.08.18D09:51:22.566432006 abc 1 abc a 1 2 3 2023.08.18D09:51:22.566432006 def 2 def b 4 5 6 2023.08.18D09:51:22.566432006 ghi 3 ghi c 7 8 9 '))
Merging¶
Table.merge()¶
Table.merge(
right,
how='inner',
on=None,
left_on=None,
right_on=None,
left_index=False,
right_index=False,
sort=False,
suffixes=('_x', '_y'),
copy=True,
validate=None,
q_join=False
)
Merge Table or KeyedTable objects with a database-style join.
The join is done on columns or indexes. If joining columns on columns, the DataFrame indexes will be ignored. Otherwise if joining indexes on indexes or indexes on a column or columns, the index will be passed on. When performing a cross merge, no column specifications to merge on are allowed.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
right | Union[Table/KeyedTable] | The object to merge with. | required |
how | str | The type of join to be used. One of {‘left’, ‘right’, ‘outer’, ‘inner’, ‘cross’}. | ‘inner’ |
on | str | The column name to join on. | None |
left_on | str | The column name in the left table to join on. | None |
right_on | str | The column name in the right table to join on. | None |
left_index | bool | Use the index of the left Table. | False |
right_index | bool | Use the index of the right Table. | False |
sort | bool | Sort the join keys of the resulting table. | False |
suffixes | Tuple(str, str) | The number of rows to return. | ('_x', '_y') |
copy | bool | If False avoid copies and modify the input table. | None |
validate | str | If specified checks if merge matches specified type. - “one_to_one” or “1:1”: check if merge keys are unique in both left and right datasets. - “one_to_many” or “1:m”: check if merge keys are unique in left dataset. - “many_to_one” or “m:1”: check if merge keys are unique in right dataset. - “many_to_many” or “m:m”: allowed, but does not result in checks. |
None |
q_join | bool | If True perform native q joins instead of the pandas SQL like joins. More documentation around these joins can be found here. | False |
Returns:
Type | Description |
---|---|
Table / KeyedTable | The resulting table like object after the join has been preformed. |
Examples:
Merge tab1 and tab2 on the lkey and rkey columns. The value columns have the default suffixes, _x and _y, appended.
tab1 = kx.Table(data={'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
tab2 = kx.Table(data={'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
tab1.merge(tab2, left_on='lkey', right_on='rkey')
pykx.Table(pykx.q(' lkey value_x rkey value_y ------------------------- foo 1 foo 5 foo 1 foo 8 foo 5 foo 5 foo 5 foo 8 bar 2 bar 6 baz 3 baz 7 '))
Merge tab1 and tab2 on the lkey and rkey columns using a native q inner join. The value columns have the default suffixes, _x and _y, appended.
tab1.merge(tab2, left_on='lkey', right_on='rkey', q_join=True)
pykx.Table(pykx.q(' lkey value_x rkey value_y ------------------------- foo 1 foo 5 bar 2 bar 6 baz 3 baz 7 foo 5 foo 5 '))
Merge tab1 and tab2 with specified left and right suffixes appended to any overlapping columns.
tab1.merge(tab2, left_on='lkey', right_on='rkey', suffixes=('_left', '_right'))
pykx.Table(pykx.q(' lkey value_left rkey value_right -------------------------------- foo 1 foo 5 foo 1 foo 8 foo 5 foo 5 foo 5 foo 8 bar 2 bar 6 baz 3 baz 7 '))
Merge tab1 and tab2 but raise an exception if the Tables have any overlapping columns.
try:
tab1.merge(tab2, left_on='lkey', right_on='rkey', suffixes=(False, False))
except BaseException as e:
print(f'Caught Error: {e}')
Caught Error: Columns overlap but no suffix specified: ['value']
tab1 = kx.Table(data={'a': ['foo', 'bar'], 'b': [1, 2]})
tab2 = kx.Table(data={'a': ['foo', 'baz'], 'c': [3, 4]})
Merge tab1 and tab2 on the a
column using an inner join.
tab1.merge(tab2, how='inner', on='a')
pykx.Table(pykx.q(' a b c ------- foo 1 3 '))
Merge tab1 and tab2 on the a
column using a left join.
tab1.merge(tab2, how='left', on='a')
pykx.Table(pykx.q(' a b c ------- foo 1 3 bar 2 '))
Merge tab1 and tab2 using a cross join.
tab1 = kx.Table(data={'left': ['foo', 'bar']})
tab2 = kx.Table(data={'right': [7, 8]})
tab1.merge(tab2, how='cross')
pykx.Table(pykx.q(' left right ---------- foo 7 foo 8 bar 7 bar 8 '))
Table.merge_asof()¶
Table.merge_asof(
right,
on=None,
left_on=None,
right_on=None,
left_index=False,
right_index=False,
by=None,
left_by=None,
right_by=None,
suffixes=('_x', '_y'),
tolerance=None,
allow_exact_matches=True,
direction='backward'
)
Merge Table or KeyedTable objects with a database-style join.
The join is done on columns or indexes. If joining columns on columns, the DataFrame indexes will be ignored. Otherwise if joining indexes on indexes or indexes on a column or columns, the index will be passed on. When performing a cross merge, no column specifications to merge on are allowed.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
right | Union[Table/KeyedTable] | The object to merge with. | required |
how | str | The type of join to be used. One of {‘left’, ‘right’, ‘outer’, ‘inner’, ‘cross’}. | ‘inner’ |
on | str | The column name to join on. | None |
left_on | str | The column name in the left table to join on. | None |
right_on | str | The column name in the right table to join on. | None |
left_index | bool | Use the index of the left Table. | False |
right_index | bool | Use the index of the right Table. | False |
by | str | Not yet implemented. | None |
left_by | str | Field names to match on in the left table. | None |
right_by | str | Field names to match on in the right table. | None |
suffixes | Tuple(str, str) | The number of rows to return. | ('_x', '_y') |
tolerance | Any | Not yet implemented. | None |
allow_exact_matches | bool | Not yet implemented. | True |
direction | str | Not yet implemented. | 'backward' |
Returns:
Type | Description |
---|---|
Table / KeyedTable | The resulting table like object after the join has been preformed. |
Examples:
Perform a simple asof join on two tables.
left = kx.Table(data={"a": [1, 5, 10], "left_val": ["a", "b", "c"]})
right = kx.Table(data={"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]})
left
pykx.Table(pykx.q(' a left_val ----------- 1 a 5 b 10 c '))
right
pykx.Table(pykx.q(' a right_val ----------- 1 1 2 2 3 3 6 6 7 7 '))
left.merge_asof(right)
pykx.Table(pykx.q(' a left_val right_val --------------------- 1 a 1 5 b 3 10 c 7 '))
Perform a asof join on two tables but first merge them on the by column.
trades = kx.Table(data={
"time": [
pd.Timestamp("2016-05-25 13:30:00.023"),
pd.Timestamp("2016-05-25 13:30:00.023"),
pd.Timestamp("2016-05-25 13:30:00.030"),
pd.Timestamp("2016-05-25 13:30:00.041"),
pd.Timestamp("2016-05-25 13:30:00.048"),
pd.Timestamp("2016-05-25 13:30:00.049"),
pd.Timestamp("2016-05-25 13:30:00.072"),
pd.Timestamp("2016-05-25 13:30:00.075")
],
"ticker": [
"GOOG",
"MSFT",
"MSFT",
"MSFT",
"GOOG",
"AAPL",
"GOOG",
"MSFT"
],
"bid": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01],
"ask": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03]
})
quotes = kx.Table(data={
"time": [
pd.Timestamp("2016-05-25 13:30:00.023"),
pd.Timestamp("2016-05-25 13:30:00.038"),
pd.Timestamp("2016-05-25 13:30:00.048"),
pd.Timestamp("2016-05-25 13:30:00.048"),
pd.Timestamp("2016-05-25 13:30:00.048")
],
"ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"],
"price": [51.95, 51.95, 720.77, 720.92, 98.0],
"quantity": [75, 155, 100, 100, 100]
})
trades
pykx.Table(pykx.q(' time ticker bid ask ------------------------------------------------- 2016.05.25D13:30:00.023000000 GOOG 720.5 720.93 2016.05.25D13:30:00.023000000 MSFT 51.95 51.96 2016.05.25D13:30:00.030000000 MSFT 51.97 51.98 2016.05.25D13:30:00.041000000 MSFT 51.99 52 2016.05.25D13:30:00.048000000 GOOG 720.5 720.93 2016.05.25D13:30:00.049000000 AAPL 97.99 98.01 2016.05.25D13:30:00.072000000 GOOG 720.5 720.88 2016.05.25D13:30:00.075000000 MSFT 52.01 52.03 '))
quotes
pykx.Table(pykx.q(' time ticker price quantity ---------------------------------------------------- 2016.05.25D13:30:00.023000000 MSFT 51.95 75 2016.05.25D13:30:00.038000000 MSFT 51.95 155 2016.05.25D13:30:00.048000000 GOOG 720.77 100 2016.05.25D13:30:00.048000000 GOOG 720.92 100 2016.05.25D13:30:00.048000000 AAPL 98 100 '))
trades.merge_asof(quotes, on="time")
pykx.Table(pykx.q(' time ticker_x bid ask ticker_y price quantity --------------------------------------------------------------------------- 2016.05.25D13:30:00.023000000 GOOG 720.5 720.93 MSFT 51.95 75 2016.05.25D13:30:00.023000000 MSFT 51.95 51.96 MSFT 51.95 75 2016.05.25D13:30:00.030000000 MSFT 51.97 51.98 MSFT 51.95 75 2016.05.25D13:30:00.041000000 MSFT 51.99 52 MSFT 51.95 155 2016.05.25D13:30:00.048000000 GOOG 720.5 720.93 AAPL 98 100 2016.05.25D13:30:00.049000000 AAPL 97.99 98.01 AAPL 98 100 2016.05.25D13:30:00.072000000 GOOG 720.5 720.88 AAPL 98 100 2016.05.25D13:30:00.075000000 MSFT 52.01 52.03 AAPL 98 100 '))
Computations¶
# All the examples in this section will use this example table.
kx.q('N: 100')
tab = kx.q('([] sym: N?`AAPL`GOOG`MSFT; price: 250f - N?500f; traded: 100 - N?200; hold: N?0b)')
tab
pykx.Table(pykx.q(' sym price traded hold -------------------------- AAPL -199.1755 -91 1 GOOG 213.3049 -13 1 MSFT 112.9427 6 1 AAPL -36.42095 -37 0 MSFT 192.8483 44 0 MSFT 25.78804 45 0 AAPL 82.48745 38 0 AAPL -90.13045 -72 0 GOOG 36.02361 -41 1 GOOG 153.3607 -1 1 GOOG 170.2281 94 1 AAPL -148.6218 -20 0 MSFT 99.84476 41 0 MSFT -22.29842 -97 1 MSFT -237.9764 -6 1 GOOG 108.9152 32 1 GOOG -213.9602 65 0 GOOG 79.74593 -34 0 MSFT 18.28698 19 0 MSFT -19.58593 63 1 .. '))
Table.abs()¶
Table.abs(numeric_only=False)
Take the absolute value of each element in the table. Will raise an error if there are columns that contain data that have no absolute value.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
numeric_only | bool | Only use columns of the table that can be converted to an absolute value. | False |
Returns:
Type | Description |
---|---|
Table / KeyedTable | The resulting table like object with only positive numerical values. |
tab.abs(numeric_only=True)
pykx.Table(pykx.q(' price traded --------------- 199.1755 91 213.3049 13 112.9427 6 36.42095 37 192.8483 44 25.78804 45 82.48745 38 90.13045 72 36.02361 41 153.3607 1 170.2281 94 148.6218 20 99.84476 41 22.29842 97 237.9764 6 108.9152 32 213.9602 65 79.74593 34 18.28698 19 19.58593 63 .. '))
Table.all()¶
Table.all(axis=0, bool_only=False, skipna=True)
Returns whether or not all values across the given axis have a truthy
value.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
axis | int | The axis to calculate all across 0 is columns, 1 is rows. |
0 |
bool_only | bool | Only use columns of the table that are boolean types. | False |
skipna | bool | Ignore any null values along the axis. | True |
Returns:
Type | Description |
---|---|
Dictionary | A dictionary where the key represent the column name / row number and the values are the result of calling all on that column / row. |
tab.all()
pykx.Dictionary(pykx.q(' sym | 1 price | 1 traded| 1 hold | 0 '))
Table.any()¶
Table.any(axis=0, bool_only=False, skipna=True)
Returns whether or not any values across the given axis have a truthy
value.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
axis | int | The axis to calculate any across 0 is columns, 1 is rows. |
0 |
bool_only | bool | Only use columns of the table that are boolean types. | False |
skipna | bool | Ignore any null values along the axis. | True |
Returns:
Type | Description |
---|---|
Dictionary | A dictionary where the key represent the column name / row number and the values are the result of calling any on that column / row. |
tab.any()
pykx.Dictionary(pykx.q(' sym | 1 price | 1 traded| 1 hold | 1 '))
Table.max()¶
Table.max(axis=0, skipna=True, numeric_only=False)
Returns the maximum value across the given axis.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
axis | int | The axis to calculate the maximum across 0 is columns, 1 is rows. | 0 |
skipna | bool | Ignore any null values along the axis. | True |
numeric_only | bool | Only use columns of the table that are of a numeric data type. | False |
Returns:
Type | Description |
---|---|
Dictionary | A dictionary where the key represent the column name / row number and the values are the result of calling max on that column / row. |
tab.max()
pykx.Dictionary(pykx.q(' sym | `MSFT price | 246.298 traded| 100 hold | 1b '))
Table.min()¶
Table.min(axis=0, skipna=True, numeric_only=False)
Returns the minimum value across the given axis.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
axis | int | The axis to calculate the minimum across 0 is columns, 1 is rows. | 0 |
skipna | bool | Ignore any null values along the axis. | True |
numeric_only | bool | Only use columns of the table that are of a numeric data type. | False |
Returns:
Type | Description |
---|---|
Dictionary | A dictionary where the key represent the column name / row number and the values are the result of calling min on that column / row. |
tab.min()
pykx.Dictionary(pykx.q(' sym | `AAPL price | -240.7506 traded| -97 hold | 0b '))
Table.sum()¶
Table.sum(axis=0, skipna=True, numeric_only=False, min_count=0)
Returns the sum of all values across the given axis.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
axis | int | The axis to calculate the sum across 0 is columns, 1 is rows. | 0 |
skipna | bool | Ignore any null values along the axis. | True |
numeric_only | bool | Only use columns of the table that are of a numeric data type. | False |
min_count | int | If not set to 0 if there are less then min_count values across the axis a null value will be returned |
0 |
Returns:
Type | Description |
---|---|
Dictionary | A dictionary where the key represent the column name / row number and the values are the result of calling sum on that column / row. |
tab.sum()
pykx.Dictionary(pykx.q(' sym | `AAPLGOOGMSFTAAPLMSFTMSFTAAPLAAPLGOOGGOOGGOOGAAPLMSFTMSFTMSFTGOOGGOOG.. price | 1871.6 traded| 597 hold | 50i '))
Table.prod()¶
Table.prod(axis=0, skipna=True, numeric_only=False, min_count=0)
Returns the product of all values across the given axis.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
axis | int | The axis to calculate the product across 0 is columns, 1 is rows. | 0 |
skipna | bool | Ignore any null values along the axis. | True |
numeric_only | bool | Only use columns of the table that are of a numeric data type. | False |
min_count | int | If not set to 0 if there are less then min_count values across the axis a null value will be returned |
0 |
Returns:
Type | Description |
---|---|
Dictionary | A dictionary where the key represent the column name / row number and the values are the result of calling prd on that column / row. |
# This example will use a smaller version of the above table
# as the result of calculating the product quickly goes over the integer limits.
kx.q('N: 10')
tab = kx.q('([] sym: N?`AAPL`GOOG`MSFT; price: 2.5f - N?5f; traded: 10 - N?20; hold: N?0b)')
tab[tab['traded'] == 0, 'traded'] = 1
tab[tab['price'] == 0, 'price'] = 1.0
tab
pykx.Table(pykx.q(' sym price traded hold ---------------------------- AAPL -2.043057 8 0 MSFT 0.8623372 10 1 GOOG -2.328425 -4 1 MSFT -0.09490959 1 0 AAPL 1.381233 -9 1 MSFT -1.114598 -5 0 GOOG -0.4742994 -4 1 AAPL 0.66548 1 1 MSFT 1.638484 -3 1 GOOG 2.256448 -7 0 '))
tab.prod(numeric_only=True)
pykx.Dictionary(pykx.q(' price | -0.6994716 traded| 1209600 hold | 0i '))
Setting Indexes¶
Table.set_index()¶
Table.set_index(
keys,
drop=True,
append=False,
inplace=False,
verify_integrity=False,
)
Add index/indexes to a Table/KeyedTable.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
keys | Union[Symbol/SymbolVector/Table] | The key(s) or data to key on | required |
drop | bool | Not Yet Implemented | True |
append | bool | Whether to append columns to existing index. | False |
inplace | bool | Not Yet Implemented | False |
verify_integrity | bool | Check the new index for duplicates | False |
Returns:
Type | Description |
---|---|
KeyedTable | The resulting table after the index is applied |
Examples:
Adding indexes:
kx.q('N: 10')
tab = kx.q('([] sym: N?`AAPL`GOOG`MSFT; price: 2.5f - N?5f; traded: N?0 1; hold: N?01b)')
#Setting a single index
tab.set_index('sym')
pykx.KeyedTable(pykx.q(' sym | price traded hold ----| ---------------------- AAPL| 2.221871 1 0 AAPL| -0.5340566 0 1 GOOG| -0.9124374 1 1 AAPL| -2.200924 1 1 GOOG| -0.2018351 1 1 AAPL| 2.096948 0 0 AAPL| -1.962582 1 1 AAPL| -2.228272 0 1 MSFT| 1.64242 0 1 GOOG| 2.290576 0 1 '))
#Setting multipe indexes
tab.set_index(['sym', 'traded'])
pykx.KeyedTable(pykx.q(' sym traded| price hold -----------| --------------- AAPL 1 | 2.221871 0 AAPL 0 | -0.5340566 1 GOOG 1 | -0.9124374 1 AAPL 1 | -2.200924 1 GOOG 1 | -0.2018351 1 AAPL 0 | 2.096948 0 AAPL 1 | -1.962582 1 AAPL 0 | -2.228272 1 MSFT 0 | 1.64242 1 GOOG 0 | 2.290576 1 '))
#Pass a table as index (lengths must match)
status = kx.q('{select movement from ungroup select movement:`down`up 0<=deltas price by sym from x}',tab)
tab.set_index(status)
pykx.KeyedTable(pykx.q(' movement| sym price traded hold --------| --------------------------- up | AAPL 2.221871 1 0 down | AAPL -0.5340566 0 1 down | GOOG -0.9124374 1 1 up | AAPL -2.200924 1 1 down | GOOG -0.2018351 1 1 down | AAPL 2.096948 0 0 down | AAPL -1.962582 1 1 up | AAPL -2.228272 0 1 up | MSFT 1.64242 0 1 up | GOOG 2.290576 0 1 '))
Appending:
#Default is false - previous index 'sym' deleted and replaced by 'hold'
tab.set_index('sym').set_index('hold')
pykx.KeyedTable(pykx.q(' hold| price traded ----| ----------------- 0 | 2.221871 1 1 | -0.5340566 0 1 | -0.9124374 1 1 | -2.200924 1 1 | -0.2018351 1 0 | 2.096948 0 1 | -1.962582 1 1 | -2.228272 0 1 | 1.64242 0 1 | 2.290576 0 '))
#append= True will retain 'sym' index and add 'hold' as second index
tab.set_index('sym').set_index('hold', append= True)
pykx.KeyedTable(pykx.q(' sym hold| price traded ---------| ----------------- AAPL 0 | 2.221871 1 AAPL 1 | -0.5340566 0 GOOG 1 | -0.9124374 1 AAPL 1 | -2.200924 1 GOOG 1 | -0.2018351 1 AAPL 0 | 2.096948 0 AAPL 1 | -1.962582 1 AAPL 1 | -2.228272 0 MSFT 1 | 1.64242 0 GOOG 1 | 2.290576 0 '))
Verify Integrity:
#Will allow duplicates in index:
tab.set_index('sym')
pykx.KeyedTable(pykx.q(' sym | price traded hold ----| ---------------------- AAPL| 2.221871 1 0 AAPL| -0.5340566 0 1 GOOG| -0.9124374 1 1 AAPL| -2.200924 1 1 GOOG| -0.2018351 1 1 AAPL| 2.096948 0 0 AAPL| -1.962582 1 1 AAPL| -2.228272 0 1 MSFT| 1.64242 0 1 GOOG| 2.290576 0 1 '))
#Will error as 'sym' has duplicates
try:
tab.set_index('sym', verify_integrity= True)
except kx.QError as e:
print(f'Caught Error: {e}')
Caught Error: Index has duplicate key(s)