import os
# While this feature is in beta it must be explicitely enabled with this environment variable
# or by adding the `--pandas-api` flag to the QARGS environment variable
os.environ['PYKX_ENABLE_PANDAS_API'] = 'true'
import pykx as kx
import numpy as np
Constructing Tables¶
Table¶
Create a table from a list of rows or by converting a Python dictionary object
Parameters:
Name | Type | Description | Default |
---|---|---|---|
x | Union[list, array] | An array like object containing the contents of each row of the table. | None |
data | dict | A dictionary to be converted into a Table object. | None |
columns | list[str] | A list of column names to use when constructing from an array of rows. | None |
Returns:
Type | Description |
---|---|
Table | The newly constructed table object. |
Examples:
Create a table from a dictionary object.
kx.Table(data={'x': list(range(10)), 'y': [10 - x for x in range(10)]})
pykx.Table(pykx.q(' x y ---- 0 10 1 9 2 8 3 7 4 6 5 5 6 4 7 3 8 2 9 1 '))
Create a Table from an array like object.
kx.Table([[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]])
pykx.Table(pykx.q(' x x1 ---- 0 1 2 3 4 5 6 7 8 9 '))
Create a Table from an array like object and provide names for the columns to use.
kx.Table([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]], columns=['x', 'y', 'z'])
pykx.Table(pykx.q(' x y z ------- 0 1 2 3 4 5 6 7 8 9 10 11 '))
Keyed Table¶
Create a keyed table from a list of rows or by converting a Python dictionary object
Parameters:
Name | Type | Description | Default |
---|---|---|---|
x | Union[list, array] | An array like object containing the contents of each row of the table. | None |
data | dict | A dictionary to be converted into a Table object. | None |
columns | list[str] | A list of column names to use when constructing from an array of rows. | None |
index | list[Any] | An array like object to use as the index column of the table. | None |
Returns:
Type | Description |
---|---|
KeyedTable | The newly constructed keyed table object. |
Examples:
Create a keyed table from a dictionary object.
kx.KeyedTable(data={'x': list(range(10)), 'y': list(10 - x for x in range(10))})
pykx.KeyedTable(pykx.q(' idx| x y ---| ---- 0 | 0 10 1 | 1 9 2 | 2 8 3 | 3 7 4 | 4 6 5 | 5 5 6 | 6 4 7 | 7 3 8 | 8 2 9 | 9 1 '))
Create a keyed table from a list of rows.
kx.KeyedTable([[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]])
pykx.KeyedTable(pykx.q(' idx| x x1 ---| ---- 0 | 0 1 1 | 2 3 2 | 4 5 3 | 6 7 4 | 8 9 '))
Create a keyed table from a list of rows and provide names for the resulting columns.
kx.KeyedTable([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]], columns=['x', 'y', 'z'])
pykx.KeyedTable(pykx.q(' idx| x y z ---| ------- 0 | 0 1 2 1 | 3 4 5 2 | 6 7 8 3 | 9 10 11 '))
Create a keyed table with a specified index column.
kx.KeyedTable(data={'x': list(range(10)), 'y': list(10 - x for x in range(10))}, index=[2 * x for x in range(10)])
pykx.KeyedTable(pykx.q(' idx| x y ---| ---- 0 | 0 10 2 | 1 9 4 | 2 8 6 | 3 7 8 | 4 6 10 | 5 5 12 | 6 4 14 | 7 3 16 | 8 2 18 | 9 1 '))
Metadata¶
kx.q('N: 1000')
tab = kx.q('([] x: til N; y: N?`AAPL`GOOG`MSFT; z: N?500f; w: N?1000; v: N?(0N 0 50 100 200 250))')
tab
pykx.Table(pykx.q(' x y z w v ------------------------ 0 AAPL 280.0322 868 200 1 MSFT 48.24615 18 2 AAPL 66.99184 301 50 3 MSFT 435.7574 951 0 4 AAPL 385.586 365 50 5 AAPL 325.6626 386 0 6 GOOG 68.13659 844 50 7 AAPL 399.9134 972 100 8 AAPL 310.2315 457 50 9 GOOG 486.8679 475 250 10 AAPL 483.0258 28 200 11 MSFT 210.9703 387 250 12 AAPL 49.11884 205 0 13 GOOG 463.9106 628 14 MSFT 144.0166 848 250 15 AAPL 258.0005 832 50 16 MSFT 420.7807 117 200 17 GOOG 327.0546 914 250 18 MSFT 421.2875 470 0 19 MSFT 481.383 446 50 .. '))
Table.columns¶
Get the name of each column in the table
tab.columns
['x', 'y', 'z', 'w', 'v']
Table.dtypes¶
Get the pandas dtype of each column
tab.dtypes
x int64 y object z float64 w int64 v int64 dtype: object
Table.empty¶
Returns True if the table is empty otherwise returns False.
tab.empty
False
Table.ndim¶
Get the nuber of columns within the table.
tab.ndim
5
Table.shape¶
Get the shape of the table as a tuple (number of rows, number of columns).
tab.shape
(1000, 5)
Table.size¶
Get the number of values in the table (rows * cols).
tab.size
5000
Table.mean()¶
Table.mean(axis=0, numeric_only=False)
Get the mean of values across the requested axis.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
axis | int | The axis to calculate mean across 0 is columns, 1 is rows. | 0 |
numeric_only | bool | Include only columns / rows with numeric data. | False |
Returns:
Type | Description |
---|---|
Dictionary | The mean across each row / column with the key corresponding to the row number or column name. |
Examples:
Calculate the mean across the columns of a table
tab = kx.Table(data=
{
'a': [1, 2, 2, 4],
'b': [1, 2, 6, 7],
'c': [7, 8, 9, 10],
'd': [7, 11, 14, 14]
}
)
tab
pykx.Table(pykx.q(' a b c d --------- 1 1 7 7 2 2 8 11 2 6 9 14 4 7 10 14 '))
tab.mean()
pykx.Dictionary(pykx.q(' a| 2.25 b| 4 c| 8.5 d| 11.5 '))
Calculate the mean across the rows of a table
tab.mean(axis=1)
pykx.Dictionary(pykx.q(' 0| 4 1| 5.75 2| 7.75 3| 8.75 '))
Table.median()¶
Table.median(axis=0, numeric_only=False)
Get the median of values across the requested axis.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
axis | int | The axis to calculate median across 0 is columns, 1 is rows. | 0 |
numeric_only | bool | Include only columns / rows with numeric data. | False |
Returns:
Type | Description |
---|---|
Dictionary | The median across each row / column with the key corresponding to the row number or column name. |
Examples:
Calculate the median across the columns of a table
tab = kx.Table(data=
{
'a': [1, 2, 2, 4],
'b': [1, 2, 6, 7],
'c': [7, 8, 9, 10],
'd': [7, 11, 14, 14]
}
)
tab
pykx.Table(pykx.q(' a b c d --------- 1 1 7 7 2 2 8 11 2 6 9 14 4 7 10 14 '))
tab.median()
pykx.Dictionary(pykx.q(' a| 2 b| 4 c| 8.5 d| 12.5 '))
Calculate the median across the rows of a table
tab.median(axis=1)
pykx.Dictionary(pykx.q(' 0| 4 1| 5 2| 7.5 3| 8.5 '))
Table.mode()¶
Table.mode(axis=0, numeric_only=False, dropna=True)
Get the mode of values across the requested axis.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
axis | int | The axis to calculate mode across 0 is columns, 1 is rows. | 0 |
numeric_only | bool | Include only columns / rows with numeric data. | False |
dropna | bool | Remove null values from the data before calculating the mode. | True |
Returns:
Type | Description |
---|---|
Table | The mode across each row / column with the column corresponding to the row number or column name. |
Examples:
Calculate the mode across the columns of a table
tab = kx.Table(data=
{
'a': [1, 2, 2, 4],
'b': [1, 2, 6, 7],
'c': [7, 8, 9, 10],
'd': [7, 11, 14, 14]
}
)
tab
pykx.Table(pykx.q(' a b c d --------- 1 1 7 7 2 2 8 11 2 6 9 14 4 7 10 14 '))
tab.mode()
pykx.Table(pykx.q(' a b c d --------- 2 1 7 14 2 8 6 9 7 10 '))
Calculate the median across the rows of a table
tab.mode(axis=1)
pykx.Table(pykx.q(' idx 0 1 2 3 ------------- 0 1 7 1 2 2 2 6 9 14 3 4 7 10 14 '))
Calculate the mode across columns and keep null values.
tab = kx.Table(data=
{
'x': [0, 1, 2, 3, 4, 5, 6, 7, np.NaN, np.NaN],
'y': [10, 11, 12, 13, 14, 15, 16, 17, 18, np.NaN],
'z': ['a', 'b', 'c', 'd', 'd', 'e', 'e', 'f', 'g', 'h']
}
)
tab
pykx.Table(pykx.q(' x y z ------- 0 10 a 1 11 b 2 12 c 3 13 d 4 14 d 5 15 e 6 16 e 7 17 f 0n 18 g 0n 0n h '))
tab.mode(dropna=False)
pykx.Table(pykx.q(' x y z ------ 10 d 11 e 12 13 14 15 16 17 18 0n '))
Indexing¶
# The examples in this section will use this example table filled with random data
kx.q('N: 1000')
tab = kx.q('([] x: til N; y: N?`AAPL`GOOG`MSFT; z: N?500f; w: N?1000; v: N?(0N 0 50 100 200 250))')
tab
pykx.Table(pykx.q(' x y z w v ------------------------ 0 AAPL 60.54614 184 1 GOOG 292.5522 299 100 2 GOOG 283.0627 411 250 3 MSFT 42.38194 713 0 4 MSFT 453.4634 69 100 5 MSFT 12.28187 127 50 6 MSFT 76.00103 898 200 7 AAPL 17.802 822 0 8 AAPL 68.05945 197 0 9 AAPL 405.7366 56 0 10 MSFT 15.16717 964 50 11 GOOG 164.3905 826 12 MSFT 81.26465 593 250 13 MSFT 218.853 332 200 14 MSFT 181.2571 502 50 15 MSFT 311.1167 705 100 16 GOOG 81.45016 110 17 AAPL 70.85145 722 50 18 AAPL 172.3351 410 100 19 AAPL 9.906452 757 200 .. '))
Table.head()¶
Table.head(n=5)
Get the first n rows from a table.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
n | int | The number of rows to return. | 5 |
Returns:
Type | Description |
---|---|
Table | The first n rows of the table. |
Examples:
Return the first 5 rows of the table.
tab.head()
pykx.Table(pykx.q(' x y z w v ----------------------- 0 AAPL 60.54614 184 1 GOOG 292.5522 299 100 2 GOOG 283.0627 411 250 3 MSFT 42.38194 713 0 4 MSFT 453.4634 69 100 '))
Return the first 10 rows of the table.
tab.head(10)
pykx.Table(pykx.q(' x y z w v ----------------------- 0 AAPL 60.54614 184 1 GOOG 292.5522 299 100 2 GOOG 283.0627 411 250 3 MSFT 42.38194 713 0 4 MSFT 453.4634 69 100 5 MSFT 12.28187 127 50 6 MSFT 76.00103 898 200 7 AAPL 17.802 822 0 8 AAPL 68.05945 197 0 9 AAPL 405.7366 56 0 '))
Table.tail()¶
Table.tail(n=5)
Get the last n rows from a table.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
n | int | The number of rows to return. | 5 |
Returns:
Type | Description |
---|---|
Table | The last n rows of the table. |
Examples:
Return the last 5 rows of the table.
tab.tail()
pykx.Table(pykx.q(' x y z w v ------------------------ 995 AAPL 54.79148 295 996 AAPL 448.0876 779 997 GOOG 173.26 896 0 998 GOOG 350.8906 200 50 999 MSFT 409.2486 807 '))
Return the last 10 rows of the table.
tab.tail(10)
pykx.Table(pykx.q(' x y z w v ------------------------- 990 GOOG 407.4229 546 100 991 AAPL 18.6764 228 50 992 AAPL 408.8369 665 200 993 AAPL 282.1039 166 994 GOOG 69.54563 273 200 995 AAPL 54.79148 295 996 AAPL 448.0876 779 997 GOOG 173.26 896 0 998 GOOG 350.8906 200 50 999 MSFT 409.2486 807 '))
Table.get()¶
Table.get(key, default=None)
Get a column or columns from a table by key, if the key does not exist return the default value.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
key | Union[str, list[str]] | The column name or list of names to get from the table. | required |
default | int | The default value if the key is not found. | None |
Returns:
Type | Description |
---|---|
Union[Table, Any] | A table contatining only the columns requested or the default value. |
Examples:
Get the y
column from the table.
tab.get('y')
pykx.Table(pykx.q(' y ---- AAPL GOOG GOOG MSFT MSFT MSFT MSFT AAPL AAPL AAPL MSFT GOOG MSFT MSFT MSFT MSFT GOOG AAPL AAPL AAPL .. '))
Get the y
and z
columns from the table.
tab.get(['y', 'z'])
pykx.Table(pykx.q(' y z ------------- AAPL 60.54614 GOOG 292.5522 GOOG 283.0627 MSFT 42.38194 MSFT 453.4634 MSFT 12.28187 MSFT 76.00103 AAPL 17.802 AAPL 68.05945 AAPL 405.7366 MSFT 15.16717 GOOG 164.3905 MSFT 81.26465 MSFT 218.853 MSFT 181.2571 MSFT 311.1167 GOOG 81.45016 AAPL 70.85145 AAPL 172.3351 AAPL 9.906452 .. '))
Attempt to get the q
column from the table and recieve none as that column does not exist.
print(tab.get('q'))
None
Attempt to get the q
column from the table and recieve the default value not found
as that column does not exist.
tab.get('q', 'not found')
'not found'
Table.at[]¶
Table.at[row, col]
Access a single value for a row / column pair.
Similar to loc[]
, in that both provide label-based lookups. Use at
if you only need to get or set a single value.
The at
property can be used for both assignment and retrieval of values at a given row and column.
Examples:
Get the value of the z
column in the 997th row.
tab.at[997, 'z']
pykx.FloatAtom(pykx.q('173.26'))
Reassign the value of the z
column in the 997th row to 3.14159
.
tab.at[997, 'z'] = 3.14159
tab.at[997, 'z']
pykx.FloatAtom(pykx.q('3.14159'))
Table.loc[]¶
Table.loc[:, :]
Access a group of rows and columns by label or by BooleanVector.
loc
is a label based form of indexing, but may also be used with a boolean array.
Allowed inputs are:
- A single label, e.g. 5 or 'a', (note that 5 is interpreted as a label of the index, and never as an integer position along the index)
- A list or array of labels, e.g. ['a', 'b', 'c']
- A slice object with labels, e.g. 'a':'f'
- Warning contrary to usual python slices, both the start and the stop are included
- A BooleanVector of the same length as the axis being sliced
- An alignable BooleanVector. The index of the key will be aligned before masking
- An alignable Index. The Index of the returned selection will be the input
- A callable function with one argument (the calling Table like object) and that returns valid output for indexing (e.g. one of the above)
Note:
When the Pandas API is enabled, using []
to index into a table will use Table.loc[]
Returns:
Type | Description |
---|---|
Table | A table contatining only the columns / rows requested. |
Examples:
Get every row in the y
column.
tab[:, 'y']
pykx.Table(pykx.q(' y ---- AAPL GOOG GOOG MSFT MSFT MSFT MSFT AAPL AAPL AAPL MSFT GOOG MSFT MSFT MSFT MSFT GOOG AAPL AAPL AAPL .. '))
Get all rows of the table where the value in the z
column is greater than 250.0
tab[tab['z'] > 250.0]
pykx.Table(pykx.q(' x y z w v ------------------------ 1 GOOG 292.5522 299 100 2 GOOG 283.0627 411 250 4 MSFT 453.4634 69 100 9 AAPL 405.7366 56 0 15 MSFT 311.1167 705 100 22 MSFT 325.2313 425 100 23 GOOG 389.6867 143 200 26 MSFT 374.9333 225 27 AAPL 351.9635 242 200 28 AAPL 292.2184 153 50 30 MSFT 445.9395 900 100 31 AAPL 406.9874 495 250 32 GOOG 484.709 127 37 MSFT 354.8972 965 39 MSFT 323.4569 825 100 40 GOOG 446.6471 161 0 44 GOOG 465.8163 447 0 47 AAPL 251.128 608 250 49 MSFT 315.3486 997 51 GOOG 379.302 16 0 .. '))
Replace all null values in the column v
with the value -100
.
tab.loc[tab['v'] == kx.q('0N'), 'v'] = -100
tab
pykx.Table(pykx.q(' x y z w v ------------------------- 0 AAPL 60.54614 184 -100 1 GOOG 292.5522 299 100 2 GOOG 283.0627 411 250 3 MSFT 42.38194 713 0 4 MSFT 453.4634 69 100 5 MSFT 12.28187 127 50 6 MSFT 76.00103 898 200 7 AAPL 17.802 822 0 8 AAPL 68.05945 197 0 9 AAPL 405.7366 56 0 10 MSFT 15.16717 964 50 11 GOOG 164.3905 826 -100 12 MSFT 81.26465 593 250 13 MSFT 218.853 332 200 14 MSFT 181.2571 502 50 15 MSFT 311.1167 705 100 16 GOOG 81.45016 110 -100 17 AAPL 70.85145 722 50 18 AAPL 172.3351 410 100 19 AAPL 9.906452 757 200 .. '))
Replace all locations in column v
where the value is -100
with a null.
tab[tab['v'] == -100, 'v'] = kx.q('0N')
tab
pykx.Table(pykx.q(' x y z w v ------------------------ 0 AAPL 60.54614 184 1 GOOG 292.5522 299 100 2 GOOG 283.0627 411 250 3 MSFT 42.38194 713 0 4 MSFT 453.4634 69 100 5 MSFT 12.28187 127 50 6 MSFT 76.00103 898 200 7 AAPL 17.802 822 0 8 AAPL 68.05945 197 0 9 AAPL 405.7366 56 0 10 MSFT 15.16717 964 50 11 GOOG 164.3905 826 12 MSFT 81.26465 593 250 13 MSFT 218.853 332 200 14 MSFT 181.2571 502 50 15 MSFT 311.1167 705 100 16 GOOG 81.45016 110 17 AAPL 70.85145 722 50 18 AAPL 172.3351 410 100 19 AAPL 9.906452 757 200 .. '))
Table.iloc[]¶
Table.iloc[:, :]
Purely integer-location based indexing for selection by position.
iloc
is primarily integer position based (from 0 to length-1 of the axis), but may also be used with a BooleanVector.
Allowed inputs are:
- An integer, e.g. 5.
- A list or array of integers, e.g. [4, 3, 0].
- A slice object with ints, e.g. 1:7.
- A BooleanVector.
- A callable function with one argument (the calling Series or DataFrame) and that returns valid output for indexing (one of the above). This is useful in method chains, when you don’t have a reference to the calling object, but would like to base your selection on some value.
- A tuple of row and column indexes. The tuple elements consist of one of the above inputs, e.g. (0, 1).
Returns:
Type | Description |
---|---|
Table | A table contatining only the columns / rows requested. |
Examples:
Get the second row from a table.
tab.iloc[1]
pykx.Table(pykx.q(' x y z w v ----------------------- 1 GOOG 292.5522 299 100 '))
Get the first 5 rows from a table.
tab.iloc[:5]
pykx.Table(pykx.q(' x y z w v ----------------------- 0 AAPL 60.54614 184 1 GOOG 292.5522 299 100 2 GOOG 283.0627 411 250 3 MSFT 42.38194 713 0 4 MSFT 453.4634 69 100 '))
Get all rows of the table where the y
column is equal to AAPL
.
tab.iloc[tab['y'] == 'AAPL']
pykx.Table(pykx.q(' x y z w v ------------------------ 0 AAPL 60.54614 184 7 AAPL 17.802 822 0 8 AAPL 68.05945 197 0 9 AAPL 405.7366 56 0 17 AAPL 70.85145 722 50 18 AAPL 172.3351 410 100 19 AAPL 9.906452 757 200 20 AAPL 199.5309 790 25 AAPL 27.35117 145 0 27 AAPL 351.9635 242 200 28 AAPL 292.2184 153 50 31 AAPL 406.9874 495 250 35 AAPL 199.1284 252 200 38 AAPL 192.8447 604 45 AAPL 241.7971 919 50 46 AAPL 25.79979 131 50 47 AAPL 251.128 608 250 54 AAPL 374.937 986 57 AAPL 401.5671 996 58 AAPL 181.9741 768 .. '))
Get all rows of the table where the y
column is equal to AAPL
, and only return the y
, z
and w
columns.
tab.iloc[tab['y'] == 'AAPL', ['y', 'z', 'w']]
pykx.Table(pykx.q(' y z w ----------------- AAPL 60.54614 184 AAPL 17.802 822 AAPL 68.05945 197 AAPL 405.7366 56 AAPL 70.85145 722 AAPL 172.3351 410 AAPL 9.906452 757 AAPL 199.5309 790 AAPL 27.35117 145 AAPL 351.9635 242 AAPL 292.2184 153 AAPL 406.9874 495 AAPL 199.1284 252 AAPL 192.8447 604 AAPL 241.7971 919 AAPL 25.79979 131 AAPL 251.128 608 AAPL 374.937 986 AAPL 401.5671 996 AAPL 181.9741 768 .. '))
Replace all null values in the column v
with the value -100
.
tab.iloc[tab['v'] == kx.q('0N'), 'v'] = -100
tab
pykx.Table(pykx.q(' x y z w v ------------------------- 0 AAPL 60.54614 184 -100 1 GOOG 292.5522 299 100 2 GOOG 283.0627 411 250 3 MSFT 42.38194 713 0 4 MSFT 453.4634 69 100 5 MSFT 12.28187 127 50 6 MSFT 76.00103 898 200 7 AAPL 17.802 822 0 8 AAPL 68.05945 197 0 9 AAPL 405.7366 56 0 10 MSFT 15.16717 964 50 11 GOOG 164.3905 826 -100 12 MSFT 81.26465 593 250 13 MSFT 218.853 332 200 14 MSFT 181.2571 502 50 15 MSFT 311.1167 705 100 16 GOOG 81.45016 110 -100 17 AAPL 70.85145 722 50 18 AAPL 172.3351 410 100 19 AAPL 9.906452 757 200 .. '))
Table.pop()¶
Table.pop(item)
Remove a column or columns from a table by column name and return the column after it has been removed.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
item | Union[str, list[str]] | The column name or list of names to pop from the table. | required |
Returns:
Type | Description |
---|---|
Table | A table contatining only the columns removed from the input table. |
Examples:
Remove the v
column from the table and return it.
print(f'{tab.head()}\n\n')
print('Pop the `v` column out of the table')
print(f'{tab.pop("v").head()}\n\n')
print('Updated Table')
print(tab.head())
x y z w v ------------------------ 0 AAPL 60.54614 184 -100 1 GOOG 292.5522 299 100 2 GOOG 283.0627 411 250 3 MSFT 42.38194 713 0 4 MSFT 453.4634 69 100 Pop the `v` column out of the table v ---- -100 100 250 0 100 Updated Table x y z w ------------------- 0 AAPL 60.54614 184 1 GOOG 292.5522 299 2 GOOG 283.0627 411 3 MSFT 42.38194 713 4 MSFT 453.4634 69
Remove the z
and w
columns from the table and return them.
print(f'{tab.head()}\n\n')
print('Pop the `z` and `w` columns out of the table')
print(f'{tab.pop(["z", "w"]).head()}\n\n')
print('Updated Table')
print(tab.head())
x y z w ------------------- 0 AAPL 60.54614 184 1 GOOG 292.5522 299 2 GOOG 283.0627 411 3 MSFT 42.38194 713 4 MSFT 453.4634 69 Pop the `z` and `w` columns out of the table z w ------------ 60.54614 184 292.5522 299 283.0627 411 42.38194 713 453.4634 69 Updated Table x y ------ 0 AAPL 1 GOOG 2 GOOG 3 MSFT 4 MSFT
Reindexing¶
# The examples in this section will use this example table filled with random data
kx.q('N: 1000')
tab = kx.q('([] x: til N; y: N?`AAPL`GOOG`MSFT; z: N?500f; w: N?1000; v: N?(0N 0 50 100 200 250))')
tab.head()
pykx.Table(pykx.q(' x y z w v ----------------------- 0 AAPL 246.3763 948 100 1 GOOG 337.246 652 100 2 MSFT 143.1573 420 50 3 AAPL 314.778 210 200 4 AAPL 444.4764 459 0 '))
Drop rows.
tab.drop([0, 2, 4, 6, 8, 10]).head()
pykx.Table(pykx.q(' x y z w v ----------------------- 1 GOOG 337.246 652 100 3 AAPL 314.778 210 200 5 MSFT 409.7593 916 100 7 AAPL 478.1098 608 0 9 AAPL 58.89948 419 250 '))
Drop columns.
tab.drop('y', axis=1).head()
pykx.Table(pykx.q(' x z w v ------------------ 0 246.3763 948 100 1 337.246 652 100 2 143.1573 420 50 3 314.778 210 200 4 444.4764 459 0 '))
Drop duplicates.
tab2 = kx.q('([] 100?`AAPL`GOOG`MSFT; 100?3)')
tab2
pykx.Table(pykx.q(' x x1 ------- GOOG 2 MSFT 2 AAPL 2 MSFT 1 GOOG 2 AAPL 2 AAPL 2 MSFT 0 AAPL 1 GOOG 0 GOOG 2 MSFT 1 GOOG 1 MSFT 0 AAPL 0 MSFT 2 MSFT 2 AAPL 0 GOOG 1 MSFT 2 .. '))
tab2.drop_duplicates()
pykx.Table(pykx.q(' x x1 ------- GOOG 2 MSFT 2 AAPL 2 MSFT 1 MSFT 0 AAPL 1 GOOG 0 GOOG 1 AAPL 0 '))
Renaming columns.
tab.head()
pykx.Table(pykx.q(' x y z w v ----------------------- 0 AAPL 246.3763 948 100 1 GOOG 337.246 652 100 2 MSFT 143.1573 420 50 3 AAPL 314.778 210 200 4 AAPL 444.4764 459 0 '))
tab.rename(columns={'y': 'symbol', 'z': 'price'}).head()
pykx.Table(pykx.q(' x symbol price w v ------------------------- 0 AAPL 246.3763 948 100 1 GOOG 337.246 652 100 2 MSFT 143.1573 420 50 3 AAPL 314.778 210 200 4 AAPL 444.4764 459 0 '))
Sampling data.
tab.sample(n=10)
pykx.Table(pykx.q(' x y z w v ------------------------- 876 GOOG 376.395 336 100 654 MSFT 285.7745 69 0 178 GOOG 268.9795 537 50 105 MSFT 267.6886 339 200 451 AAPL 166.5928 567 247 MSFT 422.5578 626 50 688 GOOG 332.8294 150 250 418 MSFT 328.3853 540 250 500 GOOG 378.7218 160 50 63 AAPL 153.0027 358 0 '))
tab.sample(frac=0.1)
pykx.Table(pykx.q(' x y z w v ------------------------- 115 AAPL 430.3011 371 0 420 AAPL 474.9089 833 100 806 MSFT 389.469 581 250 25 MSFT 399.2739 372 0 895 AAPL 102.9643 871 100 273 MSFT 377.4773 958 250 388 MSFT 475.8273 803 250 386 GOOG 448.5738 758 200 363 GOOG 499.7035 132 200 489 GOOG 308.7938 4 50 77 AAPL 382.6731 988 0 20 MSFT 12.63442 321 250 482 AAPL 236.7455 94 50 699 AAPL 190.6258 715 100 107 GOOG 95.34587 179 250 877 AAPL 287.4927 969 897 MSFT 326.4463 126 0 934 AAPL 352.6163 622 250 662 MSFT 302.0044 125 0 580 MSFT 450.5893 172 50 .. '))
tab.sample(frac=0.1, replace=True)
pykx.Table(pykx.q(' x y z w v ------------------------- 172 AAPL 475.7677 581 100 908 GOOG 164.7975 916 949 MSFT 495.2473 672 0 812 AAPL 408.4684 493 100 269 MSFT 291.8758 322 200 390 GOOG 464.4193 578 0 720 MSFT 479.4839 765 50 103 MSFT 52.89304 853 0 203 GOOG 27.14407 578 200 216 MSFT 359.1574 455 100 331 MSFT 269.0314 570 250 896 MSFT 34.10398 376 50 358 MSFT 225.2531 130 50 539 GOOG 229.2499 504 200 833 MSFT 70.12554 750 100 594 MSFT 33.19471 549 200 385 MSFT 208.6966 485 200 539 GOOG 229.2499 504 200 8 MSFT 221.2105 308 50 154 GOOG 174.6997 417 50 .. '))
Table.select_dtypes()¶
Table.select_dtypes(include=None, exclude=None) Return a subset of the DataFrame’s columns based on the column dtypes.
Parameters:¶
Name | Type | Description | Default |
---|---|---|---|
include | list-like/scalar | A selection of dtypes or strings to be included. | None |
exclude | list-like/scalar | A selection of dtypes or strings to be excluded. | None |
At least one of these parameters must be supplied.
Returns:¶
Type | Description |
---|---|
Dataframe | The subset of the frame including the dtypes in include and excluding the dtypes in exclude . |
Examples:¶
df = kx.q('([] c1:`a`b`c; c2:1 2 3h; c3:1 2 3j; c4:1 2 3i)')
# Exclude symbol column
print(df.select_dtypes(exclude = kx.SymbolVector))
# Include a list of column types
print(df.select_dtypes(include = [kx.ShortVector, kx.LongVector]))
c2 c3 c4 -------- 1 1 1 2 2 2 3 3 3 c2 c3 ----- 1 1 2 2 3 3
Table.astype()¶
Table.astype(dtype, copy=True, errors='raise')
Cast a column/columns of the Dataframes object to a specified dtype
.
Parameters:¶
Name | Type | Description | Default |
---|---|---|---|
dtype | data type, or dict of column name -> data type | Use a PyKx wrapper data type or Python type to cast all columns to the same type. Alternatively, use {col: dtype, …}, where col is a column label and dtype is PyKx wrapper data type to cast one or more of the DataFrame’s columns to column-specific types. | |
copy | Boolean | Default of True, False not implemented | True |
errors | {‘raise’, ‘ignore’} | If passed anything other than 'raise', it will return the dataframe | 'raise' |
Returns:¶
Type | Description |
---|---|
Dataframe | The dataframe with columns casted according to passed dtypes |
Examples:¶
df = kx.q('([] c1:1 2 3i; c2:1 2 3j; c3:1 2 3h; c4:1 2 3i)')
## Casting all columns to dtype LongVector
print(df.astype(kx.LongVector))
## Casting as specified in the dcitionary supplied with
## dtype per column
print(df.astype({'c1':kx.LongVector, 'c2':'kx.ShortVector'}))
df = kx.q('([] c1:3#.z.p; c2:`abc`def`ghi; c3:1 2 3j; c4:("abc";"def";"ghi");c5:"abc";c6:(1 2 3;4 5 6;7 8 9))')
## Casting char and string columns to symbol columns
print(df.astype({'c4':kx.SymbolVector, 'c5':kx.SymbolVector}))
c1 c2 c3 c4 ----------- 1 1 1 1 2 2 2 2 3 3 3 3 c1 c2 c3 c4 ----------- 1 1 1 1 2 2 2 2 3 3 3 3 c1 c2 c3 c4 c5 c6 ------------------------------------------------- 2023.05.19D12:59:46.012915784 abc 1 abc a 1 2 3 2023.05.19D12:59:46.012915784 def 2 def b 4 5 6 2023.05.19D12:59:46.012915784 ghi 3 ghi c 7 8 9
Merging¶
Table.merge()¶
Table.merge(
right,
how='inner',
on=None,
left_on=None,
right_on=None,
left_index=False,
right_index=False,
sort=False,
suffixes=('_x', '_y'),
copy=True,
validate=None,
q_join=False
)
Merge Table or KeyedTable objects with a database-style join.
The join is done on columns or indexes. If joining columns on columns, the DataFrame indexes will be ignored. Otherwise if joining indexes on indexes or indexes on a column or columns, the index will be passed on. When performing a cross merge, no column specifications to merge on are allowed.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
right | Union[Table/KeyedTable] | The object to merge with. | required |
how | str | The type of join to be used. One of {‘left’, ‘right’, ‘outer’, ‘inner’, ‘cross’}. | ‘inner’ |
on | str | The column name to join on. | None |
left_on | str | The column name in the left table to join on. | None |
right_on | str | The column name in the right table to join on. | None |
left_index | bool | Use the index of the left Table. | False |
right_index | bool | Use the index of the right Table. | False |
sort | bool | Sort the join keys of the resulting table. | False |
suffixes | Tuple(str, str) | The number of rows to return. | ('_x', '_y') |
copy | bool | If False avoid copies and modify the input table. | None |
validate | str | If specified checks if merge matches specified type. - “one_to_one” or “1:1”: check if merge keys are unique in both left and right datasets. - “one_to_many” or “1:m”: check if merge keys are unique in left dataset. - “many_to_one” or “m:1”: check if merge keys are unique in right dataset. - “many_to_many” or “m:m”: allowed, but does not result in checks. |
None |
q_join | bool | If True perform native q joins instead of the pandas SQL like joins. More documentation around these joins can be found here. | False |
Returns:
Type | Description |
---|---|
Table / KeyedTable | The resulting table like object after the join has been preformed. |
Examples:
Merge tab1 and tab2 on the lkey and rkey columns. The value columns have the default suffixes, _x and _y, appended.
tab1 = kx.Table(data={'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
tab2 = kx.Table(data={'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
tab1.merge(tab2, left_on='lkey', right_on='rkey')
pykx.Table(pykx.q(' lkey value_x rkey value_y ------------------------- foo 1 foo 5 foo 1 foo 8 foo 5 foo 5 foo 5 foo 8 bar 2 bar 6 baz 3 baz 7 '))
Merge tab1 and tab2 on the lkey and rkey columns using a native q inner join. The value columns have the default suffixes, _x and _y, appended.
tab1.merge(tab2, left_on='lkey', right_on='rkey', q_join=True)
pykx.Table(pykx.q(' lkey value_x rkey value_y ------------------------- foo 1 foo 5 bar 2 bar 6 baz 3 baz 7 foo 5 foo 5 '))
Merge tab1 and tab2 with specified left and right suffixes appended to any overlapping columns.
tab1.merge(tab2, left_on='lkey', right_on='rkey', suffixes=('_left', '_right'))
pykx.Table(pykx.q(' lkey value_left rkey value_right -------------------------------- foo 1 foo 5 foo 1 foo 8 foo 5 foo 5 foo 5 foo 8 bar 2 bar 6 baz 3 baz 7 '))
Merge tab1 and tab2 but raise an exception if the Tables have any overlapping columns.
try:
tab1.merge(tab2, left_on='lkey', right_on='rkey', suffixes=(False, False))
except BaseException as e:
print(f'Caught Error: {e}')
Caught Error: Columns overlap but no suffix specified: ['value']
tab1 = kx.Table(data={'a': ['foo', 'bar'], 'b': [1, 2]})
tab2 = kx.Table(data={'a': ['foo', 'baz'], 'c': [3, 4]})
Merge tab1 and tab2 on the a
column using an inner join.
tab1.merge(tab2, how='inner', on='a')
pykx.Table(pykx.q(' a b c ------- foo 1 3 '))
Merge tab1 and tab2 on the a
column using a left join.
tab1.merge(tab2, how='left', on='a')
--------------------------------------------------------------------------- QError Traceback (most recent call last) Cell In[67], line 1 ----> 1 tab1.merge(tab2, how='left', on='a') File /usr/local/lib/python3.10/site-packages/pykx/pandas_api/__init__.py:17, in api_return.<locals>.return_val(*args, **kwargs) 15 if issubclass(type(tab), MetaAtomic): 16 tab = tab.tab ---> 17 res = func(*args, **kwargs) 18 if not issubclass(type(res), PandasIndexing): 19 return res File /usr/local/lib/python3.10/site-packages/pykx/pandas_api/pandas_merge.py:334, in PandasMerge.merge(self, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, validate, q_join) 327 res = _q_merge_tables( 328 left, 329 right, 330 how, 331 added_idx 332 ) 333 else: --> 334 res = _merge_tables(left, right, on, how, added_idx, left_index, right_index, distinct) 335 res = _clean_result(self, res, how, left_index, right_index, added_idx, left, right) 336 if sort and not added_idx: File /usr/local/lib/python3.10/site-packages/pykx/pandas_api/pandas_merge.py:189, in _merge_tables(left, right, on, how, added_idx, left_index, right_index, distinct) 187 return res 188 elif how == 'left': --> 189 res = __left_join(left, right, on, distinct) 190 elif how == 'right': 191 cols = q.cols(left).py() File /usr/local/lib/python3.10/site-packages/pykx/pandas_api/pandas_merge.py:31, in __left_join(left, right, on, distinct) 29 if distinct: 30 return q.ej(on, left, right) ---> 31 return q( 32 '{[k; t; tt] a:ej[k; t; tt]; a uj t except ((count[k] _ cols tt)_a)}', 33 on, 34 left, 35 right 36 ) File /usr/local/lib/python3.10/site-packages/pykx/embedded_q.py:142, in EmbeddedQ.__call__(self, query, wait, sync, *args) 140 result = _keval(bytes(wrappers.CharVector(query)), *[wrappers.K(x) for x in args]) 141 if wait is None or wait: --> 142 return factory(result, False) 143 return self('::', wait=True) File /usr/local/lib/python3.10/site-packages/pykx/_wrappers.pyx:499, in pykx._wrappers._factory() File /usr/local/lib/python3.10/site-packages/pykx/_wrappers.pyx:492, in pykx._wrappers.factory() QError: type
Merge tab1 and tab2 using a cross join.
tab1 = kx.Table(data={'left': ['foo', 'bar']})
tab2 = kx.Table(data={'right': [7, 8]})
tab1.merge(tab2, how='cross')
pykx.Table(pykx.q(' left right ---------- foo 7 foo 8 bar 7 bar 8 '))