import os
# While this feature is in beta it must be explicitely enabled with this environment variable
# or by adding the `--pandas-api` flag to the QARGS environment variable
os.environ['PYKX_ENABLE_PANDAS_API'] = 'true'
import pykx as kx
Metadata¶
kx.q('N: 1000')
tab = kx.q('([] x: til N; y: N?`AAPL`GOOG`MSFT; z: N?500f; w: N?1000; v: N?(0N 0 50 100 200 250))')
tab
pykx.Table(pykx.q(' x y z w v ------------------------ 0 AAPL 280.0322 868 200 1 MSFT 48.24615 18 2 AAPL 66.99184 301 50 3 MSFT 435.7574 951 0 4 AAPL 385.586 365 50 5 AAPL 325.6626 386 0 6 GOOG 68.13659 844 50 7 AAPL 399.9134 972 100 8 AAPL 310.2315 457 50 9 GOOG 486.8679 475 250 10 AAPL 483.0258 28 200 11 MSFT 210.9703 387 250 12 AAPL 49.11884 205 0 13 GOOG 463.9106 628 14 MSFT 144.0166 848 250 15 AAPL 258.0005 832 50 16 MSFT 420.7807 117 200 17 GOOG 327.0546 914 250 18 MSFT 421.2875 470 0 19 MSFT 481.383 446 50 .. '))
Table.columns¶
Get the name of each column in the table
tab.columns
['x', 'y', 'z', 'w', 'v']
Table.dtypes¶
Get the pandas dtype of each column
tab.dtypes
x int64 y object z float64 w int64 v int64 dtype: object
Table.empty¶
Returns True if the table is empty otherwise returns False.
tab.empty
False
Table.ndim¶
Get the nuber of columns within the table.
tab.ndim
5
Table.shape¶
Get the shape of the table as a tuple (number of rows, number of columns).
tab.shape
(1000, 5)
Table.size¶
Get the number of values in the table (rows * cols).
tab.size
5000
Indexing¶
# The examples in this section will use this example table filled with random data
kx.q('N: 1000')
tab = kx.q('([] x: til N; y: N?`AAPL`GOOG`MSFT; z: N?500f; w: N?1000; v: N?(0N 0 50 100 200 250))')
tab
pykx.Table(pykx.q(' x y z w v ------------------------ 0 AAPL 60.54614 184 1 GOOG 292.5522 299 100 2 GOOG 283.0627 411 250 3 MSFT 42.38194 713 0 4 MSFT 453.4634 69 100 5 MSFT 12.28187 127 50 6 MSFT 76.00103 898 200 7 AAPL 17.802 822 0 8 AAPL 68.05945 197 0 9 AAPL 405.7366 56 0 10 MSFT 15.16717 964 50 11 GOOG 164.3905 826 12 MSFT 81.26465 593 250 13 MSFT 218.853 332 200 14 MSFT 181.2571 502 50 15 MSFT 311.1167 705 100 16 GOOG 81.45016 110 17 AAPL 70.85145 722 50 18 AAPL 172.3351 410 100 19 AAPL 9.906452 757 200 .. '))
Table.head()¶
Table.head(n=5)
Get the first n rows from a table.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
n | int | The number of rows to return. | 5 |
Returns:
Type | Description |
---|---|
Table | The first n rows of the table. |
Examples:
Return the first 5 rows of the table.
tab.head()
pykx.Table(pykx.q(' x y z w v ----------------------- 0 AAPL 60.54614 184 1 GOOG 292.5522 299 100 2 GOOG 283.0627 411 250 3 MSFT 42.38194 713 0 4 MSFT 453.4634 69 100 '))
Return the first 10 rows of the table.
tab.head(10)
pykx.Table(pykx.q(' x y z w v ----------------------- 0 AAPL 60.54614 184 1 GOOG 292.5522 299 100 2 GOOG 283.0627 411 250 3 MSFT 42.38194 713 0 4 MSFT 453.4634 69 100 5 MSFT 12.28187 127 50 6 MSFT 76.00103 898 200 7 AAPL 17.802 822 0 8 AAPL 68.05945 197 0 9 AAPL 405.7366 56 0 '))
Table.tail()¶
Table.tail(n=5)
Get the last n rows from a table.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
n | int | The number of rows to return. | 5 |
Returns:
Type | Description |
---|---|
Table | The last n rows of the table. |
Examples:
Return the last 5 rows of the table.
tab.tail()
pykx.Table(pykx.q(' x y z w v ------------------------ 995 AAPL 54.79148 295 996 AAPL 448.0876 779 997 GOOG 173.26 896 0 998 GOOG 350.8906 200 50 999 MSFT 409.2486 807 '))
Return the last 10 rows of the table.
tab.tail(10)
pykx.Table(pykx.q(' x y z w v ------------------------- 990 GOOG 407.4229 546 100 991 AAPL 18.6764 228 50 992 AAPL 408.8369 665 200 993 AAPL 282.1039 166 994 GOOG 69.54563 273 200 995 AAPL 54.79148 295 996 AAPL 448.0876 779 997 GOOG 173.26 896 0 998 GOOG 350.8906 200 50 999 MSFT 409.2486 807 '))
Table.get()¶
Table.get(key, default=None)
Get a column or columns from a table by key, if the key does not exist return the default value.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
key | Union[str, list[str]] | The column name or list of names to get from the table. | required |
default | int | The default value if the key is not found. | None |
Returns:
Type | Description |
---|---|
Union[Table, Any] | A table contatining only the columns requested or the default value. |
Examples:
Get the y
column from the table.
tab.get('y')
pykx.Table(pykx.q(' y ---- AAPL GOOG GOOG MSFT MSFT MSFT MSFT AAPL AAPL AAPL MSFT GOOG MSFT MSFT MSFT MSFT GOOG AAPL AAPL AAPL .. '))
Get the y
and z
columns from the table.
tab.get(['y', 'z'])
pykx.Table(pykx.q(' y z ------------- AAPL 60.54614 GOOG 292.5522 GOOG 283.0627 MSFT 42.38194 MSFT 453.4634 MSFT 12.28187 MSFT 76.00103 AAPL 17.802 AAPL 68.05945 AAPL 405.7366 MSFT 15.16717 GOOG 164.3905 MSFT 81.26465 MSFT 218.853 MSFT 181.2571 MSFT 311.1167 GOOG 81.45016 AAPL 70.85145 AAPL 172.3351 AAPL 9.906452 .. '))
Attempt to get the q
column from the table and recieve none as that column does not exist.
print(tab.get('q'))
None
Attempt to get the q
column from the table and recieve the default value not found
as that column does not exist.
tab.get('q', 'not found')
'not found'
Table.at[]¶
Table.at[row, col]
Access a single value for a row / column pair.
Similar to loc[]
, in that both provide label-based lookups. Use at
if you only need to get or set a single value.
The at
property can be used for both assignment and retrieval of values at a given row and column.
Examples:
Get the value of the z
column in the 997th row.
tab.at[997, 'z']
pykx.FloatAtom(pykx.q('173.26'))
Reassign the value of the z
column in the 997th row to 3.14159
.
tab.at[997, 'z'] = 3.14159
tab.at[997, 'z']
pykx.FloatAtom(pykx.q('3.14159'))
Table.loc[]¶
Table.loc[:, :]
Access a group of rows and columns by label or by BooleanVector.
loc
is a label based form of indexing, but may also be used with a boolean array.
Allowed inputs are:
- A single label, e.g. 5 or 'a', (note that 5 is interpreted as a label of the index, and never as an integer position along the index)
- A list or array of labels, e.g. ['a', 'b', 'c']
- A slice object with labels, e.g. 'a':'f'
- Warning contrary to usual python slices, both the start and the stop are included
- A BooleanVector of the same length as the axis being sliced
- An alignable BooleanVector. The index of the key will be aligned before masking
- An alignable Index. The Index of the returned selection will be the input
- A callable function with one argument (the calling Table like object) and that returns valid output for indexing (e.g. one of the above)
Note:
When the Pandas API is enabled, using []
to index into a table will use Table.loc[]
Returns:
Type | Description |
---|---|
Table | A table contatining only the columns / rows requested. |
Examples:
Get every row in the y
column.
tab[:, 'y']
pykx.Table(pykx.q(' y ---- AAPL GOOG GOOG MSFT MSFT MSFT MSFT AAPL AAPL AAPL MSFT GOOG MSFT MSFT MSFT MSFT GOOG AAPL AAPL AAPL .. '))
Get all rows of the table where the value in the z
column is greater than 250.0
tab[tab['z'] > 250.0]
pykx.Table(pykx.q(' x y z w v ------------------------ 1 GOOG 292.5522 299 100 2 GOOG 283.0627 411 250 4 MSFT 453.4634 69 100 9 AAPL 405.7366 56 0 15 MSFT 311.1167 705 100 22 MSFT 325.2313 425 100 23 GOOG 389.6867 143 200 26 MSFT 374.9333 225 27 AAPL 351.9635 242 200 28 AAPL 292.2184 153 50 30 MSFT 445.9395 900 100 31 AAPL 406.9874 495 250 32 GOOG 484.709 127 37 MSFT 354.8972 965 39 MSFT 323.4569 825 100 40 GOOG 446.6471 161 0 44 GOOG 465.8163 447 0 47 AAPL 251.128 608 250 49 MSFT 315.3486 997 51 GOOG 379.302 16 0 .. '))
Replace all null values in the column v
with the value -100
.
tab.loc[tab['v'] == kx.q('0N'), 'v'] = -100
tab
pykx.Table(pykx.q(' x y z w v ------------------------- 0 AAPL 60.54614 184 -100 1 GOOG 292.5522 299 100 2 GOOG 283.0627 411 250 3 MSFT 42.38194 713 0 4 MSFT 453.4634 69 100 5 MSFT 12.28187 127 50 6 MSFT 76.00103 898 200 7 AAPL 17.802 822 0 8 AAPL 68.05945 197 0 9 AAPL 405.7366 56 0 10 MSFT 15.16717 964 50 11 GOOG 164.3905 826 -100 12 MSFT 81.26465 593 250 13 MSFT 218.853 332 200 14 MSFT 181.2571 502 50 15 MSFT 311.1167 705 100 16 GOOG 81.45016 110 -100 17 AAPL 70.85145 722 50 18 AAPL 172.3351 410 100 19 AAPL 9.906452 757 200 .. '))
Replace all locations in column v
where the value is -100
with a null.
tab[tab['v'] == -100, 'v'] = kx.q('0N')
tab
pykx.Table(pykx.q(' x y z w v ------------------------ 0 AAPL 60.54614 184 1 GOOG 292.5522 299 100 2 GOOG 283.0627 411 250 3 MSFT 42.38194 713 0 4 MSFT 453.4634 69 100 5 MSFT 12.28187 127 50 6 MSFT 76.00103 898 200 7 AAPL 17.802 822 0 8 AAPL 68.05945 197 0 9 AAPL 405.7366 56 0 10 MSFT 15.16717 964 50 11 GOOG 164.3905 826 12 MSFT 81.26465 593 250 13 MSFT 218.853 332 200 14 MSFT 181.2571 502 50 15 MSFT 311.1167 705 100 16 GOOG 81.45016 110 17 AAPL 70.85145 722 50 18 AAPL 172.3351 410 100 19 AAPL 9.906452 757 200 .. '))
Table.iloc[]¶
Table.iloc[:, :]
Purely integer-location based indexing for selection by position.
iloc
is primarily integer position based (from 0 to length-1 of the axis), but may also be used with a BooleanVector.
Allowed inputs are:
- An integer, e.g. 5.
- A list or array of integers, e.g. [4, 3, 0].
- A slice object with ints, e.g. 1:7.
- A BooleanVector.
- A callable function with one argument (the calling Series or DataFrame) and that returns valid output for indexing (one of the above). This is useful in method chains, when you don’t have a reference to the calling object, but would like to base your selection on some value.
- A tuple of row and column indexes. The tuple elements consist of one of the above inputs, e.g. (0, 1).
Returns:
Type | Description |
---|---|
Table | A table contatining only the columns / rows requested. |
Examples:
Get the second row from a table.
tab.iloc[1]
pykx.Table(pykx.q(' x y z w v ----------------------- 1 GOOG 292.5522 299 100 '))
Get the first 5 rows from a table.
tab.iloc[:5]
pykx.Table(pykx.q(' x y z w v ----------------------- 0 AAPL 60.54614 184 1 GOOG 292.5522 299 100 2 GOOG 283.0627 411 250 3 MSFT 42.38194 713 0 4 MSFT 453.4634 69 100 '))
Get all rows of the table where the y
column is equal to AAPL
.
tab.iloc[tab['y'] == 'AAPL']
pykx.Table(pykx.q(' x y z w v ------------------------ 0 AAPL 60.54614 184 7 AAPL 17.802 822 0 8 AAPL 68.05945 197 0 9 AAPL 405.7366 56 0 17 AAPL 70.85145 722 50 18 AAPL 172.3351 410 100 19 AAPL 9.906452 757 200 20 AAPL 199.5309 790 25 AAPL 27.35117 145 0 27 AAPL 351.9635 242 200 28 AAPL 292.2184 153 50 31 AAPL 406.9874 495 250 35 AAPL 199.1284 252 200 38 AAPL 192.8447 604 45 AAPL 241.7971 919 50 46 AAPL 25.79979 131 50 47 AAPL 251.128 608 250 54 AAPL 374.937 986 57 AAPL 401.5671 996 58 AAPL 181.9741 768 .. '))
Get all rows of the table where the y
column is equal to AAPL
, and only return the y
, z
and w
columns.
tab.iloc[tab['y'] == 'AAPL', ['y', 'z', 'w']]
pykx.Table(pykx.q(' y z w ----------------- AAPL 60.54614 184 AAPL 17.802 822 AAPL 68.05945 197 AAPL 405.7366 56 AAPL 70.85145 722 AAPL 172.3351 410 AAPL 9.906452 757 AAPL 199.5309 790 AAPL 27.35117 145 AAPL 351.9635 242 AAPL 292.2184 153 AAPL 406.9874 495 AAPL 199.1284 252 AAPL 192.8447 604 AAPL 241.7971 919 AAPL 25.79979 131 AAPL 251.128 608 AAPL 374.937 986 AAPL 401.5671 996 AAPL 181.9741 768 .. '))
Replace all null values in the column v
with the value -100
.
tab.iloc[tab['v'] == kx.q('0N'), 'v'] = -100
tab
pykx.Table(pykx.q(' x y z w v ------------------------- 0 AAPL 60.54614 184 -100 1 GOOG 292.5522 299 100 2 GOOG 283.0627 411 250 3 MSFT 42.38194 713 0 4 MSFT 453.4634 69 100 5 MSFT 12.28187 127 50 6 MSFT 76.00103 898 200 7 AAPL 17.802 822 0 8 AAPL 68.05945 197 0 9 AAPL 405.7366 56 0 10 MSFT 15.16717 964 50 11 GOOG 164.3905 826 -100 12 MSFT 81.26465 593 250 13 MSFT 218.853 332 200 14 MSFT 181.2571 502 50 15 MSFT 311.1167 705 100 16 GOOG 81.45016 110 -100 17 AAPL 70.85145 722 50 18 AAPL 172.3351 410 100 19 AAPL 9.906452 757 200 .. '))
Table.pop()¶
Table.pop(item)
Remove a column or columns from a table by column name and return the column after it has been removed.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
item | Union[str, list[str]] | The column name or list of names to pop from the table. | required |
Returns:
Type | Description |
---|---|
Table | A table contatining only the columns removed from the input table. |
Examples:
Remove the v
column from the table and return it.
print(f'{tab.head()}\n\n')
print('Pop the `v` column out of the table')
print(f'{tab.pop("v").head()}\n\n')
print('Updated Table')
print(tab.head())
x y z w v ------------------------ 0 AAPL 60.54614 184 -100 1 GOOG 292.5522 299 100 2 GOOG 283.0627 411 250 3 MSFT 42.38194 713 0 4 MSFT 453.4634 69 100 Pop the `v` column out of the table v ---- -100 100 250 0 100 Updated Table x y z w ------------------- 0 AAPL 60.54614 184 1 GOOG 292.5522 299 2 GOOG 283.0627 411 3 MSFT 42.38194 713 4 MSFT 453.4634 69
Remove the z
and w
columns from the table and return them.
print(f'{tab.head()}\n\n')
print('Pop the `z` and `w` columns out of the table')
print(f'{tab.pop(["z", "w"]).head()}\n\n')
print('Updated Table')
print(tab.head())
x y z w ------------------- 0 AAPL 60.54614 184 1 GOOG 292.5522 299 2 GOOG 283.0627 411 3 MSFT 42.38194 713 4 MSFT 453.4634 69 Pop the `z` and `w` columns out of the table z w ------------ 60.54614 184 292.5522 299 283.0627 411 42.38194 713 453.4634 69 Updated Table x y ------ 0 AAPL 1 GOOG 2 GOOG 3 MSFT 4 MSFT