import os
# While this feature is in beta it must be explicitely enabled with this environment variable
# or by adding the `--pandas-api` flag to the QARGS environment variable
os.environ['PYKX_ENABLE_PANDAS_API'] = 'true'
import pykx as kx
Constructing Tables¶
Table¶
Create a table from a list of rows or by converting a Python dictionary object
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| x | Union[list, array] | An array like object containing the contents of each row of the table. | None |
| data | dict | A dictionary to be converted into a Table object. | None |
| columns | list[str] | A list of column names to use when constructing from an array of rows. | None |
Returns:
| Type | Description |
|---|---|
| Table | The newly constructed table object. |
Examples:
Create a table from a dictionary object.
kx.Table(data={'x': list(range(10)), 'y': [10 - x for x in range(10)]})
pykx.Table(pykx.q('
x y
----
0 10
1 9
2 8
3 7
4 6
5 5
6 4
7 3
8 2
9 1
'))
Create a Table from an array like object.
kx.Table([[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]])
pykx.Table(pykx.q('
x x1
----
0 1
2 3
4 5
6 7
8 9
'))
Create a Table from an array like object and provide names for the columns to use.
kx.Table([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]], columns=['x', 'y', 'z'])
pykx.Table(pykx.q('
x y z
-------
0 1 2
3 4 5
6 7 8
9 10 11
'))
Keyed Table¶
Create a keyed table from a list of rows or by converting a Python dictionary object
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| x | Union[list, array] | An array like object containing the contents of each row of the table. | None |
| data | dict | A dictionary to be converted into a Table object. | None |
| columns | list[str] | A list of column names to use when constructing from an array of rows. | None |
| index | list[Any] | An array like object to use as the index column of the table. | None |
Returns:
| Type | Description |
|---|---|
| KeyedTable | The newly constructed keyed table object. |
Examples:
Create a keyed table from a dictionary object.
kx.KeyedTable(data={'x': list(range(10)), 'y': list(10 - x for x in range(10))})
pykx.KeyedTable(pykx.q('
idx| x y
---| ----
0 | 0 10
1 | 1 9
2 | 2 8
3 | 3 7
4 | 4 6
5 | 5 5
6 | 6 4
7 | 7 3
8 | 8 2
9 | 9 1
'))
Create a keyed table from a list of rows.
kx.KeyedTable([[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]])
pykx.KeyedTable(pykx.q('
idx| x x1
---| ----
0 | 0 1
1 | 2 3
2 | 4 5
3 | 6 7
4 | 8 9
'))
Create a keyed table from a list of rows and provide names for the resulting columns.
kx.KeyedTable([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]], columns=['x', 'y', 'z'])
pykx.KeyedTable(pykx.q('
idx| x y z
---| -------
0 | 0 1 2
1 | 3 4 5
2 | 6 7 8
3 | 9 10 11
'))
Create a keyed table with a specified index column.
kx.KeyedTable(data={'x': list(range(10)), 'y': list(10 - x for x in range(10))}, index=[2 * x for x in range(10)])
pykx.KeyedTable(pykx.q('
idx| x y
---| ----
0 | 0 10
2 | 1 9
4 | 2 8
6 | 3 7
8 | 4 6
10 | 5 5
12 | 6 4
14 | 7 3
16 | 8 2
18 | 9 1
'))
Metadata¶
kx.q('N: 1000')
tab = kx.q('([] x: til N; y: N?`AAPL`GOOG`MSFT; z: N?500f; w: N?1000; v: N?(0N 0 50 100 200 250))')
tab
pykx.Table(pykx.q('
x y z w v
------------------------
0 AAPL 280.0322 868 200
1 MSFT 48.24615 18
2 AAPL 66.99184 301 50
3 MSFT 435.7574 951 0
4 AAPL 385.586 365 50
5 AAPL 325.6626 386 0
6 GOOG 68.13659 844 50
7 AAPL 399.9134 972 100
8 AAPL 310.2315 457 50
9 GOOG 486.8679 475 250
10 AAPL 483.0258 28 200
11 MSFT 210.9703 387 250
12 AAPL 49.11884 205 0
13 GOOG 463.9106 628
14 MSFT 144.0166 848 250
15 AAPL 258.0005 832 50
16 MSFT 420.7807 117 200
17 GOOG 327.0546 914 250
18 MSFT 421.2875 470 0
19 MSFT 481.383 446 50
..
'))
Table.columns¶
Get the name of each column in the table
tab.columns
['x', 'y', 'z', 'w', 'v']
Table.dtypes¶
Get the pandas dtype of each column
tab.dtypes
x int64 y object z float64 w int64 v int64 dtype: object
Table.empty¶
Returns True if the table is empty otherwise returns False.
tab.empty
False
Table.ndim¶
Get the nuber of columns within the table.
tab.ndim
5
Table.shape¶
Get the shape of the table as a tuple (number of rows, number of columns).
tab.shape
(1000, 5)
Table.size¶
Get the number of values in the table (rows * cols).
tab.size
5000
Indexing¶
# The examples in this section will use this example table filled with random data
kx.q('N: 1000')
tab = kx.q('([] x: til N; y: N?`AAPL`GOOG`MSFT; z: N?500f; w: N?1000; v: N?(0N 0 50 100 200 250))')
tab
pykx.Table(pykx.q('
x y z w v
------------------------
0 AAPL 60.54614 184
1 GOOG 292.5522 299 100
2 GOOG 283.0627 411 250
3 MSFT 42.38194 713 0
4 MSFT 453.4634 69 100
5 MSFT 12.28187 127 50
6 MSFT 76.00103 898 200
7 AAPL 17.802 822 0
8 AAPL 68.05945 197 0
9 AAPL 405.7366 56 0
10 MSFT 15.16717 964 50
11 GOOG 164.3905 826
12 MSFT 81.26465 593 250
13 MSFT 218.853 332 200
14 MSFT 181.2571 502 50
15 MSFT 311.1167 705 100
16 GOOG 81.45016 110
17 AAPL 70.85145 722 50
18 AAPL 172.3351 410 100
19 AAPL 9.906452 757 200
..
'))
Table.head()¶
Table.head(n=5)
Get the first n rows from a table.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| n | int | The number of rows to return. | 5 |
Returns:
| Type | Description |
|---|---|
| Table | The first n rows of the table. |
Examples:
Return the first 5 rows of the table.
tab.head()
pykx.Table(pykx.q('
x y z w v
-----------------------
0 AAPL 60.54614 184
1 GOOG 292.5522 299 100
2 GOOG 283.0627 411 250
3 MSFT 42.38194 713 0
4 MSFT 453.4634 69 100
'))
Return the first 10 rows of the table.
tab.head(10)
pykx.Table(pykx.q('
x y z w v
-----------------------
0 AAPL 60.54614 184
1 GOOG 292.5522 299 100
2 GOOG 283.0627 411 250
3 MSFT 42.38194 713 0
4 MSFT 453.4634 69 100
5 MSFT 12.28187 127 50
6 MSFT 76.00103 898 200
7 AAPL 17.802 822 0
8 AAPL 68.05945 197 0
9 AAPL 405.7366 56 0
'))
Table.tail()¶
Table.tail(n=5)
Get the last n rows from a table.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| n | int | The number of rows to return. | 5 |
Returns:
| Type | Description |
|---|---|
| Table | The last n rows of the table. |
Examples:
Return the last 5 rows of the table.
tab.tail()
pykx.Table(pykx.q('
x y z w v
------------------------
995 AAPL 54.79148 295
996 AAPL 448.0876 779
997 GOOG 173.26 896 0
998 GOOG 350.8906 200 50
999 MSFT 409.2486 807
'))
Return the last 10 rows of the table.
tab.tail(10)
pykx.Table(pykx.q('
x y z w v
-------------------------
990 GOOG 407.4229 546 100
991 AAPL 18.6764 228 50
992 AAPL 408.8369 665 200
993 AAPL 282.1039 166
994 GOOG 69.54563 273 200
995 AAPL 54.79148 295
996 AAPL 448.0876 779
997 GOOG 173.26 896 0
998 GOOG 350.8906 200 50
999 MSFT 409.2486 807
'))
Table.get()¶
Table.get(key, default=None)
Get a column or columns from a table by key, if the key does not exist return the default value.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| key | Union[str, list[str]] | The column name or list of names to get from the table. | required |
| default | int | The default value if the key is not found. | None |
Returns:
| Type | Description |
|---|---|
| Union[Table, Any] | A table contatining only the columns requested or the default value. |
Examples:
Get the y column from the table.
tab.get('y')
pykx.Table(pykx.q('
y
----
AAPL
GOOG
GOOG
MSFT
MSFT
MSFT
MSFT
AAPL
AAPL
AAPL
MSFT
GOOG
MSFT
MSFT
MSFT
MSFT
GOOG
AAPL
AAPL
AAPL
..
'))
Get the y and z columns from the table.
tab.get(['y', 'z'])
pykx.Table(pykx.q('
y z
-------------
AAPL 60.54614
GOOG 292.5522
GOOG 283.0627
MSFT 42.38194
MSFT 453.4634
MSFT 12.28187
MSFT 76.00103
AAPL 17.802
AAPL 68.05945
AAPL 405.7366
MSFT 15.16717
GOOG 164.3905
MSFT 81.26465
MSFT 218.853
MSFT 181.2571
MSFT 311.1167
GOOG 81.45016
AAPL 70.85145
AAPL 172.3351
AAPL 9.906452
..
'))
Attempt to get the q column from the table and recieve none as that column does not exist.
print(tab.get('q'))
None
Attempt to get the q column from the table and recieve the default value not found as that column does not exist.
tab.get('q', 'not found')
'not found'
Table.at[]¶
Table.at[row, col]
Access a single value for a row / column pair.
Similar to loc[], in that both provide label-based lookups. Use at if you only need to get or set a single value.
The at property can be used for both assignment and retrieval of values at a given row and column.
Examples:
Get the value of the z column in the 997th row.
tab.at[997, 'z']
pykx.FloatAtom(pykx.q('173.26'))
Reassign the value of the z column in the 997th row to 3.14159.
tab.at[997, 'z'] = 3.14159
tab.at[997, 'z']
pykx.FloatAtom(pykx.q('3.14159'))
Table.loc[]¶
Table.loc[:, :]
Access a group of rows and columns by label or by BooleanVector.
loc is a label based form of indexing, but may also be used with a boolean array.
Allowed inputs are:
- A single label, e.g. 5 or 'a', (note that 5 is interpreted as a label of the index, and never as an integer position along the index)
- A list or array of labels, e.g. ['a', 'b', 'c']
- A slice object with labels, e.g. 'a':'f'
- Warning contrary to usual python slices, both the start and the stop are included
- A BooleanVector of the same length as the axis being sliced
- An alignable BooleanVector. The index of the key will be aligned before masking
- An alignable Index. The Index of the returned selection will be the input
- A callable function with one argument (the calling Table like object) and that returns valid output for indexing (e.g. one of the above)
Note:
When the Pandas API is enabled, using [] to index into a table will use Table.loc[]
Returns:
| Type | Description |
|---|---|
| Table | A table contatining only the columns / rows requested. |
Examples:
Get every row in the y column.
tab[:, 'y']
pykx.Table(pykx.q('
y
----
AAPL
GOOG
GOOG
MSFT
MSFT
MSFT
MSFT
AAPL
AAPL
AAPL
MSFT
GOOG
MSFT
MSFT
MSFT
MSFT
GOOG
AAPL
AAPL
AAPL
..
'))
Get all rows of the table where the value in the z column is greater than 250.0
tab[tab['z'] > 250.0]
pykx.Table(pykx.q('
x y z w v
------------------------
1 GOOG 292.5522 299 100
2 GOOG 283.0627 411 250
4 MSFT 453.4634 69 100
9 AAPL 405.7366 56 0
15 MSFT 311.1167 705 100
22 MSFT 325.2313 425 100
23 GOOG 389.6867 143 200
26 MSFT 374.9333 225
27 AAPL 351.9635 242 200
28 AAPL 292.2184 153 50
30 MSFT 445.9395 900 100
31 AAPL 406.9874 495 250
32 GOOG 484.709 127
37 MSFT 354.8972 965
39 MSFT 323.4569 825 100
40 GOOG 446.6471 161 0
44 GOOG 465.8163 447 0
47 AAPL 251.128 608 250
49 MSFT 315.3486 997
51 GOOG 379.302 16 0
..
'))
Replace all null values in the column v with the value -100.
tab.loc[tab['v'] == kx.q('0N'), 'v'] = -100
tab
pykx.Table(pykx.q('
x y z w v
-------------------------
0 AAPL 60.54614 184 -100
1 GOOG 292.5522 299 100
2 GOOG 283.0627 411 250
3 MSFT 42.38194 713 0
4 MSFT 453.4634 69 100
5 MSFT 12.28187 127 50
6 MSFT 76.00103 898 200
7 AAPL 17.802 822 0
8 AAPL 68.05945 197 0
9 AAPL 405.7366 56 0
10 MSFT 15.16717 964 50
11 GOOG 164.3905 826 -100
12 MSFT 81.26465 593 250
13 MSFT 218.853 332 200
14 MSFT 181.2571 502 50
15 MSFT 311.1167 705 100
16 GOOG 81.45016 110 -100
17 AAPL 70.85145 722 50
18 AAPL 172.3351 410 100
19 AAPL 9.906452 757 200
..
'))
Replace all locations in column v where the value is -100 with a null.
tab[tab['v'] == -100, 'v'] = kx.q('0N')
tab
pykx.Table(pykx.q('
x y z w v
------------------------
0 AAPL 60.54614 184
1 GOOG 292.5522 299 100
2 GOOG 283.0627 411 250
3 MSFT 42.38194 713 0
4 MSFT 453.4634 69 100
5 MSFT 12.28187 127 50
6 MSFT 76.00103 898 200
7 AAPL 17.802 822 0
8 AAPL 68.05945 197 0
9 AAPL 405.7366 56 0
10 MSFT 15.16717 964 50
11 GOOG 164.3905 826
12 MSFT 81.26465 593 250
13 MSFT 218.853 332 200
14 MSFT 181.2571 502 50
15 MSFT 311.1167 705 100
16 GOOG 81.45016 110
17 AAPL 70.85145 722 50
18 AAPL 172.3351 410 100
19 AAPL 9.906452 757 200
..
'))
Table.iloc[]¶
Table.iloc[:, :]
Purely integer-location based indexing for selection by position.
iloc is primarily integer position based (from 0 to length-1 of the axis), but may also be used with a BooleanVector.
Allowed inputs are:
- An integer, e.g. 5.
- A list or array of integers, e.g. [4, 3, 0].
- A slice object with ints, e.g. 1:7.
- A BooleanVector.
- A callable function with one argument (the calling Series or DataFrame) and that returns valid output for indexing (one of the above). This is useful in method chains, when you don’t have a reference to the calling object, but would like to base your selection on some value.
- A tuple of row and column indexes. The tuple elements consist of one of the above inputs, e.g. (0, 1).
Returns:
| Type | Description |
|---|---|
| Table | A table contatining only the columns / rows requested. |
Examples:
Get the second row from a table.
tab.iloc[1]
pykx.Table(pykx.q('
x y z w v
-----------------------
1 GOOG 292.5522 299 100
'))
Get the first 5 rows from a table.
tab.iloc[:5]
pykx.Table(pykx.q('
x y z w v
-----------------------
0 AAPL 60.54614 184
1 GOOG 292.5522 299 100
2 GOOG 283.0627 411 250
3 MSFT 42.38194 713 0
4 MSFT 453.4634 69 100
'))
Get all rows of the table where the y column is equal to AAPL.
tab.iloc[tab['y'] == 'AAPL']
pykx.Table(pykx.q('
x y z w v
------------------------
0 AAPL 60.54614 184
7 AAPL 17.802 822 0
8 AAPL 68.05945 197 0
9 AAPL 405.7366 56 0
17 AAPL 70.85145 722 50
18 AAPL 172.3351 410 100
19 AAPL 9.906452 757 200
20 AAPL 199.5309 790
25 AAPL 27.35117 145 0
27 AAPL 351.9635 242 200
28 AAPL 292.2184 153 50
31 AAPL 406.9874 495 250
35 AAPL 199.1284 252 200
38 AAPL 192.8447 604
45 AAPL 241.7971 919 50
46 AAPL 25.79979 131 50
47 AAPL 251.128 608 250
54 AAPL 374.937 986
57 AAPL 401.5671 996
58 AAPL 181.9741 768
..
'))
Get all rows of the table where the y column is equal to AAPL, and only return the y, z and w columns.
tab.iloc[tab['y'] == 'AAPL', ['y', 'z', 'w']]
pykx.Table(pykx.q('
y z w
-----------------
AAPL 60.54614 184
AAPL 17.802 822
AAPL 68.05945 197
AAPL 405.7366 56
AAPL 70.85145 722
AAPL 172.3351 410
AAPL 9.906452 757
AAPL 199.5309 790
AAPL 27.35117 145
AAPL 351.9635 242
AAPL 292.2184 153
AAPL 406.9874 495
AAPL 199.1284 252
AAPL 192.8447 604
AAPL 241.7971 919
AAPL 25.79979 131
AAPL 251.128 608
AAPL 374.937 986
AAPL 401.5671 996
AAPL 181.9741 768
..
'))
Replace all null values in the column v with the value -100.
tab.iloc[tab['v'] == kx.q('0N'), 'v'] = -100
tab
pykx.Table(pykx.q('
x y z w v
-------------------------
0 AAPL 60.54614 184 -100
1 GOOG 292.5522 299 100
2 GOOG 283.0627 411 250
3 MSFT 42.38194 713 0
4 MSFT 453.4634 69 100
5 MSFT 12.28187 127 50
6 MSFT 76.00103 898 200
7 AAPL 17.802 822 0
8 AAPL 68.05945 197 0
9 AAPL 405.7366 56 0
10 MSFT 15.16717 964 50
11 GOOG 164.3905 826 -100
12 MSFT 81.26465 593 250
13 MSFT 218.853 332 200
14 MSFT 181.2571 502 50
15 MSFT 311.1167 705 100
16 GOOG 81.45016 110 -100
17 AAPL 70.85145 722 50
18 AAPL 172.3351 410 100
19 AAPL 9.906452 757 200
..
'))
Table.pop()¶
Table.pop(item)
Remove a column or columns from a table by column name and return the column after it has been removed.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| item | Union[str, list[str]] | The column name or list of names to pop from the table. | required |
Returns:
| Type | Description |
|---|---|
| Table | A table contatining only the columns removed from the input table. |
Examples:
Remove the v column from the table and return it.
print(f'{tab.head()}\n\n')
print('Pop the `v` column out of the table')
print(f'{tab.pop("v").head()}\n\n')
print('Updated Table')
print(tab.head())
x y z w v ------------------------ 0 AAPL 60.54614 184 -100 1 GOOG 292.5522 299 100 2 GOOG 283.0627 411 250 3 MSFT 42.38194 713 0 4 MSFT 453.4634 69 100 Pop the `v` column out of the table v ---- -100 100 250 0 100 Updated Table x y z w ------------------- 0 AAPL 60.54614 184 1 GOOG 292.5522 299 2 GOOG 283.0627 411 3 MSFT 42.38194 713 4 MSFT 453.4634 69
Remove the z and w columns from the table and return them.
print(f'{tab.head()}\n\n')
print('Pop the `z` and `w` columns out of the table')
print(f'{tab.pop(["z", "w"]).head()}\n\n')
print('Updated Table')
print(tab.head())
x y z w ------------------- 0 AAPL 60.54614 184 1 GOOG 292.5522 299 2 GOOG 283.0627 411 3 MSFT 42.38194 713 4 MSFT 453.4634 69 Pop the `z` and `w` columns out of the table z w ------------ 60.54614 184 292.5522 299 283.0627 411 42.38194 713 453.4634 69 Updated Table x y ------ 0 AAPL 1 GOOG 2 GOOG 3 MSFT 4 MSFT