import os
# While this feature is in beta it must be explicitely enabled with this environment variable
# or by adding the `--pandas-api` flag to the QARGS environment variable
os.environ['PYKX_ENABLE_PANDAS_API'] = 'true'
import pykx as kx
import numpy as np
Constructing Tables¶
Table¶
Create a table from a list of rows or by converting a Python dictionary object
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| x | Union[list, array] | An array like object containing the contents of each row of the table. | None | 
| data | dict | A dictionary to be converted into a Table object. | None | 
| columns | list[str] | A list of column names to use when constructing from an array of rows. | None | 
Returns:
| Type | Description | 
|---|---|
| Table | The newly constructed table object. | 
Examples:
Create a table from a dictionary object.
kx.Table(data={'x': list(range(10)), 'y': [10 - x for x in range(10)]})
pykx.Table(pykx.q('
x y 
----
0 10
1 9 
2 8 
3 7 
4 6 
5 5 
6 4 
7 3 
8 2 
9 1 
'))
Create a Table from an array like object.
kx.Table([[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]])
pykx.Table(pykx.q('
x x1
----
0 1 
2 3 
4 5 
6 7 
8 9 
'))
Create a Table from an array like object and provide names for the columns to use.
kx.Table([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]], columns=['x', 'y', 'z'])
pykx.Table(pykx.q('
x y  z 
-------
0 1  2 
3 4  5 
6 7  8 
9 10 11
'))
Keyed Table¶
Create a keyed table from a list of rows or by converting a Python dictionary object
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| x | Union[list, array] | An array like object containing the contents of each row of the table. | None | 
| data | dict | A dictionary to be converted into a Table object. | None | 
| columns | list[str] | A list of column names to use when constructing from an array of rows. | None | 
| index | list[Any] | An array like object to use as the index column of the table. | None | 
Returns:
| Type | Description | 
|---|---|
| KeyedTable | The newly constructed keyed table object. | 
Examples:
Create a keyed table from a dictionary object.
kx.KeyedTable(data={'x': list(range(10)), 'y': list(10 - x for x in range(10))})
pykx.KeyedTable(pykx.q('
idx| x y 
---| ----
0  | 0 10
1  | 1 9 
2  | 2 8 
3  | 3 7 
4  | 4 6 
5  | 5 5 
6  | 6 4 
7  | 7 3 
8  | 8 2 
9  | 9 1 
'))
Create a keyed table from a list of rows.
kx.KeyedTable([[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]])
pykx.KeyedTable(pykx.q('
idx| x x1
---| ----
0  | 0 1 
1  | 2 3 
2  | 4 5 
3  | 6 7 
4  | 8 9 
'))
Create a keyed table from a list of rows and provide names for the resulting columns.
kx.KeyedTable([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]], columns=['x', 'y', 'z'])
pykx.KeyedTable(pykx.q('
idx| x y  z 
---| -------
0  | 0 1  2 
1  | 3 4  5 
2  | 6 7  8 
3  | 9 10 11
'))
Create a keyed table with a specified index column.
kx.KeyedTable(data={'x': list(range(10)), 'y': list(10 - x for x in range(10))}, index=[2 * x for x in range(10)])
pykx.KeyedTable(pykx.q('
idx| x y 
---| ----
0  | 0 10
2  | 1 9 
4  | 2 8 
6  | 3 7 
8  | 4 6 
10 | 5 5 
12 | 6 4 
14 | 7 3 
16 | 8 2 
18 | 9 1 
'))
Metadata¶
kx.q('N: 1000')
tab = kx.q('([] x: til N; y: N?`AAPL`GOOG`MSFT; z: N?500f; w: N?1000; v: N?(0N 0 50 100 200 250))')
tab
pykx.Table(pykx.q('
x  y    z        w   v  
------------------------
0  AAPL 280.0322 868 200
1  MSFT 48.24615 18     
2  AAPL 66.99184 301 50 
3  MSFT 435.7574 951 0  
4  AAPL 385.586  365 50 
5  AAPL 325.6626 386 0  
6  GOOG 68.13659 844 50 
7  AAPL 399.9134 972 100
8  AAPL 310.2315 457 50 
9  GOOG 486.8679 475 250
10 AAPL 483.0258 28  200
11 MSFT 210.9703 387 250
12 AAPL 49.11884 205 0  
13 GOOG 463.9106 628    
14 MSFT 144.0166 848 250
15 AAPL 258.0005 832 50 
16 MSFT 420.7807 117 200
17 GOOG 327.0546 914 250
18 MSFT 421.2875 470 0  
19 MSFT 481.383  446 50 
..
'))
Table.columns¶
Get the name of each column in the table
tab.columns
['x', 'y', 'z', 'w', 'v']
Table.dtypes¶
Get the pandas dtype of each column
tab.dtypes
x int64 y object z float64 w int64 v int64 dtype: object
Table.empty¶
Returns True if the table is empty otherwise returns False.
tab.empty
False
Table.ndim¶
Get the nuber of columns within the table.
tab.ndim
5
Table.shape¶
Get the shape of the table as a tuple (number of rows, number of columns).
tab.shape
(1000, 5)
Table.size¶
Get the number of values in the table (rows * cols).
tab.size
5000
Table.mean()¶
Table.mean(axis=0, numeric_only=False)Get the mean of values across the requested axis.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| axis | int | The axis to calculate mean across 0 is columns, 1 is rows. | 0 | 
| numeric_only | bool | Include only columns / rows with numeric data. | False | 
Returns:
| Type | Description | 
|---|---|
| Dictionary | The mean across each row / column with the key corresponding to the row number or column name. | 
Examples:
Calculate the mean across the columns of a table
tab = kx.Table(data=
    {
        'a': [1, 2, 2, 4],
        'b': [1, 2, 6, 7],
        'c': [7, 8, 9, 10],
        'd': [7, 11, 14, 14]
    }
)
tab
pykx.Table(pykx.q('
a b c  d 
---------
1 1 7  7 
2 2 8  11
2 6 9  14
4 7 10 14
'))
tab.mean()
pykx.Dictionary(pykx.q('
a| 2.25
b| 4
c| 8.5
d| 11.5
'))
Calculate the mean across the rows of a table
tab.mean(axis=1)
pykx.Dictionary(pykx.q('
0| 4
1| 5.75
2| 7.75
3| 8.75
'))
Table.median()¶
Table.median(axis=0, numeric_only=False)Get the median of values across the requested axis.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| axis | int | The axis to calculate median across 0 is columns, 1 is rows. | 0 | 
| numeric_only | bool | Include only columns / rows with numeric data. | False | 
Returns:
| Type | Description | 
|---|---|
| Dictionary | The median across each row / column with the key corresponding to the row number or column name. | 
Examples:
Calculate the median across the columns of a table
tab = kx.Table(data=
    {
        'a': [1, 2, 2, 4],
        'b': [1, 2, 6, 7],
        'c': [7, 8, 9, 10],
        'd': [7, 11, 14, 14]
    }
)
tab
pykx.Table(pykx.q('
a b c  d 
---------
1 1 7  7 
2 2 8  11
2 6 9  14
4 7 10 14
'))
tab.median()
pykx.Dictionary(pykx.q('
a| 2
b| 4
c| 8.5
d| 12.5
'))
Calculate the median across the rows of a table
tab.median(axis=1)
pykx.Dictionary(pykx.q('
0| 4
1| 5
2| 7.5
3| 8.5
'))
Table.mode()¶
Table.mode(axis=0, numeric_only=False, dropna=True)Get the mode of values across the requested axis.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| axis | int | The axis to calculate mode across 0 is columns, 1 is rows. | 0 | 
| numeric_only | bool | Include only columns / rows with numeric data. | False | 
| dropna | bool | Remove null values from the data before calculating the mode. | True | 
Returns:
| Type | Description | 
|---|---|
| Table | The mode across each row / column with the column corresponding to the row number or column name. | 
Examples:
Calculate the mode across the columns of a table
tab = kx.Table(data=
    {
        'a': [1, 2, 2, 4],
        'b': [1, 2, 6, 7],
        'c': [7, 8, 9, 10],
        'd': [7, 11, 14, 14]
    }
)
tab
pykx.Table(pykx.q('
a b c  d 
---------
1 1 7  7 
2 2 8  11
2 6 9  14
4 7 10 14
'))
tab.mode()
pykx.Table(pykx.q('
a b c  d 
---------
2 1 7  14
  2 8    
  6 9    
  7 10   
'))
Calculate the median across the rows of a table
tab.mode(axis=1)
pykx.Table(pykx.q('
idx 0 1 2  3 
-------------
0   1 7      
1   2        
2   2 6 9  14
3   4 7 10 14
'))
Calculate the mode across columns and keep null values.
tab = kx.Table(data=
    {
        'x': [0, 1, 2, 3, 4, 5, 6, 7, np.NaN, np.NaN],
        'y': [10, 11, 12, 13, 14, 15, 16, 17, 18, np.NaN],
        'z': ['a', 'b', 'c', 'd', 'd', 'e', 'e', 'f', 'g', 'h']
    }
)
tab
pykx.Table(pykx.q('
x  y  z
-------
0  10 a
1  11 b
2  12 c
3  13 d
4  14 d
5  15 e
6  16 e
7  17 f
0n 18 g
0n 0n h
'))
tab.mode(dropna=False)
pykx.Table(pykx.q('
x y  z
------
  10 d
  11 e
  12  
  13  
  14  
  15  
  16  
  17  
  18  
  0n  
'))
Indexing¶
# The examples in this section will use this example table filled with random data
kx.q('N: 1000')
tab = kx.q('([] x: til N; y: N?`AAPL`GOOG`MSFT; z: N?500f; w: N?1000; v: N?(0N 0 50 100 200 250))')
tab
pykx.Table(pykx.q('
x  y    z        w   v  
------------------------
0  AAPL 60.54614 184    
1  GOOG 292.5522 299 100
2  GOOG 283.0627 411 250
3  MSFT 42.38194 713 0  
4  MSFT 453.4634 69  100
5  MSFT 12.28187 127 50 
6  MSFT 76.00103 898 200
7  AAPL 17.802   822 0  
8  AAPL 68.05945 197 0  
9  AAPL 405.7366 56  0  
10 MSFT 15.16717 964 50 
11 GOOG 164.3905 826    
12 MSFT 81.26465 593 250
13 MSFT 218.853  332 200
14 MSFT 181.2571 502 50 
15 MSFT 311.1167 705 100
16 GOOG 81.45016 110    
17 AAPL 70.85145 722 50 
18 AAPL 172.3351 410 100
19 AAPL 9.906452 757 200
..
'))
Table.head()¶
Table.head(n=5)Get the first n rows from a table.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| n | int | The number of rows to return. | 5 | 
Returns:
| Type | Description | 
|---|---|
| Table | The first nrows of the table. | 
Examples:
Return the first 5 rows of the table.
tab.head()
pykx.Table(pykx.q('
x y    z        w   v  
-----------------------
0 AAPL 60.54614 184    
1 GOOG 292.5522 299 100
2 GOOG 283.0627 411 250
3 MSFT 42.38194 713 0  
4 MSFT 453.4634 69  100
'))
Return the first 10 rows of the table.
tab.head(10)
pykx.Table(pykx.q('
x y    z        w   v  
-----------------------
0 AAPL 60.54614 184    
1 GOOG 292.5522 299 100
2 GOOG 283.0627 411 250
3 MSFT 42.38194 713 0  
4 MSFT 453.4634 69  100
5 MSFT 12.28187 127 50 
6 MSFT 76.00103 898 200
7 AAPL 17.802   822 0  
8 AAPL 68.05945 197 0  
9 AAPL 405.7366 56  0  
'))
Table.tail()¶
Table.tail(n=5)Get the last n rows from a table.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| n | int | The number of rows to return. | 5 | 
Returns:
| Type | Description | 
|---|---|
| Table | The last nrows of the table. | 
Examples:
Return the last 5 rows of the table.
tab.tail()
pykx.Table(pykx.q('
x   y    z        w   v 
------------------------
995 AAPL 54.79148 295   
996 AAPL 448.0876 779   
997 GOOG 173.26   896 0 
998 GOOG 350.8906 200 50
999 MSFT 409.2486 807   
'))
Return the last 10 rows of the table.
tab.tail(10)
pykx.Table(pykx.q('
x   y    z        w   v  
-------------------------
990 GOOG 407.4229 546 100
991 AAPL 18.6764  228 50 
992 AAPL 408.8369 665 200
993 AAPL 282.1039 166    
994 GOOG 69.54563 273 200
995 AAPL 54.79148 295    
996 AAPL 448.0876 779    
997 GOOG 173.26   896 0  
998 GOOG 350.8906 200 50 
999 MSFT 409.2486 807    
'))
Table.get()¶
Table.get(key, default=None)Get a column or columns from a table by key, if the key does not exist return the default value.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| key | Union[str, list[str]] | The column name or list of names to get from the table. | required | 
| default | int | The default value if the key is not found. | None | 
Returns:
| Type | Description | 
|---|---|
| Union[Table, Any] | A table contatining only the columns requested or the default value. | 
Examples:
Get the y column from the table.
tab.get('y')
pykx.Table(pykx.q('
y   
----
AAPL
GOOG
GOOG
MSFT
MSFT
MSFT
MSFT
AAPL
AAPL
AAPL
MSFT
GOOG
MSFT
MSFT
MSFT
MSFT
GOOG
AAPL
AAPL
AAPL
..
'))
Get the y and z columns from the table.
tab.get(['y', 'z'])
pykx.Table(pykx.q('
y    z       
-------------
AAPL 60.54614
GOOG 292.5522
GOOG 283.0627
MSFT 42.38194
MSFT 453.4634
MSFT 12.28187
MSFT 76.00103
AAPL 17.802  
AAPL 68.05945
AAPL 405.7366
MSFT 15.16717
GOOG 164.3905
MSFT 81.26465
MSFT 218.853 
MSFT 181.2571
MSFT 311.1167
GOOG 81.45016
AAPL 70.85145
AAPL 172.3351
AAPL 9.906452
..
'))
Attempt to get the q column from the table and recieve none as that column does not exist.
print(tab.get('q'))
None
Attempt to get the q column from the table and recieve the default value not found as that column does not exist.
tab.get('q', 'not found')
'not found'
Table.at[]¶
Table.at[row, col]Access a single value for a row / column pair.
Similar to loc[], in that both provide label-based lookups. Use at if you only need to get or set a single value.
The at property can be used for both assignment and retrieval of values at a given row and column.
Examples:
Get the value of the z column in the 997th row.
tab.at[997, 'z']
pykx.FloatAtom(pykx.q('173.26'))
Reassign the value of the z column in the 997th row to 3.14159.
tab.at[997, 'z'] = 3.14159
tab.at[997, 'z']
pykx.FloatAtom(pykx.q('3.14159'))
Table.loc[]¶
Table.loc[:, :]Access a group of rows and columns by label or by BooleanVector.
loc is a label based form of indexing, but may also be used with a boolean array.
Allowed inputs are:
- A single label, e.g. 5 or 'a', (note that 5 is interpreted as a label of the index, and never as an integer position along the index)
- A list or array of labels, e.g. ['a', 'b', 'c']
- A slice object with labels, e.g. 'a':'f'- Warning contrary to usual python slices, both the start and the stop are included
 
- A BooleanVector of the same length as the axis being sliced
- An alignable BooleanVector. The index of the key will be aligned before masking
- An alignable Index. The Index of the returned selection will be the input
- A callable function with one argument (the calling Table like object) and that returns valid output for indexing (e.g. one of the above)
Note:
When the Pandas API is enabled, using [] to index into a table will use Table.loc[]
Returns:
| Type | Description | 
|---|---|
| Table | A table contatining only the columns / rows requested. | 
Examples:
Get every row in the y column.
tab[:, 'y']
pykx.Table(pykx.q('
y   
----
AAPL
GOOG
GOOG
MSFT
MSFT
MSFT
MSFT
AAPL
AAPL
AAPL
MSFT
GOOG
MSFT
MSFT
MSFT
MSFT
GOOG
AAPL
AAPL
AAPL
..
'))
Get all rows of the table where the value in the z column is greater than 250.0
tab[tab['z'] > 250.0]
pykx.Table(pykx.q('
x  y    z        w   v  
------------------------
1  GOOG 292.5522 299 100
2  GOOG 283.0627 411 250
4  MSFT 453.4634 69  100
9  AAPL 405.7366 56  0  
15 MSFT 311.1167 705 100
22 MSFT 325.2313 425 100
23 GOOG 389.6867 143 200
26 MSFT 374.9333 225    
27 AAPL 351.9635 242 200
28 AAPL 292.2184 153 50 
30 MSFT 445.9395 900 100
31 AAPL 406.9874 495 250
32 GOOG 484.709  127    
37 MSFT 354.8972 965    
39 MSFT 323.4569 825 100
40 GOOG 446.6471 161 0  
44 GOOG 465.8163 447 0  
47 AAPL 251.128  608 250
49 MSFT 315.3486 997    
51 GOOG 379.302  16  0  
..
'))
Replace all null values in the column v with the value -100.
tab.loc[tab['v'] == kx.q('0N'), 'v'] = -100
tab
pykx.Table(pykx.q('
x  y    z        w   v   
-------------------------
0  AAPL 60.54614 184 -100
1  GOOG 292.5522 299 100 
2  GOOG 283.0627 411 250 
3  MSFT 42.38194 713 0   
4  MSFT 453.4634 69  100 
5  MSFT 12.28187 127 50  
6  MSFT 76.00103 898 200 
7  AAPL 17.802   822 0   
8  AAPL 68.05945 197 0   
9  AAPL 405.7366 56  0   
10 MSFT 15.16717 964 50  
11 GOOG 164.3905 826 -100
12 MSFT 81.26465 593 250 
13 MSFT 218.853  332 200 
14 MSFT 181.2571 502 50  
15 MSFT 311.1167 705 100 
16 GOOG 81.45016 110 -100
17 AAPL 70.85145 722 50  
18 AAPL 172.3351 410 100 
19 AAPL 9.906452 757 200 
..
'))
Replace all locations in column v where the value is -100 with a null.
tab[tab['v'] == -100, 'v'] = kx.q('0N')
tab
pykx.Table(pykx.q('
x  y    z        w   v  
------------------------
0  AAPL 60.54614 184    
1  GOOG 292.5522 299 100
2  GOOG 283.0627 411 250
3  MSFT 42.38194 713 0  
4  MSFT 453.4634 69  100
5  MSFT 12.28187 127 50 
6  MSFT 76.00103 898 200
7  AAPL 17.802   822 0  
8  AAPL 68.05945 197 0  
9  AAPL 405.7366 56  0  
10 MSFT 15.16717 964 50 
11 GOOG 164.3905 826    
12 MSFT 81.26465 593 250
13 MSFT 218.853  332 200
14 MSFT 181.2571 502 50 
15 MSFT 311.1167 705 100
16 GOOG 81.45016 110    
17 AAPL 70.85145 722 50 
18 AAPL 172.3351 410 100
19 AAPL 9.906452 757 200
..
'))
Table.iloc[]¶
Table.iloc[:, :]Purely integer-location based indexing for selection by position.
iloc is primarily integer position based (from 0 to length-1 of the axis), but may also be used with a BooleanVector.
Allowed inputs are:
- An integer, e.g. 5.
- A list or array of integers, e.g. [4, 3, 0].
- A slice object with ints, e.g. 1:7.
- A BooleanVector.
- A callable function with one argument (the calling Series or DataFrame) and that returns valid output for indexing (one of the above). This is useful in method chains, when you don’t have a reference to the calling object, but would like to base your selection on some value.
- A tuple of row and column indexes. The tuple elements consist of one of the above inputs, e.g. (0, 1).
Returns:
| Type | Description | 
|---|---|
| Table | A table contatining only the columns / rows requested. | 
Examples:
Get the second row from a table.
tab.iloc[1]
pykx.Table(pykx.q('
x y    z        w   v  
-----------------------
1 GOOG 292.5522 299 100
'))
Get the first 5 rows from a table.
tab.iloc[:5]
pykx.Table(pykx.q('
x y    z        w   v  
-----------------------
0 AAPL 60.54614 184    
1 GOOG 292.5522 299 100
2 GOOG 283.0627 411 250
3 MSFT 42.38194 713 0  
4 MSFT 453.4634 69  100
'))
Get all rows of the table where the y column is equal to AAPL.
tab.iloc[tab['y'] == 'AAPL']
pykx.Table(pykx.q('
x  y    z        w   v  
------------------------
0  AAPL 60.54614 184    
7  AAPL 17.802   822 0  
8  AAPL 68.05945 197 0  
9  AAPL 405.7366 56  0  
17 AAPL 70.85145 722 50 
18 AAPL 172.3351 410 100
19 AAPL 9.906452 757 200
20 AAPL 199.5309 790    
25 AAPL 27.35117 145 0  
27 AAPL 351.9635 242 200
28 AAPL 292.2184 153 50 
31 AAPL 406.9874 495 250
35 AAPL 199.1284 252 200
38 AAPL 192.8447 604    
45 AAPL 241.7971 919 50 
46 AAPL 25.79979 131 50 
47 AAPL 251.128  608 250
54 AAPL 374.937  986    
57 AAPL 401.5671 996    
58 AAPL 181.9741 768    
..
'))
Get all rows of the table where the y column is equal to AAPL, and only return the y, z and w columns.
tab.iloc[tab['y'] == 'AAPL', ['y', 'z', 'w']]
pykx.Table(pykx.q('
y    z        w  
-----------------
AAPL 60.54614 184
AAPL 17.802   822
AAPL 68.05945 197
AAPL 405.7366 56 
AAPL 70.85145 722
AAPL 172.3351 410
AAPL 9.906452 757
AAPL 199.5309 790
AAPL 27.35117 145
AAPL 351.9635 242
AAPL 292.2184 153
AAPL 406.9874 495
AAPL 199.1284 252
AAPL 192.8447 604
AAPL 241.7971 919
AAPL 25.79979 131
AAPL 251.128  608
AAPL 374.937  986
AAPL 401.5671 996
AAPL 181.9741 768
..
'))
Replace all null values in the column v with the value -100.
tab.iloc[tab['v'] == kx.q('0N'), 'v'] = -100
tab
pykx.Table(pykx.q('
x  y    z        w   v   
-------------------------
0  AAPL 60.54614 184 -100
1  GOOG 292.5522 299 100 
2  GOOG 283.0627 411 250 
3  MSFT 42.38194 713 0   
4  MSFT 453.4634 69  100 
5  MSFT 12.28187 127 50  
6  MSFT 76.00103 898 200 
7  AAPL 17.802   822 0   
8  AAPL 68.05945 197 0   
9  AAPL 405.7366 56  0   
10 MSFT 15.16717 964 50  
11 GOOG 164.3905 826 -100
12 MSFT 81.26465 593 250 
13 MSFT 218.853  332 200 
14 MSFT 181.2571 502 50  
15 MSFT 311.1167 705 100 
16 GOOG 81.45016 110 -100
17 AAPL 70.85145 722 50  
18 AAPL 172.3351 410 100 
19 AAPL 9.906452 757 200 
..
'))
Table.pop()¶
Table.pop(item)Remove a column or columns from a table by column name and return the column after it has been removed.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| item | Union[str, list[str]] | The column name or list of names to pop from the table. | required | 
Returns:
| Type | Description | 
|---|---|
| Table | A table contatining only the columns removed from the input table. | 
Examples:
Remove the v column from the table and return it.
print(f'{tab.head()}\n\n')
print('Pop the `v` column out of the table')
print(f'{tab.pop("v").head()}\n\n')
print('Updated Table')
print(tab.head())
x y z w v ------------------------ 0 AAPL 60.54614 184 -100 1 GOOG 292.5522 299 100 2 GOOG 283.0627 411 250 3 MSFT 42.38194 713 0 4 MSFT 453.4634 69 100 Pop the `v` column out of the table v ---- -100 100 250 0 100 Updated Table x y z w ------------------- 0 AAPL 60.54614 184 1 GOOG 292.5522 299 2 GOOG 283.0627 411 3 MSFT 42.38194 713 4 MSFT 453.4634 69
Remove the z and w columns from the table and return them.
print(f'{tab.head()}\n\n')
print('Pop the `z` and `w` columns out of the table')
print(f'{tab.pop(["z", "w"]).head()}\n\n')
print('Updated Table')
print(tab.head())
x y z w ------------------- 0 AAPL 60.54614 184 1 GOOG 292.5522 299 2 GOOG 283.0627 411 3 MSFT 42.38194 713 4 MSFT 453.4634 69 Pop the `z` and `w` columns out of the table z w ------------ 60.54614 184 292.5522 299 283.0627 411 42.38194 713 453.4634 69 Updated Table x y ------ 0 AAPL 1 GOOG 2 GOOG 3 MSFT 4 MSFT
Reindexing¶
# The examples in this section will use this example table filled with random data
kx.q('N: 1000')
tab = kx.q('([] x: til N; y: N?`AAPL`GOOG`MSFT; z: N?500f; w: N?1000; v: N?(0N 0 50 100 200 250))')
tab.head()
pykx.Table(pykx.q('
x y    z        w   v  
-----------------------
0 AAPL 246.3763 948 100
1 GOOG 337.246  652 100
2 MSFT 143.1573 420 50 
3 AAPL 314.778  210 200
4 AAPL 444.4764 459 0  
'))
Drop rows.
tab.drop([0, 2, 4, 6, 8, 10]).head()
pykx.Table(pykx.q('
x y    z        w   v  
-----------------------
1 GOOG 337.246  652 100
3 AAPL 314.778  210 200
5 MSFT 409.7593 916 100
7 AAPL 478.1098 608 0  
9 AAPL 58.89948 419 250
'))
Drop columns.
tab.drop('y', axis=1).head()
pykx.Table(pykx.q('
x z        w   v  
------------------
0 246.3763 948 100
1 337.246  652 100
2 143.1573 420 50 
3 314.778  210 200
4 444.4764 459 0  
'))
Drop duplicates.
tab2 = kx.q('([] 100?`AAPL`GOOG`MSFT; 100?3)')
tab2
pykx.Table(pykx.q('
x    x1
-------
GOOG 2 
MSFT 2 
AAPL 2 
MSFT 1 
GOOG 2 
AAPL 2 
AAPL 2 
MSFT 0 
AAPL 1 
GOOG 0 
GOOG 2 
MSFT 1 
GOOG 1 
MSFT 0 
AAPL 0 
MSFT 2 
MSFT 2 
AAPL 0 
GOOG 1 
MSFT 2 
..
'))
tab2.drop_duplicates()
pykx.Table(pykx.q('
x    x1
-------
GOOG 2 
MSFT 2 
AAPL 2 
MSFT 1 
MSFT 0 
AAPL 1 
GOOG 0 
GOOG 1 
AAPL 0 
'))
Renaming columns.
tab.head()
pykx.Table(pykx.q('
x y    z        w   v  
-----------------------
0 AAPL 246.3763 948 100
1 GOOG 337.246  652 100
2 MSFT 143.1573 420 50 
3 AAPL 314.778  210 200
4 AAPL 444.4764 459 0  
'))
tab.rename(columns={'y': 'symbol', 'z': 'price'}).head()
pykx.Table(pykx.q('
x symbol price    w   v  
-------------------------
0 AAPL   246.3763 948 100
1 GOOG   337.246  652 100
2 MSFT   143.1573 420 50 
3 AAPL   314.778  210 200
4 AAPL   444.4764 459 0  
'))
Sampling data.
tab.sample(n=10)
pykx.Table(pykx.q('
x   y    z        w   v  
-------------------------
876 GOOG 376.395  336 100
654 MSFT 285.7745 69  0  
178 GOOG 268.9795 537 50 
105 MSFT 267.6886 339 200
451 AAPL 166.5928 567    
247 MSFT 422.5578 626 50 
688 GOOG 332.8294 150 250
418 MSFT 328.3853 540 250
500 GOOG 378.7218 160 50 
63  AAPL 153.0027 358 0  
'))
tab.sample(frac=0.1)
pykx.Table(pykx.q('
x   y    z        w   v  
-------------------------
115 AAPL 430.3011 371 0  
420 AAPL 474.9089 833 100
806 MSFT 389.469  581 250
25  MSFT 399.2739 372 0  
895 AAPL 102.9643 871 100
273 MSFT 377.4773 958 250
388 MSFT 475.8273 803 250
386 GOOG 448.5738 758 200
363 GOOG 499.7035 132 200
489 GOOG 308.7938 4   50 
77  AAPL 382.6731 988 0  
20  MSFT 12.63442 321 250
482 AAPL 236.7455 94  50 
699 AAPL 190.6258 715 100
107 GOOG 95.34587 179 250
877 AAPL 287.4927 969    
897 MSFT 326.4463 126 0  
934 AAPL 352.6163 622 250
662 MSFT 302.0044 125 0  
580 MSFT 450.5893 172 50 
..
'))
tab.sample(frac=0.1, replace=True)
pykx.Table(pykx.q('
x   y    z        w   v  
-------------------------
172 AAPL 475.7677 581 100
908 GOOG 164.7975 916    
949 MSFT 495.2473 672 0  
812 AAPL 408.4684 493 100
269 MSFT 291.8758 322 200
390 GOOG 464.4193 578 0  
720 MSFT 479.4839 765 50 
103 MSFT 52.89304 853 0  
203 GOOG 27.14407 578 200
216 MSFT 359.1574 455 100
331 MSFT 269.0314 570 250
896 MSFT 34.10398 376 50 
358 MSFT 225.2531 130 50 
539 GOOG 229.2499 504 200
833 MSFT 70.12554 750 100
594 MSFT 33.19471 549 200
385 MSFT 208.6966 485 200
539 GOOG 229.2499 504 200
8   MSFT 221.2105 308 50 
154 GOOG 174.6997 417 50 
..
'))
Table.select_dtypes()¶
Table.select_dtypes(include=None, exclude=None) Return a subset of the DataFrame’s columns based on the column dtypes.
Parameters:¶
| Name | Type | Description | Default | 
|---|---|---|---|
| include | list-like/scalar | A selection of dtypes or strings to be included. | None | 
| exclude | list-like/scalar | A selection of dtypes or strings to be excluded. | None | 
At least one of these parameters must be supplied.
Returns:¶
| Type | Description | 
|---|---|
| Dataframe | The subset of the frame including the dtypes in includeand excluding the dtypes inexclude. | 
Examples:¶
df = kx.q('([] c1:`a`b`c; c2:1 2 3h; c3:1 2 3j; c4:1 2 3i)')
# Exclude symbol column
print(df.select_dtypes(exclude = kx.SymbolVector))
# Include a list of column types
print(df.select_dtypes(include = [kx.ShortVector, kx.LongVector]))
c2 c3 c4 -------- 1 1 1 2 2 2 3 3 3 c2 c3 ----- 1 1 2 2 3 3
Table.astype()¶
Table.astype(dtype, copy=True, errors='raise')
Cast a column/columns of the Dataframes object to a specified dtype.
Parameters:¶
| Name | Type | Description | Default | 
|---|---|---|---|
| dtype | data type, or dict of column name -> data type | Use a PyKx wrapper data type or Python type to cast all columns to the same type. Alternatively, use {col: dtype, …}, where col is a column label and dtype is PyKx wrapper data type to cast one or more of the DataFrame’s columns to column-specific types. | |
| copy | Boolean | Default of True, False not implemented | True | 
| errors | {‘raise’, ‘ignore’} | If passed anything other than 'raise', it will return the dataframe | 'raise' | 
Returns:¶
| Type | Description | 
|---|---|
| Dataframe | The dataframe with columns casted according to passed dtypes | 
Examples:¶
df = kx.q('([] c1:1 2 3i; c2:1 2 3j; c3:1 2 3h; c4:1 2 3i)')
## Casting all columns to dtype LongVector
print(df.astype(kx.LongVector))
## Casting as specified in the dcitionary supplied with
## dtype  per column
print(df.astype({'c1':kx.LongVector, 'c2':'kx.ShortVector'}))
df = kx.q('([] c1:3#.z.p; c2:`abc`def`ghi; c3:1 2 3j; c4:("abc";"def";"ghi");c5:"abc";c6:(1 2 3;4 5 6;7 8 9))')
## Casting char and string columns to symbol columns
print(df.astype({'c4':kx.SymbolVector, 'c5':kx.SymbolVector}))
c1 c2 c3 c4 ----------- 1 1 1 1 2 2 2 2 3 3 3 3 c1 c2 c3 c4 ----------- 1 1 1 1 2 2 2 2 3 3 3 3 c1 c2 c3 c4 c5 c6 ------------------------------------------------- 2023.05.19D12:59:46.012915784 abc 1 abc a 1 2 3 2023.05.19D12:59:46.012915784 def 2 def b 4 5 6 2023.05.19D12:59:46.012915784 ghi 3 ghi c 7 8 9
Merging¶
Table.merge()¶
Table.merge(
    right,
    how='inner',
    on=None,
    left_on=None,
    right_on=None,
    left_index=False,
    right_index=False,
    sort=False,
    suffixes=('_x', '_y'),
    copy=True,
    validate=None,
    q_join=False
)Merge Table or KeyedTable objects with a database-style join.
The join is done on columns or indexes. If joining columns on columns, the DataFrame indexes will be ignored. Otherwise if joining indexes on indexes or indexes on a column or columns, the index will be passed on. When performing a cross merge, no column specifications to merge on are allowed.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| right | Union[Table/KeyedTable] | The object to merge with. | required | 
| how | str | The type of join to be used. One of {‘left’, ‘right’, ‘outer’, ‘inner’, ‘cross’}. | ‘inner’ | 
| on | str | The column name to join on. | None | 
| left_on | str | The column name in the left table to join on. | None | 
| right_on | str | The column name in the right table to join on. | None | 
| left_index | bool | Use the index of the left Table. | False | 
| right_index | bool | Use the index of the right Table. | False | 
| sort | bool | Sort the join keys of the resulting table. | False | 
| suffixes | Tuple(str, str) | The number of rows to return. | ('_x', '_y') | 
| copy | bool | If False avoid copies and modify the input table. | None | 
| validate | str | If specified checks if merge matches specified type. - “one_to_one” or “1:1”: check if merge keys are unique in both left and right datasets. - “one_to_many” or “1:m”: check if merge keys are unique in left dataset. - “many_to_one” or “m:1”: check if merge keys are unique in right dataset. - “many_to_many” or “m:m”: allowed, but does not result in checks. | None | 
| q_join | bool | If True perform native q joins instead of the pandas SQL like joins. More documentation around these joins can be found here. | False | 
Returns:
| Type | Description | 
|---|---|
| Table / KeyedTable | The resulting table like object after the join has been preformed. | 
Examples:
Merge tab1 and tab2 on the lkey and rkey columns. The value columns have the default suffixes, _x and _y, appended.
tab1 = kx.Table(data={'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
tab2 = kx.Table(data={'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
tab1.merge(tab2, left_on='lkey', right_on='rkey')
pykx.Table(pykx.q('
lkey value_x rkey value_y
-------------------------
foo  1       foo  5      
foo  1       foo  8      
foo  5       foo  5      
foo  5       foo  8      
bar  2       bar  6      
baz  3       baz  7      
'))
Merge tab1 and tab2 on the lkey and rkey columns using a native q inner join. The value columns have the default suffixes, _x and _y, appended.
tab1.merge(tab2, left_on='lkey', right_on='rkey', q_join=True)
pykx.Table(pykx.q('
lkey value_x rkey value_y
-------------------------
foo  1       foo  5      
bar  2       bar  6      
baz  3       baz  7      
foo  5       foo  5      
'))
Merge tab1 and tab2 with specified left and right suffixes appended to any overlapping columns.
tab1.merge(tab2, left_on='lkey', right_on='rkey', suffixes=('_left', '_right'))
pykx.Table(pykx.q('
lkey value_left rkey value_right
--------------------------------
foo  1          foo  5          
foo  1          foo  8          
foo  5          foo  5          
foo  5          foo  8          
bar  2          bar  6          
baz  3          baz  7          
'))
Merge tab1 and tab2 but raise an exception if the Tables have any overlapping columns.
try:
    tab1.merge(tab2, left_on='lkey', right_on='rkey', suffixes=(False, False))
except BaseException as e:
    print(f'Caught Error: {e}')
Caught Error: Columns overlap but no suffix specified: ['value']
tab1 = kx.Table(data={'a': ['foo', 'bar'], 'b': [1, 2]})
tab2 = kx.Table(data={'a': ['foo', 'baz'], 'c': [3, 4]})
Merge tab1 and tab2 on the a column using an inner join.
tab1.merge(tab2, how='inner', on='a')
pykx.Table(pykx.q('
a   b c
-------
foo 1 3
'))
Merge tab1 and tab2 on the a column using a left join.
tab1.merge(tab2, how='left', on='a')
--------------------------------------------------------------------------- QError Traceback (most recent call last) Cell In[67], line 1 ----> 1 tab1.merge(tab2, how='left', on='a') File /usr/local/lib/python3.10/site-packages/pykx/pandas_api/__init__.py:17, in api_return.<locals>.return_val(*args, **kwargs) 15 if issubclass(type(tab), MetaAtomic): 16 tab = tab.tab ---> 17 res = func(*args, **kwargs) 18 if not issubclass(type(res), PandasIndexing): 19 return res File /usr/local/lib/python3.10/site-packages/pykx/pandas_api/pandas_merge.py:334, in PandasMerge.merge(self, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, validate, q_join) 327 res = _q_merge_tables( 328 left, 329 right, 330 how, 331 added_idx 332 ) 333 else: --> 334 res = _merge_tables(left, right, on, how, added_idx, left_index, right_index, distinct) 335 res = _clean_result(self, res, how, left_index, right_index, added_idx, left, right) 336 if sort and not added_idx: File /usr/local/lib/python3.10/site-packages/pykx/pandas_api/pandas_merge.py:189, in _merge_tables(left, right, on, how, added_idx, left_index, right_index, distinct) 187 return res 188 elif how == 'left': --> 189 res = __left_join(left, right, on, distinct) 190 elif how == 'right': 191 cols = q.cols(left).py() File /usr/local/lib/python3.10/site-packages/pykx/pandas_api/pandas_merge.py:31, in __left_join(left, right, on, distinct) 29 if distinct: 30 return q.ej(on, left, right) ---> 31 return q( 32 '{[k; t; tt] a:ej[k; t; tt]; a uj t except ((count[k] _ cols tt)_a)}', 33 on, 34 left, 35 right 36 ) File /usr/local/lib/python3.10/site-packages/pykx/embedded_q.py:142, in EmbeddedQ.__call__(self, query, wait, sync, *args) 140 result = _keval(bytes(wrappers.CharVector(query)), *[wrappers.K(x) for x in args]) 141 if wait is None or wait: --> 142 return factory(result, False) 143 return self('::', wait=True) File /usr/local/lib/python3.10/site-packages/pykx/_wrappers.pyx:499, in pykx._wrappers._factory() File /usr/local/lib/python3.10/site-packages/pykx/_wrappers.pyx:492, in pykx._wrappers.factory() QError: type
Merge tab1 and tab2 using a cross join.
tab1 = kx.Table(data={'left': ['foo', 'bar']})
tab2 = kx.Table(data={'right': [7, 8]})
tab1.merge(tab2, how='cross')
pykx.Table(pykx.q('
left right
----------
foo  7    
foo  8    
bar  7    
bar  8    
'))