Python Pandas GroupBy function. The following shows how to use the `groupby` method of a DataFrame from the pandas module.
In [1]:
import pandas as pd
In [2]:
# Toy data: two constant numeric columns plus a categorical 'letter'
# column ('a' x2, 'b' x2, 'c' x1) that later cells group on.
d = dict(
    one=[1] * 5,
    two=[2] * 5,
    letter=list('aabbc'),
)
df = pd.DataFrame(d)
# Bare expression so the notebook renders the frame.
df
Out[2]:
| | one | two | letter |
---|---|---|---|
0 | 1 | 2 | a |
1 | 1 | 2 | a |
2 | 1 | 2 | b |
3 | 1 | 2 | b |
4 | 1 | 2 | c |
In [3]:
# Group the rows by the 'letter' column.
# NOTE: `one` here is the GroupBy object, not the 'one' column of df.
one = df.groupby(by='letter')
# Sum the remaining columns within each letter group.
one.sum()
Out[3]:
| | one | two |
---|---|---|
letter | ||
a | 2 | 4 |
b | 2 | 4 |
c | 1 | 2 |
In [4]:
# Group on two keys at once and sum what is left ('two').
# With the default as_index=True the keys become the result's index.
letterone = (
    df
    .groupby(by=['letter', 'one'])
    .sum()
)
letterone
Out[4]:
| | | two |
---|---|---|
letter | one | |
a | 1 | 4 |
b | 1 | 4 |
c | 1 | 2 |
In [5]:
# Inspect the index of the grouped result: the two group keys
# ('letter', 'one') show up as the levels of a MultiIndex.
letterone.index
Out[5]:
MultiIndex(levels=[['a', 'b', 'c'], [1]],
labels=[[0, 1, 2], [0, 0, 0]],
names=['letter', 'one'])
In [6]:
# Same two-key grouping, but as_index=False keeps 'letter' and 'one'
# as ordinary columns instead of moving them into the index.
letterone = (
    df
    .groupby(by=['letter', 'one'], as_index=False)
    .sum()
)
letterone
Out[6]:
| | letter | one | two |
---|---|---|---|
0 | a | 1 | 4 |
1 | b | 1 | 4 |
2 | c | 1 | 2 |
In [7]:
# Inspect the index again: with as_index=False the result carries a
# plain integer index (0, 1, 2) rather than the group keys.
letterone.index
Out[7]:
Int64Index([0, 1, 2], dtype='int64')
In [8]:
# Because 'one' is a regular column here, a single value can be read
# by row label and column name. One .loc[row, col] call replaces the
# chained letterone['one'][2] lookup and returns the same scalar.
letterone.loc[2, 'one']
Out[8]:
1