, :
import pandas as pd
from numpy import array, arange
from numpy.random import randint, seed
# Fix the RNG state so the random frame and group labels are reproducible.
seed(42)
n_rows = 1000
# n_rows x 8 frame of random integers from 1 to 9 (randint's upper bound is
# exclusive).
d = pd.DataFrame(randint(1, 10, (n_rows, 8)))
# Weights that f looks up by column label.
dgs = array([3, 4, 1, 8, 9, 2, 3, 7, 10, 8])
# One bin label per row: random integers from 1 to 4 cut on the edges
# 1, 2, 3, 4.
grps = pd.cut(randint(1, 5, n_rows), arange(1, 5))


def f(x):
    """Return the mean of the ``dgs`` entries selected by the index of *x*."""
    return dgs[x.index].mean()


def g(x):
    """Group the Series *x* by its own values and apply ``f`` to each group."""
    return x.groupby(x).apply(f)


def h(x):
    """Apply ``g`` to each row of the frame *x*, then average column-wise."""
    return x.apply(g, axis=1).mean(axis=0)


# Parenthesised so the line is valid under both Python 2 and Python 3.
print(d.groupby(grps).apply(h))
def group_process(df2):
    """Compute per-value mean weights for one sub-frame without row-wise apply.

    Every cell of *df2* is paired with the ``dgs`` weight at its column
    position.  The weights are first averaged per (row label, cell value)
    pair, and those per-row means are then averaged per cell value.

    Reads the module-level array ``dgs``.  Presumably *df2* has no missing
    cells, so that the stacked Series and the weight vector have the same
    length -- verify against the caller.

    Returns a Series indexed by the distinct cell values of *df2*.
    """
    n_rows, n_cols = df2.shape
    s = df2.stack()
    # Repeat the first n_cols weights once per row and flatten row-major so
    # the vector lines up with the stacked cells.  The ndarray method is used
    # because this file imports only ``array``/``arange`` from numpy; the
    # name ``np`` is never bound.
    v = dgs[None, :n_cols].repeat(n_rows, axis=0).ravel()
    per_row = pd.Series(v).groupby([s.index.get_level_values(0), s.values]).mean()
    # ``Series.mean(level=...)`` was removed in pandas 2.0; grouping on the
    # index level is the spelling that works on both old and new pandas.
    return per_row.groupby(level=1).mean()
# Parenthesised so the line is valid under both Python 2 and Python 3.
print(d.groupby(grps).apply(group_process))
Output:
1 2 3 4 5 6 7 \
(1, 2] 4.621575 4.625887 4.775235 4.954321 4.566441 4.568111 4.835664
(2, 3] 4.446347 4.138528 4.862613 4.800538 4.582721 4.595890 4.794183
(3, 4] 4.776144 4.510119 4.391729 4.392262 4.930556 4.695776 4.630068
8 9
(1, 2] 4.246085 4.520384
(2, 3] 5.237360 4.418934
(3, 4] 4.829167 4.681548
[3 rows x 9 columns]
1 2 3 4 5 6 7 \
(1, 2] 4.621575 4.625887 4.775235 4.954321 4.566441 4.568111 4.835664
(2, 3] 4.446347 4.138528 4.862613 4.800538 4.582721 4.595890 4.794183
(3, 4] 4.776144 4.510119 4.391729 4.392262 4.930556 4.695776 4.630068
8 9
(1, 2] 4.246085 4.520384
(2, 3] 5.237360 4.418934
(3, 4] 4.829167 4.681548
[3 rows x 9 columns]
70 , , 10 ** 7 .