Pandas > Aggregate data

Name
Code
Output
# Build a 5x3 frame of random integers in [0, 10) with named columns.
random_values = numpy.random.randint(0, 10, size=(5, 3))
dataframe = pandas.DataFrame(random_values, columns=['C1', 'C2', 'C3'])
print("dataframe")
print(dataframe)

# Compute several column-wise statistics in one call; every requested
# function becomes one row of the result, every column stays a column.
print("aggregates")
stat_names = ['sum', 'max', 'mean']
aggregates = dataframe.agg(stat_names)
print(aggregates)
dataframe
   C1  C2  C3
0   4   3   0
1   5   0   9
2   8   9   2
3   6   7   5
4   5   9   2
aggregates
        C1    C2    C3
sum   28.0  28.0  18.0
max    8.0   9.0   9.0
mean   5.6   5.6   3.6
# Build a 5x2 frame of random integers in [0, 100).
dataframe = pandas.DataFrame(numpy.random.randint(0, 100, size=(5, 2)), columns=['C1', 'C2'])
print("dataframe")
print(dataframe)

# sum for columns: axis=0 collapses the rows, giving one total per column.
# DataFrame.sum is the vectorized reduction; it replaces apply() with
# Python's built-in sum, which loops over every column in interpreted code.
sum_columns = dataframe[['C1', 'C2']].sum(axis=0)
print("sum for columns")
print(sum_columns)

# sum for rows: axis=1 collapses the columns, giving one total per row label.
sum_rows = dataframe[['C1', 'C2']].sum(axis=1)
print("sum for rows")
print(sum_rows)
dataframe
   C1  C2
0  56   2
1  94  89
2   0  56
3  62  80
4  80  98
sum for columns
C1    292
C2    325
dtype: int64
sum for rows
0     58
1    183
2     56
3    142
4    178
dtype: int64
# Build a 5x2 frame of random integers in [0, 100).
dataframe = pandas.DataFrame(numpy.random.randint(0, 100, size=(5, 2)), columns=['C1', 'C2'])
print("dataframe")
print(dataframe)


def multiply_by_ten(value):
    """Return the given cell value multiplied by 10."""
    return value * 10


# Apply the function to every single cell of the frame.
# DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 and the old
# name is deprecated, so prefer map() and only fall back to applymap() on
# older pandas releases that do not provide it yet.
if hasattr(pandas.DataFrame, 'map'):
    function_result = dataframe.map(multiply_by_ten)
else:
    function_result = dataframe.applymap(multiply_by_ten)
print("apply result")
print(function_result)
dataframe
   C1  C2
0  54  11
1  31  59
2  81  76
3  42  46
4   8  24
apply result
    C1   C2
0  540  110
1  310  590
2  810  760
3  420  460
4   80  240
# Build a 5x2 frame of small random integers in [0, 4) so values repeat.
random_values = numpy.random.randint(0, 4, size=(5, 2))
dataframe = pandas.DataFrame(random_values, columns=['C1', 'C2'])
print("dataframe")
print(dataframe)

# Cross-tabulate the two columns: with no values/aggfunc given, each cell
# counts how many rows have that (C1, C2) combination.
row_factor = dataframe['C1']
column_factor = dataframe['C2']
aggregate = pandas.crosstab(index=row_factor, columns=column_factor)
print("aggregate data")
print(aggregate)
dataframe
   C1  C2
0   0   3
1   2   2
2   3   2
3   2   1
4   2   1
aggregate data
C2  1  2  3
C1         
0   0  0  1
2   2  1  0
3   0  1  0
# Build a 5x3 frame of small random integers in [0, 5) so C1 has repeats.
random_values = numpy.random.randint(0, 5, size=(5, 3))
dataframe = pandas.DataFrame(random_values, columns=['C1', 'C2', 'C3'])
print("dataframe")
print(dataframe)

# Group the rows by their C1 value, then total the remaining columns
# within each group; C1 becomes the index of the result.
grouped_by_c1 = dataframe.groupby('C1')
aggregate = grouped_by_c1.sum()
print("aggregated data")
print(aggregate)
dataframe
   C1  C2  C3
0   2   4   2
1   2   1   1
2   4   1   4
3   3   1   1
4   3   2   0
aggregated data
    C2  C3
C1        
2    5   3
3    3   1
4    1   4
# Build a 5x3 frame of small random integers in [0, 4) so keys repeat.
dataframe = pandas.DataFrame(numpy.random.randint(0, 4, size=(5, 3)), columns=['C1', 'C2', 'C3'])
print("dataframe")
print(dataframe)

# Pivot: one row per C1 value, one column per C2 value, each cell holding
# the sum of C3 over the rows with that (C1, C2) pair. Combinations that
# never occur in the data are left as NaN.
# The aggregation is named by the string 'sum' rather than Python's built-in
# sum: pandas silently aliases the builtin to its own reduction and warns
# (FutureWarning, pandas >= 2.1) that this aliasing will go away.
pivot = pandas.pivot_table(dataframe, values='C3', index=['C1'], columns=['C2'], aggfunc='sum')
print("pivot")
print(pivot)
dataframe
   C1  C2  C3
0   1   0   3
1   1   3   0
2   1   0   2
3   2   0   2
4   1   2   2
pivot
C2    0    2    3
C1               
1   5.0  2.0  0.0
2   2.0  NaN  NaN