Pandas > Select data

Name
Code
Output
# Build a 3x2 frame of random integers in [0, 100) with columns C1 and C2.
dataframe = pandas.DataFrame(numpy.random.randint(0, 100, size=(3, 2)), columns=['C1', 'C2'])
print("dataframe")
print(dataframe)
print("Info axis")
# DataFrame.lookup was deprecated in pandas 1.2 and removed in 2.0.
# Equivalent: translate the row/column labels to integer positions and pick
# the (row, column) pairs out of the underlying array in one indexing step.
row_positions = dataframe.index.get_indexer([1, 2])
col_positions = dataframe.columns.get_indexer(['C1', 'C1'])
looked_up = dataframe.to_numpy()[row_positions, col_positions]
print(looked_up)
dataframe
   C1  C2
0  62  41
1  67  49
2  12  96
Info axis
[67 12]
# Source data: 5 rows x 3 columns of random integers in [0, 10).
dataframe = pandas.DataFrame(
    numpy.random.randint(0, 10, size=(5, 3)),
    columns=['C1', 'C2', 'C3'],
)
print("dataframe", dataframe, sep="\n")

# Row-wise filter: keep the rows labelled 1 and 3.
dataframe2 = dataframe.filter(items=[1, 3], axis='index')
print("dataframe selected items", dataframe2, sep="\n")

# Column-wise filter: keep columns C1 and C3.
dataframe3 = dataframe.filter(items=['C1', 'C3'], axis='columns')
print("dataframe selected items", dataframe3, sep="\n")
dataframe
   C1  C2  C3
0   8   2   8
1   5   7   7
2   9   5   0
3   6   8   1
4   0   1   9
dataframe selected items
   C1  C2  C3
1   5   7   7
3   6   8   1
dataframe selected items
   C1  C3
0   8   8
1   5   7
2   9   0
3   6   1
4   0   9
# Source data: 5 rows x 4 columns of random integers in [0, 100).
column_names = ['C1', 'C2', 'C3', 'C4']
dataframe = pandas.DataFrame(numpy.random.randint(0, 100, size=(5, 4)), columns=column_names)
print("dataframe", dataframe, sep="\n")

# Indexing with a list of column labels returns a frame with just those columns.
wanted_columns = ['C1', 'C2']
print("dataframe selected columns", dataframe[wanted_columns], sep="\n")
dataframe
   C1  C2  C3  C4
0  47  34  32  96
1  30  25  73  18
2  61  17  27   9
3  74  23  15  73
4  79  49  42  44
dataframe selected columns
   C1  C2
0  47  34
1  30  25
2  61  17
3  74  23
4  79  49
# Source data: 3x3 frame of random integers in [0, 100).
dataframe = pandas.DataFrame(
    numpy.random.randint(0, 100, size=(3, 3)),
    columns=['C1', 'C2', 'C3'],
)
print("dataframe", dataframe, sep="\n")

# Element-wise comparison; reduced per column below.
above_fifty = dataframe > 50

# Columns where at least one value exceeds 50.
dataframe2 = dataframe.loc[:, above_fifty.any()]
print("dataframe selected columns", dataframe2, sep="\n")

# Columns where every value exceeds 50.
dataframe3 = dataframe.loc[:, above_fifty.all()]
print("dataframe selected columns", dataframe3, sep="\n")
dataframe
   C1  C2  C3
0  80  41  76
1  38   6  57
2  45  14  84
dataframe selected columns
   C1  C3
0  80  76
1  38  57
2  45  84
dataframe selected columns
   C3
0  76
1  57
2  84
# Source data: 5x3 frame of random integers in [0, 10).
dataframe = pandas.DataFrame(
    numpy.random.randint(0, 10, size=(5, 3)),
    columns=['C1', 'C2', 'C3'],
)
print("dataframe", dataframe, sep="\n")

# Boolean mask: True for rows whose C1 value is one of 1, 2 or 3.
c1_in_set = dataframe['C1'].isin([1, 2, 3])
dataframe2 = dataframe.loc[c1_in_set]
print("dataframe selected columns", dataframe2, sep="\n")
dataframe
   C1  C2  C3
0   8   0   4
1   2   0   5
2   5   4   5
3   6   4   1
4   3   5   9
dataframe selected columns
   C1  C2  C3
1   2   0   5
4   3   5   9
# Source data: 5x4 frame of random integers in [0, 100).
dataframe = pandas.DataFrame(
    numpy.random.randint(0, 100, size=(5, 4)),
    columns=['C1', 'C2', 'C3', 'C4'],
)
print("dataframe", dataframe, sep="\n")

# Keep rows where C1 > 50 AND C2 lies outside the closed range [25, 75].
c1_high = dataframe['C1'] > 50
c2_outside = (dataframe['C2'] < 25) | (dataframe['C2'] > 75)
print("dataframe conditional selection", dataframe[c1_high & c2_outside], sep="\n")
dataframe
   C1  C2  C3  C4
0  72  23  21   1
1  59  67  77  59
2  47  49  34   6
3  28   8  66  97
4  91   4  44  37
dataframe conditional selection
   C1  C2  C3  C4
0  72  23  21   1
4  91   4  44  37
# Source data: 5x3 frame of random integers in [0, 10).
dataframe = pandas.DataFrame(
    numpy.random.randint(0, 10, size=(5, 3)),
    columns=['C1', 'C2', 'C3'],
)
print("dataframe", dataframe, sep="\n")

# query() evaluates the expression against the column names:
# keep the rows where C1 is strictly greater than C2.
expression = 'C1 > C2'
dataframe2 = dataframe.query(expression)
print("dataframe selected items", dataframe2, sep="\n")
dataframe
   C1  C2  C3
0   7   3   2
1   1   6   8
2   9   7   3
3   3   2   7
4   4   6   0
dataframe selected items
   C1  C2  C3
0   7   3   2
2   9   7   3
3   3   2   7
# Source data: 5x4 frame of random integers in [0, 100).
dataframe = pandas.DataFrame(
    numpy.random.randint(0, 100, size=(5, 4)),
    columns=['C1', 'C2', 'C3', 'C4'],
)
print("dataframe", dataframe, sep="\n")

# Boolean indexing: keep only the rows whose C1 value exceeds 50.
c1_over_fifty = dataframe['C1'] > 50
print("dataframe conditional selection", dataframe[c1_over_fifty], sep="\n")
dataframe
   C1  C2  C3  C4
0  86  13   3  83
1  63  58   1  24
2  80  87  52  40
3  34  35  37  43
4  24  30  83  11
dataframe conditional selection
   C1  C2  C3  C4
0  86  13   3  83
1  63  58   1  24
2  80  87  52  40
# Source data: 5x3 frame of random integers in [0, 10).
dataframe = pandas.DataFrame(
    numpy.random.randint(0, 10, size=(5, 3)),
    columns=['C1', 'C2', 'C3'],
)
print("dataframe", dataframe, sep="\n")

# where() keeps the original shape: cells failing the condition become NaN
# instead of being dropped.
exceeds_five = dataframe > 5
dataframe2 = dataframe.where(exceeds_five)
print("dataframe selected items", dataframe2, sep="\n")
dataframe
   C1  C2  C3
0   9   6   4
1   7   7   0
2   1   7   7
3   4   1   5
4   6   2   6
dataframe selected items
    C1   C2   C3
0  9.0  6.0  NaN
1  7.0  7.0  NaN
2  NaN  7.0  7.0
3  NaN  NaN  NaN
4  6.0  NaN  6.0
# Custom (non-positional) row labels for the frame.
df_index = [101, 202, 303, 404, 505]
dataframe = pandas.DataFrame(
    numpy.random.randint(0, 100, size=(5, 3)),
    index=df_index,
    columns=['C1', 'C2', 'C3'],
)
print("dataframe", dataframe, sep="\n")

# .loc selects by label: 303 is the label of the third row.
row_by_label = dataframe.loc[303]
print("row 3 with label 303", row_by_label, sep="\n")
dataframe
     C1  C2  C3
101  88  84  54
202  15  99  10
303   6  57  57
404  23  51  51
505  92  12  36
row 3 with label 303
C1     6
C2    57
C3    57
Name: 303, dtype: int64
# Custom (non-positional) row labels for the frame.
df_index = [101, 202, 303, 404, 505]
dataframe = pandas.DataFrame(
    numpy.random.randint(0, 100, size=(5, 3)),
    index=df_index,
    columns=['C1', 'C2', 'C3'],
)
print("dataframe", dataframe, sep="\n")

# .iloc selects by zero-based position: position 2 is the third row.
third_row = dataframe.iloc[2]
print("row 3", third_row, sep="\n")
dataframe
     C1  C2  C3
101  26  73  73
202  14  68  61
303   6   1   1
404  40  83   1
505  49  84  31
row 3
C1    6
C2    1
C3    1
Name: 303, dtype: int64
# Source data: 5x3 frame of random integers in [0, 100).
dataframe = pandas.DataFrame(
    numpy.random.randint(0, 100, size=(5, 3)),
    columns=['C1', 'C2', 'C3'],
)
print("dataframe", dataframe, sep="\n")

c1 = dataframe['C1']

# Single condition: rows whose C1 value is greater than 50.
dataframe2 = dataframe.loc[c1 > 50]
print("dataframe selected rows", dataframe2, sep="\n")

# Multiple conditions: rows whose C1 value lies strictly between 20 and 50.
in_open_range = (c1 > 20) & (c1 < 50)
dataframe3 = dataframe.loc[in_open_range]
print("dataframe selected rows", dataframe3, sep="\n")
dataframe
   C1  C2  C3
0  24  55  55
1  84  97  32
2  65  15  83
3  11  31  95
4  41  96  70
dataframe selected rows
   C1  C2  C3
1  84  97  32
2  65  15  83
dataframe selected rows
   C1  C2  C3
0  24  55  55
4  41  96  70
# Source data: 5x3 frame of random integers in [0, 10) with the default 0..4 index.
dataframe = pandas.DataFrame(numpy.random.randint(0, 10, size=(5, 3)), columns=['C1', 'C2', 'C3'])
print("dataframe")
print(dataframe)
# Select the rows whose index label is even.
# DataFrame.select was deprecated in pandas 0.21 and removed in 1.0; a boolean
# mask computed from the index and passed to .loc is the supported equivalent.
even_index_mask = dataframe.index % 2 == 0
dataframe2 = dataframe.loc[even_index_mask]
print("dataframe selected items")
print(dataframe2)
dataframe
   C1  C2  C3
0   5   6   0
1   1   2   0
2   9   1   7
3   6   2   7
4   0   7   9
dataframe selected items
   C1  C2  C3
0   5   6   0
2   9   1   7
4   0   7   9