Pandas > Manage unique & empty data

Name
Code
Output
# Build a 5x2 frame of random digits (0-9); the small range makes repeated
# values in C1 likely.
random_digits = numpy.random.randint(0, 10, size=(5, 2))
dataframe = pandas.DataFrame(random_digits, columns=['C1', 'C2'])
print("dataframe")
print(dataframe)
# Boolean Series: True for each row whose C1 value already appeared in an
# earlier row (the first occurrence stays False).
# Equivalent spelling: duplicates = dataframe.C1.duplicated()
duplicates = dataframe.duplicated(subset='C1')
print("duplicates")
print(duplicates)
dataframe
   C1  C2
0   9   1
1   2   2
2   6   1
3   9   7
4   0   7
duplicates
0    False
1    False
2    False
3     True
4    False
dtype: bool
# Three random integer columns; C2 is then overwritten so it holds only NaN.
dataframe = pandas.DataFrame(
    numpy.random.randint(0, 100, size=(5, 3)),
    columns=['C1', 'C2', 'C3'],
)
dataframe['C2'] = numpy.nan
print("dataframe")
print(dataframe)
# dropna returns a new frame; pass inplace=True to modify `dataframe` itself.
# With the default how='any', a column is dropped as soon as it holds one NaN.
dataframe2 = dataframe.dropna(axis='columns')
print("dataframe drop empty column")
print(dataframe2)
dataframe
   C1  C2  C3
0  86 NaN   8
1  40 NaN  39
2  35 NaN  31
3  52 NaN  10
4  73 NaN  41
dataframe drop empty column
   C1  C3
0  86   8
1  40  39
2  35  31
3  52  10
4  73  41
# Random digits (0-9) in a 5x2 frame, so C1 is likely to contain repeats.
random_digits = numpy.random.randint(0, 10, size=(5, 2))
dataframe = pandas.DataFrame(random_digits, columns=['C1', 'C2'])
print("dataframe")
print(dataframe)
# Keep only the first row for each distinct C1 value; the surviving rows keep
# their original index labels. A new frame is returned.
dataframe2 = dataframe.drop_duplicates(subset='C1', keep='first')
print("dataframe without duplicates")
print(dataframe2)
dataframe
   C1  C2
0   0   6
1   0   9
2   3   3
3   3   0
4   4   6
dataframe without duplicates
   C1  C2
0   0   6
2   3   3
4   4   6
# Random integers cast to float up front: NaN cannot be stored in an integer
# column, and relying on pandas to upcast silently during assignment is
# deprecated since pandas 2.1 (PDEP-6).
dataframe = pandas.DataFrame(
    numpy.random.randint(0, 100, size=(5, 3)),
    columns=['C1', 'C2', 'C3'],
).astype(float)
# Blank out the whole row labelled 3.
dataframe.loc[3] = numpy.nan
print("dataframe")
print(dataframe)
# dropna returns a new frame; pass inplace=True to modify `dataframe` itself.
# axis=0 drops rows; with the default how='any', one NaN is enough to drop a row.
dataframe2 = dataframe.dropna(axis=0)
print("dataframe drop empty row")
print(dataframe2)
dataframe
     C1    C2    C3
0  81.0  65.0  92.0
1  75.0  15.0  46.0
2  44.0  96.0   7.0
3   NaN   NaN   NaN
4  34.0  95.0  76.0
dataframe drop empty row
     C1    C2    C3
0  81.0  65.0  92.0
1  75.0  15.0  46.0
2  44.0  96.0   7.0
4  34.0  95.0  76.0
# Random integers cast to float up front: NaN cannot be stored in an integer
# column, and relying on pandas to upcast silently during assignment is
# deprecated since pandas 2.1 (PDEP-6).
dataframe = pandas.DataFrame(
    numpy.random.randint(0, 100, size=(5, 3)),
    columns=['C1', 'C2', 'C3'],
).astype(float)
# Make column C2 and the row labelled 3 entirely NaN.
dataframe['C2'] = numpy.nan
dataframe.loc[3] = numpy.nan
print("dataframe")
print(dataframe)
# fillna returns a new frame; pass inplace=True to modify `dataframe` itself.
# Every NaN is replaced by the constant 77.
dataframe2 = dataframe.fillna(77)
print("dataframe with filled empty values")
print(dataframe2)
dataframe
     C1  C2    C3
0  11.0 NaN  41.0
1  14.0 NaN   9.0
2  79.0 NaN   5.0
3   NaN NaN   NaN
4   5.0 NaN  70.0
dataframe with filled empty values
     C1    C2    C3
0  11.0  77.0  41.0
1  14.0  77.0   9.0
2  79.0  77.0   5.0
3  77.0  77.0  77.0
4   5.0  77.0  70.0
# A 5x4 frame of random integers in the range 0-99.
dataframe = pandas.DataFrame(
    numpy.random.randint(0, 100, size=(5, 4)),
    columns=['C1', 'C2', 'C3', 'C4'],
)
print("dataframe")
print(dataframe)
print("dataframe column unique values")
# unique() returns the distinct values of the column as an array, in order of
# first appearance (not sorted).
first_column = dataframe['C1']
print(first_column.unique())
dataframe
   C1  C2  C3  C4
0  97  77  28  12
1  61  58  86  64
2   0  12   0  62
3   0  66  88  34
4  16   6  51  60
dataframe column unique values
[97 61  0 16]
# A 3x3 frame of random integers in the range 0-99.
dataframe = pandas.DataFrame(
    numpy.random.randint(0, 100, size=(3, 3)),
    columns=['C1', 'C2', 'C3'],
)
# C2 must be float before it can hold NaN; silent upcasting on assignment is
# deprecated since pandas 2.1 (PDEP-6).
dataframe['C2'] = dataframe['C2'].astype(float)
# Single .loc call instead of chained indexing (dataframe['C2'][1] = ...):
# chained assignment may write to a temporary copy and leave `dataframe`
# untouched. numpy.nan replaces the numpy.NaN alias removed in NumPy 2.0.
dataframe.loc[1, 'C2'] = numpy.nan
dataframe['C3'] = numpy.nan
print("dataframe")
print(dataframe)
# Columns that contain at least one null value.
dataframe2 = dataframe.loc[:, dataframe.isnull().any()]
print("dataframe selected columns")
print(dataframe2)
# Columns in which every value is non-null.
dataframe3 = dataframe.loc[:, dataframe.notnull().all()]
print("dataframe selected columns")
print(dataframe3)
dataframe
   C1    C2  C3
0  91   7.0 NaN
1  77   NaN NaN
2  54  62.0 NaN
dataframe selected columns
     C2  C3
0   7.0 NaN
1   NaN NaN
2  62.0 NaN
dataframe selected columns
   C1
0  91
1  77
2  54