# https://stackoverflow.com/questions/36909977/update-row-values-where-certain-condition-is-met-in-pandas

## converting the dataframe into required format...
import pandas as pd

# Columns removed from the raw frame before the labels are processed.
DROPPED_COLUMNS = ['Sex', 'Age', 'Frontal/Lateral', 'AP/PA']

# Label columns whose names contain spaces are renamed to space-free names,
# because the final 'tags' string uses a space as the separator.
RENAMED_COLUMNS = {
    'No Finding': 'NoFinding',
    'Enlarged Cardiomediastinum': 'EnlargedCardiomediastinum',
    'Lung Opacity': 'LungOpacity',
    'Lung Lesion': 'LungLesion',
    'Pleural Effusion': 'PleuralEffusion',
    'Pleural Other': 'PleuralOther',
    'Support Devices': 'SupportDevices',
}

# Label columns in tag-slot order: position i (1-based) fills slot 'v<i>'.
LABEL_COLUMNS = [
    'NoFinding',
    'Atelectasis',
    'Cardiomegaly',
    'Consolidation',
    'Edema',
    'EnlargedCardiomediastinum',
    'Fracture',
    'LungLesion',
    'LungOpacity',
    'PleuralEffusion',
    'PleuralOther',
    'Pneumonia',
    'Pneumothorax',
    'SupportDevices',
]


def build_tag_frame(df: pd.DataFrame) -> pd.DataFrame:
    """Collapse the per-label columns of *df* into one space-joined 'tags' column.

    Steps:
      1. Drop the columns listed in ``DROPPED_COLUMNS`` and rename the label
         columns according to ``RENAMED_COLUMNS``.
      2. Discard rows in which every label column is NaN.
      3. For each label column, emit the label name where the value equals 1
         and an empty string otherwise (NaN and any other value count as
         "not set").
      4. Join the 14 slots of each row with a single space. Empty slots are
         kept, so the result contains runs of spaces where labels are absent.

    Args:
        df: Frame with a 'Path' column, the ``DROPPED_COLUMNS`` and the
            14 label columns (under their original or renamed names).

    Returns:
        A new frame with exactly the columns ['Path', 'tags']; NaN paths are
        replaced by ''. The input frame is not modified.

    Raises:
        KeyError: If a dropped column, a label column or 'Path' is missing.
    """
    frame = df.drop(columns=DROPPED_COLUMNS).rename(columns=RENAMED_COLUMNS)
    frame = frame.dropna(subset=LABEL_COLUMNS, how='all')

    # Build the slot columns as strings from the start instead of writing
    # strings into NaN-filled float columns, which newer pandas deprecates.
    slots = pd.DataFrame(
        {
            'v%d' % position: frame[label].eq(1).map({True: label, False: ''})
            for position, label in enumerate(LABEL_COLUMNS, start=1)
        },
        index=frame.index,
    )
    tags = slots.apply(' '.join, axis=1)

    return pd.DataFrame({'Path': frame['Path'].fillna(''), 'tags': tags})


if __name__ == '__main__':
    df = pd.read_csv('/root/sharedfolder/CheXpert-v1.0-small/train_v1.csv')

    df_final01 = build_tag_frame(df)

    # NOTE(review): 'tags' is always a string at this point, so this dropna
    # currently removes nothing; rows without any label equal to 1 keep a
    # whitespace-only tag string. Confirm whether such rows should be dropped.
    df_final01 = df_final01.dropna(subset=['tags'])
    df_final01.to_csv('/root/sharedfolder/train_v01.csv', sep=",", encoding='utf-8', index=False)

    # NOTE(review): this reads train_v03.csv, not the train_v01.csv written
    # above -- confirm that this is the intended file.
    train_df = pd.read_csv('/root/sharedfolder/CheXpert-v1.0-small/train_v03.csv')