met het handje
# ==> Analysis
# NOTE: the bare expressions below only show output in an interactive
# session (notebook / REPL); in a plain script only print() emits anything.

# Column names, dtypes and non-null counts.
df1.info()
# Number of distinct values per column.
df1.nunique()
# Summary statistics of the frame.
print(df1.describe())
# Rows where VALID_BSN is missing.
df1[df1['VALID_BSN'].isnull()]
# Overview of the rows whose VALID_BSN equals the string '0'
# (presumably the "invalid" flag; confirm against the source data).
df1[df1['VALID_BSN'] == '0'].info()
# Rows where BSN is missing.
df1[df1['BSN'].isnull()]
# Rows whose BSN is '999999999' (presumably a placeholder value; confirm).
df1[df1['BSN'] == '999999999']
dubbele records
# Select every row that repeats an earlier row; subset=None compares all columns.
is_repeat = df1.duplicated(subset=None)
df1[is_repeat]
# Read the first sheet of the workbook with every column kept as text (dtype=str).
# NOTE(review): xls_file is created outside this snippet, presumably a
# pandas.ExcelFile; confirm.
df1 = xls_file.parse(0, skiprows=0, dtype=str)
# Convert the two date columns. The unit is spelled out because recent
# pandas versions reject the unit-less 'datetime64' dtype in astype().
df1 = df1.astype({
    'VD_Ingangsdatum': 'datetime64[ns]',
    'VD_Einddatum': 'datetime64[ns]',
})
# Mapping from the column names in the sheet to the names used by the
# analysis statements.
dictVeldnamen = {
    'Valid_BSN': 'VALID_BSN',
    'LR_BSN': 'BSN',
    'VD_Ingangsdatum': 'BEGIN_DATUM',
    'VD_Einddatum': 'EIND_DATUM',
    'Code_Bron': 'CODE_BRON',
}
# inplace must be the boolean True, not the string 'true'.
df1.rename(columns=dictVeldnamen, inplace=True)
# Drop rows that are identical in every column, keeping the last occurrence.
df1.drop_duplicates(subset=None, keep='last', inplace=True)
alternatief pandas-profiling
# importing required packages
# NOTE(review): the pandas-profiling package has since been renamed
# ydata-profiling; confirm which one is installed.
import pandas as pd
import pandas_profiling
import numpy as np
# importing the data
df = pd.read_csv('/Users/lukas/Downloads/titanic/train.csv')
# Build the profiling report for the frame; as a bare expression it is
# only rendered in an interactive session (notebook).
pandas_profiling.ProfileReport(df)
Geen opmerkingen:
Een reactie posten