donderdag 23 maart 2023

Datums bepalen adhv begin en einddatum in Dataframe

Voorbeeld op losse velden

 #######################################################################
# import necessary packages
from datetime import datetime

# dates
start_date = datetime(2021, 1,14)
end_date = datetime(2022, 1, 1)
 

#The pandas library provides the pd.period_range() method that creates a TimeSeries object from a date range and returns a tuple of the days between two dates.
month_list = pd.period_range(start=start_date, end=end_date, freq='M')

month_list = [month.strftime("%b-%Y") for month in month_list]


###################################################################################

 

toegepast op pandas dataframe

# Standaard modules die we eigenlijk altijd nodig hebben
import os as os, sys as sys, shutil as shutil, pathlib as pathlib, hashlib as hashlib, datetime as dt, pandas as pd, numpy as np, matplotlib.pyplot as plt, seaborn as sns
from datetime import date
####################################################################################################################################################################################################################################################################
mainFolderTeamData = r"C:\Users\x-wagene002\Documents\Python\datateamtoBI"

datafolder=mainFolderTeamData + '\\data'

inputfolder=mainFolderTeamData + '\\output\\dataset_aantalpermaandbedragen'
df1=pd.read_csv(inputfolder)

df1.columns


df1.rename(columns={"Clientnr Zorg-Ned":"klantnr"},inplace=True)
df1.rename(columns={"Bedrag Verplicht":"bedrag"},inplace=True)
df1.rename(columns={"Datum Verplichting Begin Jaarverplichting":"startdatum"},inplace=True)
df1.rename(columns={"Datum Einde Geldig Jaarverplichting":"einddatum"},inplace=True)
df1.columns

df1['startdatum']
df1['einddatum']

start=df1['startdatum'].iloc[1]
start_datum = dt.datetime.strptime(start, "%d-%m-%Y").date()

df1['startdatum']= pd.to_datetime(df1['startdatum'], format='%d-%m-%Y')
df1['einddatum']= pd.to_datetime(df1['einddatum'], format='%d-%m-%Y')


# df1['startdatum']= pd.to_datetime(df1['startdatum'], format='%Y-%m-%d')
# df1['einddatum']= pd.to_datetime(df1['einddatum'], format='%Y-%m-%d')

df1.dtypes

df1['maandlist'] = df1.apply(lambda row: pd.period_range(start=row['startdatum'], end=row['einddatum'], freq='M'),axis=1)

df1['maanden']=df1.apply(lambda row:  [month.strftime("%Y%m") for month in row['maandlist']],axis=1)

df1['#maand_actief']=df1.apply(lambda row: len(row['maanden']),axis=1)
df_gen=df1.explode('maanden')
df_gen

 

Datums bepalen adhv begin en einddatum in Dataframe

Voorbeeld op losse velden  ####################################################################### # import necessary packages from datetime...