Adding dates for unique EIDs row-wise to get Years of Service
Solution 1:
Not the best solution, but it gets the job done. Considering the input is a CSV file stored in company.csv
and using groupby
on EID
:
from itertools import chain
import pandas as pd
def compute_yos(record):
if len(record) == 1 and record.iloc[0]["Company"] == "ABC":
return [pd.NaT]
elif len(record) > 1 and record.iloc[-1]["Company"] == "ABC":
yos = [record.iloc[-1]["Start_Date"] - record.iloc[0]["Start_Date"]]
return yos + [pd.NaT] * (len(record) - 1)
elif len(record) == 1 and record.iloc[0]["Company"] != "ABC":
return [record.iloc[0]["End_Date"] - record.iloc[0]["Start_Date"]]
else:
return [pd.NaT] * len(record)
input_df = pd.read_csv("company.csv")
print(input_df)
input_df[["Start_Date", "End_Date"]] = input_df[["Start_Date", "End_Date"]].apply(
pd.to_datetime
)
grouping = input_df.groupby(["EID"]).apply(compute_yos)
concat_grouping = chain.from_iterable(grouping)
input_df["YoS"] = list(concat_grouping)
print(input_df)
input:
EID Company Start_Date End_Date T_F
0 A111 ABC 2015-07-20 NaT True
1 B111 DEF 1983-06-01 NaT False
2 B111 ABC 2017-01-01 NaT True
3 C111 GHI 1980-10-01 1981-08-31 True
4 D111 JKL 1973-05-01 1977-11-30 True
5 E111 ABC 2006-04-24 NaT True
6 F111 ABC 1991-06-10 1994-12-15 False
7 F111 MNO 1994-12-01 2002-08-31 False
8 F111 ABC 2002-08-01 NaT True
9 G111 ABC 1979-01-01 NaT True
10 H111 ABC 2002-02-01 NaT True
output:
EID Company Start_Date End_Date T_F YoS
0 A111 ABC 2015-07-20 NaT True NaT
1 B111 DEF 1983-06-01 NaT False 12268 days
2 B111 ABC 2017-01-01 NaT True NaT
3 C111 GHI 1980-10-01 1981-08-31 True 334 days
4 D111 JKL 1973-05-01 1977-11-30 True 1674 days
5 E111 ABC 2006-04-24 NaT True NaT
6 F111 ABC 1991-06-10 1994-12-15 False 4070 days
7 F111 MNO 1994-12-01 2002-08-31 False NaT
8 F111 ABC 2002-08-01 NaT True NaT
9 G111 ABC 1979-01-01 NaT True NaT
10 H111 ABC 2002-02-01 NaT True NaT