Coverage for compiler_admin / services / harvest.py: 100%

49 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-28 05:48 +0000

1import os 

2import sys 

3import time 

4from datetime import timedelta 

5from typing import TextIO 

6 

7import pandas as pd 

8 

9import compiler_admin.services.files as files 

10from compiler_admin.services.time import TimeSummary 

11 

12 

13class HarvestTime: 

14 

15 # input CSV columns needed for conversion 

16 HARVEST_COLUMNS = ["Date", "Client", "Project", "Notes", "Hours", "First name", "Last name"] 

17 

18 # default output CSV columns 

19 TOGGL_COLUMNS = ["Email", "Start date", "Start time", "Duration", "Project", "Task", "Client", "Billable", "Description"] 

20 

21 def __init__(self): 

22 self.converters = {"toggl": self.convert_to_toggl} 

23 

24 def _calc_start_time(self, group: pd.DataFrame): 

25 """Start time is offset by the previous record's duration, with a default of 0 offset for the first record.""" 

26 group["Start time"] = group["Start time"] + group["Duration"].shift(fill_value=pd.to_timedelta("00:00:00")).cumsum() 

27 return group 

28 

29 def _duration_str(self, duration: timedelta) -> str: 

30 """Use total seconds to convert to a datetime and format as a string e.g. 01:30.""" 

31 return time.strftime("%H:%M", time.gmtime(duration.total_seconds())) 

32 

33 def _toggl_client_name(self): 

34 """Gets the value of the TOGGL_CLIENT_NAME env var.""" 

35 return os.environ.get("TOGGL_CLIENT_NAME") 

36 

37 def convert_to_toggl( 

38 self, 

39 source_path: str | TextIO = sys.stdin, 

40 output_path: str | TextIO = sys.stdout, 

41 output_cols: list[str] = TOGGL_COLUMNS, 

42 client_name: str = None, 

43 **kwargs, 

44 ): 

45 """Convert Harvest formatted entries in source_path to equivalent Toggl formatted entries. 

46 

47 Args: 

48 source_path: The path to a readable CSV file of Harvest time entries; or a readable buffer of the same. 

49 

50 output_cols (list[str]): A list of column names for the output 

51 

52 output_path: The path to a CSV file where Toggl time entries will be written; or a writeable buffer for the same. 

53 

54 Returns: 

55 None. Either prints the resulting CSV data or writes to output_path. 

56 """ 

57 if client_name is None: 

58 client_name = self._toggl_client_name() 

59 

60 # read CSV file, parsing dates 

61 source = files.read_csv(source_path, usecols=self.HARVEST_COLUMNS, parse_dates=["Date"], cache_dates=True) 

62 

63 # rename columns that can be imported as-is 

64 source.rename(columns={"Project": "Task", "Notes": "Description", "Date": "Start date"}, inplace=True) 

65 

66 # update static calculated columns 

67 source["Client"] = client_name 

68 source["Project"] = client_name 

69 source["Billable"] = "Yes" 

70 

71 # add the Email column 

72 source["Email"] = source["First name"].apply(lambda x: f"{x.lower()}@compiler.la") 

73 

74 # Convert numeric Hours to timedelta Duration 

75 source["Duration"] = source["Hours"].apply(pd.to_timedelta, unit="hours") 

76 

77 # Default start time to 09:00 

78 source["Start time"] = pd.to_timedelta("09:00:00") 

79 

80 user_days = ( 

81 source 

82 # sort and group by email and date 

83 .sort_values(["Email", "Start date"]).groupby(["Email", "Start date"], observed=False) 

84 # calculate a start time within each group (excluding the groupby columns) 

85 .apply(self._calc_start_time, include_groups=False) 

86 ) 

87 

88 # convert timedeltas to duration strings 

89 user_days["Duration"] = user_days["Duration"].apply(self._duration_str) 

90 user_days["Start time"] = user_days["Start time"].apply(self._duration_str) 

91 

92 # re-sort by start date/time and user 

93 # reset the index to get rid of the group multi index and fold the group columns back down 

94 output_data = pd.DataFrame(data=user_days).reset_index() 

95 output_data.sort_values(["Start date", "Start time", "Email"], inplace=True) 

96 

97 files.write_csv(output_path, output_data, output_cols) 

98 

99 def summarize(self, path: str | TextIO) -> "TimeSummary": 

100 """Summarize a Harvest CSV file. 

101 

102 Args: 

103 path (str | TextIO): The path to a readable CSV file of Harvest time entries; or a readable buffer of the same. 

104 

105 Returns: 

106 TimeSummary: A summary of the time entries. 

107 """ 

108 

109 # read CSV file, parsing dates 

110 source = files.read_csv(path, usecols=self.HARVEST_COLUMNS, parse_dates=["Date"], cache_dates=True) 

111 

112 summary = TimeSummary( 

113 earliest_date=source["Date"].min().date(), 

114 latest_date=source["Date"].max().date(), 

115 total_rows=len(source), 

116 total_hours=source["Hours"].sum(), 

117 ) 

118 

119 # Group by Project to get hours per project 

120 project_hours = source.groupby(["Project"])["Hours"].sum().to_dict() 

121 summary.hours_per_project = project_hours 

122 

123 # Group by User and Project to get hours per user/project 

124 user_project_hours = source.groupby(["First name", "Last name", "Project"])["Hours"].sum().to_dict() 

125 # create a nested dict of the form {user: {project: hours}} 

126 for (first, last, project), hours in user_project_hours.items(): 

127 user = f"{first} {last}" 

128 if user not in summary.hours_per_user_project: 

129 summary.hours_per_user_project[user] = {} 

130 summary.hours_per_user_project[user][project] = hours 

131 

132 return summary