Coverage for compiler_admin/services/toggl.py: 99%

154 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-28 05:48 +0000

1import io 

2import os 

3import sys 

4from datetime import datetime 

5from functools import cache 

6from typing import TextIO 

7 

8import pandas as pd 

9 

10import compiler_admin.services.files as files 

11from compiler_admin.api.toggl import TogglOrganization, TogglReports, TogglWorkspace 

12from compiler_admin.services.google import GoogleAccount 

13from compiler_admin.services.time import TimeSummary 

14 

# Toggl organization group display names, keyed by a short identifier.
GROUPS = {
    "contractors": "Contractors",
    "service_accounts": "Service Accounts",
    "staff": "Staff",
    "partners": "Partners",
}

16 

17 

class TogglService:
    """Base class that wires up the Toggl API clients from environment configuration."""

    def __init__(self):
        # Credentials and scope come from the environment; missing variables
        # yield None, which is passed through to the API clients unchanged.
        token = os.environ.get("TOGGL_API_TOKEN")
        workspace_id = os.environ.get("TOGGL_WORKSPACE_ID")
        organization_id = os.environ.get("TOGGL_ORGANIZATION_ID")
        self.api_organization = TogglOrganization(token, workspace_id, organization_id)
        self.api_reports = TogglReports(token, workspace_id)
        self.api_workspace = TogglWorkspace(token, workspace_id)

26 

27 

class TogglTime(TogglService):
    """Convert, download, and summarize Toggl time entries."""

    # input columns needed for conversion
    TOGGL_COLUMNS = ["Email", "Project", "Task", "Client", "Start date", "Start time", "Duration", "Description"]

    # default output CSV columns for Harvest
    HARVEST_COLUMNS = ["Date", "Client", "Project", "Task", "Notes", "Hours", "First name", "Last name"]
    # default output CSV columns for Justworks
    JUSTWORKS_COLUMNS = ["First Name", "Last Name", "Work Email", "Start Date", "End Date", "Regular Hours"]

    def __init__(self):
        super().__init__()
        # dispatch table: output format name -> converter method
        self.converters = {
            "harvest": self.convert_to_harvest,
            "justworks": self.convert_to_justworks,
        }

40 

@cache
def project_info(self):
    """Cache of previously seen project information, keyed on Toggl project name."""
    # NOTE(review): functools.cache on an instance method keys on `self` and
    # keeps the instance alive for the cache's lifetime (ruff B019); this is
    # presumably fine because service objects are short-lived — confirm.
    return files.JsonFileCache("TOGGL_PROJECT_INFO")

45 

@cache
def user_info(self):
    """Cache of previously seen user information, keyed on email."""
    # NOTE(review): same functools.cache-on-method pattern as project_info();
    # the instance is retained by the cache for its lifetime — confirm intended.
    return files.JsonFileCache("TOGGL_USER_INFO")

50 

51 def _get_name(self, email: str, name_key) -> str: 

52 """Get cached name or query from Google.""" 

53 info = self.user_info() 

54 user = info.get(email) 

55 name = user.get(name_key) if user else None 

56 if name is None: 

57 user = GoogleAccount(email).get_info() 

58 name = user.get(name_key) if user else None 

59 if email in info: 

60 info[email].update(user) 

61 else: 

62 info[email] = user 

63 return name 

64 

65 def _get_first_name(self, email: str) -> str: 

66 """Get cached last name or query from Google.""" 

67 return self._get_name(email, "First Name") 

68 

69 def _get_last_name(self, email: str): 

70 """Get cached last name or query from Google.""" 

71 return self._get_name(email, "Last Name") 

72 

def _prepare_input(self, source_path: str | TextIO, column_renames: dict | None = None) -> pd.DataFrame:
    """Parse and prepare CSV data from `source_path` into an initial `pandas.DataFrame`.

    Args:
        source_path: The path to a readable CSV file of Toggl time entries; or a readable buffer of the same.

        column_renames (dict | None): Optional mapping of input -> output column names, applied last.

    Returns:
        pd.DataFrame: Rows sorted by start date/time and email, with name
        columns filled in and Hours computed from the duration.
    """
    df = files.read_csv(source_path, usecols=self.TOGGL_COLUMNS, parse_dates=["Start date"], cache_dates=True)

    # parse HH:MM:SS strings into timedeltas
    df["Start time"] = df["Start time"].apply(self._str_timedelta)
    df["Duration"] = df["Duration"].apply(self._str_timedelta)

    # assign First and Last name
    df["First name"] = df["Email"].apply(self._get_first_name)
    df["Last name"] = df["Email"].apply(self._get_last_name)

    # calculate hours as a decimal from duration timedelta
    df["Hours"] = (df["Duration"].dt.total_seconds() / 3600).round(2)

    # if there is a Task column, prepend to the Description column and remove
    if "Task" in df.columns:
        df["Description"] = df.apply(
            lambda row: (f"[{row['Task']}] {row['Description']}" if pd.notna(row["Task"]) else row["Description"]),
            axis=1,
        )
        df.drop(columns=["Task"], inplace=True)

    df.sort_values(["Start date", "Start time", "Email"], inplace=True)

    # default was a shared mutable dict literal; None is the safe sentinel
    if column_renames:
        df.rename(columns=column_renames, inplace=True)

    return df

100 

101 def _str_timedelta(self, td: str): 

102 """Convert a string formatted duration (e.g. 01:30) to a timedelta.""" 

103 return pd.to_timedelta(pd.to_datetime(td, format="%H:%M:%S").strftime("%H:%M:%S")) 

104 

def convert_to_harvest(
    self,
    source_path: str | TextIO = sys.stdin,
    output_path: str | TextIO = sys.stdout,
    output_cols: list[str] = HARVEST_COLUMNS,
    client_name: str | None = None,
    **kwargs,
):
    """Convert Toggl formatted entries in source_path to equivalent Harvest formatted entries.

    Args:
        source_path: The path to a readable CSV file of Toggl time entries; or a readable buffer of the same.

        output_path: The path to a CSV file where Harvest time entries will be written; or a writeable buffer for the same.

        output_cols (list[str]): A list of column names for the output.

        client_name (str | None): The value to assign in the output "Client" field.
            Defaults to the HARVEST_CLIENT_NAME environment variable.

    Returns:
        None. Either prints the resulting CSV data or writes to output_path.
    """
    if client_name is None:
        client_name = os.environ.get("HARVEST_CLIENT_NAME")

    source = self._prepare_input(
        source_path=source_path, column_renames={"Project": "Project", "Description": "Notes", "Start date": "Date"}
    )

    # update static calculated columns
    source["Client"] = client_name
    source["Task"] = "Project Consulting"

    # get cached project name if any, keyed on Toggl project name
    info = self.project_info()
    source["Project"] = source["Project"].apply(lambda x: info.get(key=x, default=x))

    # find duplicates based on a subset of columns
    cols = ["Date", "Hours", "First name", "Last name", "Notes"]
    is_duplicate = source.duplicated(subset=cols, keep=False)

    if is_duplicate.any():
        # group once and reuse (the original grouped the same rows twice)
        dupe_groups = source[is_duplicate].groupby(cols)
        # 1-based position of each row within its duplicate group
        counter = dupe_groups.cumcount() + 1
        group_size = dupe_groups["Notes"].transform("size")

        # disambiguate duplicate rows with an " (i/N)" suffix on Notes
        source.loc[is_duplicate, "Notes"] = (
            source.loc[is_duplicate, "Notes"] + " (" + counter.astype(str) + "/" + group_size.astype(str) + ")"
        )

    files.write_csv(output_path, source, columns=output_cols)

157 

def convert_to_justworks(
    self,
    source_path: str | TextIO = sys.stdin,
    output_path: str | TextIO = sys.stdout,
    output_cols: list[str] = JUSTWORKS_COLUMNS,
    **kwargs,
):
    """Convert Toggl formatted entries in source_path to equivalent Justworks formatted entries.

    Args:
        source_path: The path to a readable CSV file of Toggl time entries; or a readable buffer of the same.

        output_path: The path to a CSV file where Harvest time entries will be written; or a writeable buffer for the same.

        output_cols (list[str]): A list of column names for the output.

    Returns:
        None. Either prints the resulting CSV data or writes to output_path.
    """
    renames = {
        "Email": "Work Email",
        "First name": "First Name",
        "Hours": "Regular Hours",
        "Last name": "Last Name",
        "Start date": "Start Date",
    }
    source = self._prepare_input(source_path=source_path, column_renames=renames)

    # aggregate hours per person per day
    day_cols = ["Work Email", "First Name", "Last Name", "Start Date"]
    daily = (
        source.sort_values(day_cols)
        .groupby(day_cols, observed=False)
        .agg({"Regular Hours": "sum"})
        .reset_index()
    )

    # aggregate hours per person and rollup to the week (starting on Sunday)
    person_cols = ["Work Email", "First Name", "Last Name"]
    weekly = (
        daily.groupby(person_cols)
        .resample("W", label="left", on="Start Date")["Regular Hours"]
        .sum()
        .reset_index()
    )

    # calculate the week end date (the following Saturday)
    weekly["End Date"] = weekly["Start Date"] + pd.Timedelta(days=6)

    files.write_csv(output_path, weekly, columns=output_cols)

203 

def download(
    self,
    start_date: datetime,
    end_date: datetime,
    output_path: str | TextIO = sys.stdout,
    output_cols: list[str] | None = TOGGL_COLUMNS,
    **kwargs,
):
    """Download a CSV report from Toggl of detailed time entries for the given date range.

    Args:
        start_date (datetime): The beginning of the reporting period.

        end_date (datetime): The end of the reporting period.

        output_path: The path to a CSV file where Toggl time entries will be written; or a writeable buffer for the same.

        output_cols (list[str]): A list of column names for the output.

        Extra kwargs are passed along in the POST request body.

    Returns:
        None. Either prints the resulting CSV data or writes to output_path.
    """
    response = self.api_reports.detailed_time_entries(start_date, end_date, **kwargs)
    # the raw response has these initial 3 bytes:
    #
    #   b"\xef\xbb\xbfUser,Email,Client..."
    #
    # \xef\xbb\xbf is the Byte Order Mark (BOM) sometimes used in unicode
    # text files; it indicates a utf-8 encoded text file, and the
    # "utf-8-sig" codec strips it during decoding.
    #
    # See more
    # - https://en.wikipedia.org/wiki/Byte_order_mark
    # - https://stackoverflow.com/a/50131187
    csv = response.content.decode("utf-8-sig")

    df = pd.read_csv(io.StringIO(csv))
    files.write_csv(output_path, df, columns=output_cols)

243 

def lock(self, lock_date: datetime):
    """Lock time entries on the given date in the Toggl workspace.

    Args:
        lock_date (datetime): The date to lock time entries.
    """
    # the workspace preference expects a YYYY-MM-DD date string
    self.api_workspace.update_preferences(report_locked_at=lock_date.strftime("%Y-%m-%d"))

252 

def normalize_summary(self, toggl_summary: TimeSummary) -> TimeSummary:
    """Normalize a Toggl TimeSummary to match the Harvest format."""
    info = self.project_info()

    normalized = TimeSummary(
        earliest_date=toggl_summary.earliest_date,
        latest_date=toggl_summary.latest_date,
        total_rows=toggl_summary.total_rows,
        total_hours=toggl_summary.total_hours,
    )

    # translate Toggl project names to their cached Harvest equivalents
    for project_name, hours in toggl_summary.hours_per_project.items():
        normalized.hours_per_project[info.get(key=project_name, default=project_name)] = hours

    # re-key per-user hours on "First Last" display names instead of emails
    for email, project_hours in toggl_summary.hours_per_user_project.items():
        display_name = f"{self._get_first_name(email)} {self._get_last_name(email)}"
        normalized.hours_per_user_project[display_name] = {
            info.get(key=p, default=p): h for p, h in project_hours.items()
        }

    return normalized

277 

def summarize(self, path: str | TextIO) -> TimeSummary:
    """Summarize a Toggl CSV file.

    Args:
        path (str | TextIO): The path to a readable CSV file of Toggl time entries; or a readable buffer of the same.

    Returns:
        TimeSummary: A summary of the time entries.
    """
    entries = files.read_csv(path, usecols=self.TOGGL_COLUMNS, parse_dates=["Start date"], cache_dates=True)

    # calculate hours as a decimal from the duration string
    entries["Hours"] = (pd.to_timedelta(entries["Duration"]).dt.total_seconds() / 3600).round(2)

    summary = TimeSummary(
        earliest_date=entries["Start date"].min().date(),
        latest_date=entries["Start date"].max().date(),
        total_rows=len(entries),
        total_hours=entries["Hours"].sum(),
    )

    # total hours per project
    summary.hours_per_project = entries.groupby(["Project"])["Hours"].sum().to_dict()

    # build a nested dict of the form {user: {project: hours}}
    per_user_project = entries.groupby(["Email", "Project"])["Hours"].sum().to_dict()
    for (email, project), hours in per_user_project.items():
        summary.hours_per_user_project.setdefault(email, {})[project] = hours

    return summary

312 

313 

class TogglUsers(TogglService):
    """Query users and groups from the Toggl organization and workspace."""

    def get_organization_group(self, name: str) -> dict:
        """Get group of users from the Toggl organization.

        Args:
            name (str): The name of the group.

        Returns:
            dict: The resulting JSON data of the group, or None when no group matches.
        """
        matches = self.api_organization.get_groups(name=name).json()
        return matches[0] if matches else None

327 

328 def get_organization_groups(self, group_names: list[str] = []) -> list[dict]: 

329 """Get a list of users from the Toggl organization. 

330 

331 Args: 

332 group_names (list[str]): Return only groups with a name matching one of the provided names. 

333 

334 Returns: 

335 list[dict]: The resulting JSON data of groups. 

336 """ 

337 data = [] 

338 

339 if len(group_names) > 0: 

340 for group in group_names: 

341 json = self.get_organization_group(group) 

342 data.extend([json]) 

343 else: 

344 response = self.api_organization.get_groups() 

345 json = response.json() 

346 data.extend(json) 

347 

348 return data 

349 

350 def get_organization_users(self, inactive: bool = False, groups: list[str] = [], **kwargs) -> dict: 

351 """Get a list of users from the Toggl organization. 

352 

353 Args: 

354 inactive (bool): True to get inactive users. False (the default) to get only active users. 

355 groups (list[str]): An optional list of Toggl group ids, returning only users in any one of the given groups. 

356 

357 Returns: 

358 dict: The resulting JSON data of users. 

359 """ 

360 if inactive: 

361 active_status = "inactive,invited" 

362 else: 

363 active_status = "active" 

364 kwargs["active_status"] = active_status 

365 

366 if groups: 

367 kwargs["groups"] = ",".join((str(g) for g in groups)) 

368 

369 response = self.api_organization.get_users(**kwargs) 

370 json = response.json() 

371 

372 return json 

373 

def get_workspace_users(self, **kwargs) -> dict:
    """Get a list of users from the Toggl workspace.

    Returns:
        dict: The resulting JSON data of users.
    """
    return self.api_workspace.get_users(**kwargs).json()