Coverage report for compiler_admin/services/toggl.py: 99% of 154 statements covered.
Generated by coverage.py v7.13.5 at 2026-04-28 05:48 +0000.
1import io
2import os
3import sys
4from datetime import datetime
5from functools import cache
6from typing import TextIO
8import pandas as pd
10import compiler_admin.services.files as files
11from compiler_admin.api.toggl import TogglOrganization, TogglReports, TogglWorkspace
12from compiler_admin.services.google import GoogleAccount
13from compiler_admin.services.time import TimeSummary
# Display names for Toggl organization groups, keyed on short identifier.
GROUPS = {"contractors": "Contractors", "service_accounts": "Service Accounts", "staff": "Staff", "partners": "Partners"}
class TogglService:
    """Base class for Toggl-backed services: wires up the Toggl API clients.

    Credentials and identifiers are read from the TOGGL_API_TOKEN,
    TOGGL_WORKSPACE_ID, and TOGGL_ORGANIZATION_ID environment variables.
    """

    def __init__(self):
        api_token = os.environ.get("TOGGL_API_TOKEN")
        workspace_id = os.environ.get("TOGGL_WORKSPACE_ID")
        organization_id = os.environ.get("TOGGL_ORGANIZATION_ID")
        # one client per Toggl API surface
        self.api_organization = TogglOrganization(api_token, workspace_id, organization_id)
        self.api_reports = TogglReports(api_token, workspace_id)
        self.api_workspace = TogglWorkspace(api_token, workspace_id)
class TogglTime(TogglService):
    """Converts, downloads, locks, and summarizes Toggl time entry data."""

    # input columns needed for conversion
    TOGGL_COLUMNS = ["Email", "Project", "Task", "Client", "Start date", "Start time", "Duration", "Description"]

    # default output CSV columns for Harvest
    HARVEST_COLUMNS = ["Date", "Client", "Project", "Task", "Notes", "Hours", "First name", "Last name"]
    # default output CSV columns for Justworks
    JUSTWORKS_COLUMNS = ["First Name", "Last Name", "Work Email", "Start Date", "End Date", "Regular Hours"]

    def __init__(self):
        super().__init__()
        # registry of output-format converters, keyed on format name
        self.converters = {"harvest": self.convert_to_harvest, "justworks": self.convert_to_justworks}

    def project_info(self) -> files.JsonFileCache:
        """Cache of previously seen project information, keyed on Toggl project name.

        Created lazily per instance instead of with functools.cache, which keys
        on `self` and would keep the instance alive for the cache's lifetime (B019).
        """
        if not hasattr(self, "_project_info"):
            self._project_info = files.JsonFileCache("TOGGL_PROJECT_INFO")
        return self._project_info

    def user_info(self) -> files.JsonFileCache:
        """Cache of previously seen user information, keyed on email.

        Created lazily per instance instead of with functools.cache, which keys
        on `self` and would keep the instance alive for the cache's lifetime (B019).
        """
        if not hasattr(self, "_user_info"):
            self._user_info = files.JsonFileCache("TOGGL_USER_INFO")
        return self._user_info

    def _get_name(self, email: str, name_key: str) -> str | None:
        """Get a cached name field for `email`, querying Google on a cache miss.

        Args:
            email (str): The user's email address.
            name_key (str): The info key to read, e.g. "First Name" or "Last Name".

        Returns:
            str | None: The name value, or None when it cannot be found.
        """
        info = self.user_info()
        user = info.get(email)
        name = user.get(name_key) if user else None
        if name is None:
            user = GoogleAccount(email).get_info()
            name = user.get(name_key) if user else None
            # Only cache a successful lookup: the original code called
            # info[email].update(None) when Google returned nothing, raising TypeError.
            if user:
                if email in info:
                    info[email].update(user)
                else:
                    info[email] = user
        return name

    def _get_first_name(self, email: str) -> str | None:
        """Get cached first name or query from Google."""
        return self._get_name(email, "First Name")

    def _get_last_name(self, email: str) -> str | None:
        """Get cached last name or query from Google."""
        return self._get_name(email, "Last Name")

    def _prepare_input(self, source_path: str | TextIO, column_renames: dict | None = None) -> pd.DataFrame:
        """Parse and prepare CSV data from `source_path` into an initial `pandas.DataFrame`.

        Args:
            source_path: The path to a readable CSV file of Toggl time entries; or a readable buffer of the same.
            column_renames (dict | None): Optional mapping of input column names to output column names.
                (Replaces the original mutable `{}` default argument.)

        Returns:
            pd.DataFrame: Prepared entries, sorted by start date, start time, and email.
        """
        df = files.read_csv(source_path, usecols=self.TOGGL_COLUMNS, parse_dates=["Start date"], cache_dates=True)

        df["Start time"] = df["Start time"].apply(self._str_timedelta)
        df["Duration"] = df["Duration"].apply(self._str_timedelta)

        # assign First and Last name
        df["First name"] = df["Email"].apply(self._get_first_name)
        df["Last name"] = df["Email"].apply(self._get_last_name)

        # calculate hours as a decimal from duration timedelta
        df["Hours"] = (df["Duration"].dt.total_seconds() / 3600).round(2)

        # if there is a Task column, prepend to the Description column and remove
        if "Task" in df.columns:
            df["Description"] = df.apply(
                lambda row: (f"[{row['Task']}] {row['Description']}" if pd.notna(row["Task"]) else row["Description"]), axis=1
            )
            df.drop(columns=["Task"], inplace=True)

        df.sort_values(["Start date", "Start time", "Email"], inplace=True)

        if column_renames:
            df.rename(columns=column_renames, inplace=True)

        return df

    def _str_timedelta(self, td: str) -> pd.Timedelta:
        """Convert a string formatted duration (e.g. 01:30:00) to a timedelta."""
        return pd.to_timedelta(pd.to_datetime(td, format="%H:%M:%S").strftime("%H:%M:%S"))

    def convert_to_harvest(
        self,
        source_path: str | TextIO = sys.stdin,
        output_path: str | TextIO = sys.stdout,
        output_cols: list[str] = HARVEST_COLUMNS,
        client_name: str | None = None,
        **kwargs,
    ):
        """Convert Toggl formatted entries in source_path to equivalent Harvest formatted entries.

        Args:
            source_path: The path to a readable CSV file of Toggl time entries; or a readable buffer of the same.

            output_path: The path to a CSV file where Harvest time entries will be written; or a writeable buffer for the same.

            output_cols (list[str]): A list of column names for the output.

            client_name (str | None): The value to assign in the output "Client" field.
                Defaults to the HARVEST_CLIENT_NAME environment variable.

        Returns:
            None. Either prints the resulting CSV data or writes to output_path.
        """
        if client_name is None:
            client_name = os.environ.get("HARVEST_CLIENT_NAME")

        # NOTE: the original renames also mapped "Project" to itself, a no-op that is dropped here
        source = self._prepare_input(source_path=source_path, column_renames={"Description": "Notes", "Start date": "Date"})

        # update static calculated columns
        source["Client"] = client_name
        source["Task"] = "Project Consulting"

        # get cached project name if any, keyed on Toggl project name
        info = self.project_info()
        source["Project"] = source["Project"].apply(lambda x: info.get(key=x, default=x))

        # find duplicates based on a subset of columns
        cols = ["Date", "Hours", "First name", "Last name", "Notes"]
        is_duplicate = source.duplicated(subset=cols, keep=False)

        if is_duplicate.any():
            # number each duplicate row within its group (1-based) and record the group size
            counter = source[is_duplicate].groupby(cols).cumcount() + 1
            group_size = source[is_duplicate].groupby(cols)["Notes"].transform("size")

            # disambiguate duplicates by appending "(i/N)" to Notes
            source.loc[is_duplicate, "Notes"] = (
                source.loc[is_duplicate, "Notes"] + " (" + counter.astype(str) + "/" + group_size.astype(str) + ")"
            )

        files.write_csv(output_path, source, columns=output_cols)

    def convert_to_justworks(
        self,
        source_path: str | TextIO = sys.stdin,
        output_path: str | TextIO = sys.stdout,
        output_cols: list[str] = JUSTWORKS_COLUMNS,
        **kwargs,
    ):
        """Convert Toggl formatted entries in source_path to equivalent Justworks formatted entries.

        Args:
            source_path: The path to a readable CSV file of Toggl time entries; or a readable buffer of the same.

            output_path: The path to a CSV file where Justworks time entries will be written; or a writeable buffer for the same.

            output_cols (list[str]): A list of column names for the output.

        Returns:
            None. Either prints the resulting CSV data or writes to output_path.
        """
        source = self._prepare_input(
            source_path=source_path,
            column_renames={
                "Email": "Work Email",
                "First name": "First Name",
                "Hours": "Regular Hours",
                "Last name": "Last Name",
                "Start date": "Start Date",
            },
        )

        # aggregate hours per person per day
        cols = ["Work Email", "First Name", "Last Name", "Start Date"]
        people = source.sort_values(cols).groupby(cols, observed=False)
        people_agg = people.agg({"Regular Hours": "sum"})
        people_agg.reset_index(inplace=True)

        # aggregate hours per person and rollup to the week (starting on Sunday)
        cols = ["Work Email", "First Name", "Last Name"]
        weekly_agg = people_agg.groupby(cols).resample("W", label="left", on="Start Date")
        weekly_agg = weekly_agg["Regular Hours"].sum().reset_index()

        # calculate the week end date (the following Saturday)
        weekly_agg["End Date"] = weekly_agg["Start Date"] + pd.Timedelta(days=6)

        files.write_csv(output_path, weekly_agg, columns=output_cols)

    def download(
        self,
        start_date: datetime,
        end_date: datetime,
        output_path: str | TextIO = sys.stdout,
        output_cols: list[str] | None = TOGGL_COLUMNS,
        **kwargs,
    ):
        """Download a CSV report from Toggl of detailed time entries for the given date range.

        Args:
            start_date (datetime): The beginning of the reporting period.

            end_date (datetime): The end of the reporting period.

            output_path: The path to a CSV file where Toggl time entries will be written; or a writeable buffer for the same.

            output_cols (list[str]): A list of column names for the output.

        Extra kwargs are passed along in the POST request body.

        Returns:
            None. Either prints the resulting CSV data or writes to output_path.
        """
        response = self.api_reports.detailed_time_entries(start_date, end_date, **kwargs)
        # the raw response has these initial 3 bytes:
        #
        #   b"\xef\xbb\xbfUser,Email,Client..."
        #
        # \xef\xbb\xbf is the Byte Order Mark (BOM) sometimes used in unicode text files
        # these 3 bytes indicate a utf-8 encoded text file
        #
        # See more
        # - https://en.wikipedia.org/wiki/Byte_order_mark
        # - https://stackoverflow.com/a/50131187
        csv = response.content.decode("utf-8-sig")

        df = pd.read_csv(io.StringIO(csv))
        files.write_csv(output_path, df, columns=output_cols)

    def lock(self, lock_date: datetime):
        """Lock time entries on the given date in the Toggl workspace.

        Args:
            lock_date (datetime): The date to lock time entries.
        """
        lock_date_str = lock_date.strftime("%Y-%m-%d")
        self.api_workspace.update_preferences(report_locked_at=lock_date_str)

    def normalize_summary(self, toggl_summary: TimeSummary) -> TimeSummary:
        """Normalize a Toggl TimeSummary to match the Harvest format.

        Toggl project names are mapped through the project info cache, and user
        emails are replaced with "First Last" display names.
        """
        info = self.project_info()
        new_summary = TimeSummary(
            earliest_date=toggl_summary.earliest_date,
            latest_date=toggl_summary.latest_date,
            total_rows=toggl_summary.total_rows,
            total_hours=toggl_summary.total_hours,
        )

        for project, hours in toggl_summary.hours_per_project.items():
            harvest_project = info.get(key=project, default=project)
            new_summary.hours_per_project[harvest_project] = hours

        for email, projects in toggl_summary.hours_per_user_project.items():
            first_name = self._get_first_name(email)
            last_name = self._get_last_name(email)
            user = f"{first_name} {last_name}"
            new_summary.hours_per_user_project[user] = {}
            for project, hours in projects.items():
                harvest_project = info.get(key=project, default=project)
                new_summary.hours_per_user_project[user][harvest_project] = hours

        return new_summary

    def summarize(self, path: str | TextIO) -> TimeSummary:
        """Summarize a Toggl CSV file.

        Args:
            path (str | TextIO): The path to a readable CSV file of Toggl time entries; or a readable buffer of the same.

        Returns:
            TimeSummary: A summary of the time entries.
        """
        source = files.read_csv(path, usecols=self.TOGGL_COLUMNS, parse_dates=["Start date"], cache_dates=True)

        # calculate hours as a decimal from duration string
        source["Hours"] = (pd.to_timedelta(source["Duration"]).dt.total_seconds() / 3600).round(2)

        summary = TimeSummary(
            earliest_date=source["Start date"].min().date(),
            latest_date=source["Start date"].max().date(),
            total_rows=len(source),
            total_hours=source["Hours"].sum(),
        )

        # Group by Project to get hours per project
        project_hours = source.groupby(["Project"])["Hours"].sum().to_dict()
        summary.hours_per_project = project_hours

        # Group by User and Project to get hours per user/project
        user_project_hours = source.groupby(["Email", "Project"])["Hours"].sum().to_dict()
        # create a nested dict of the form {user: {project: hours}}
        for (email, project), hours in user_project_hours.items():
            if email not in summary.hours_per_user_project:
                summary.hours_per_user_project[email] = {}
            summary.hours_per_user_project[email][project] = hours

        return summary
class TogglUsers(TogglService):
    """Read-only queries for users and groups in the Toggl organization and workspace."""

    def get_organization_group(self, name: str) -> dict | None:
        """Get group of users from the Toggl organization.

        Args:
            name (str): The name of the group.

        Returns:
            dict | None: The resulting JSON data of the group, or None when no group matches.
        """
        response = self.api_organization.get_groups(name=name)
        json = response.json()
        return json[0] if json else None

    def get_organization_groups(self, group_names: list[str] | None = None) -> list[dict]:
        """Get a list of groups from the Toggl organization.

        Args:
            group_names (list[str] | None): Return only groups with a name matching one of the provided names.
                (Replaces the original mutable `[]` default argument.)

        Returns:
            list[dict]: The resulting JSON data of groups.
        """
        data = []

        if group_names:
            for group in group_names:
                # a name with no match appends None, matching the original extend([json]) behavior
                data.append(self.get_organization_group(group))
        else:
            response = self.api_organization.get_groups()
            data.extend(response.json())

        return data

    def get_organization_users(self, inactive: bool = False, groups: list[str] | None = None, **kwargs) -> dict:
        """Get a list of users from the Toggl organization.

        Args:
            inactive (bool): True to get inactive users. False (the default) to get only active users.
            groups (list[str] | None): An optional list of Toggl group ids, returning only users in any one of the given groups.
                (Replaces the original mutable `[]` default argument.)

        Returns:
            dict: The resulting JSON data of users.
        """
        # map the boolean flag onto Toggl's active_status filter values
        kwargs["active_status"] = "inactive,invited" if inactive else "active"

        if groups:
            kwargs["groups"] = ",".join(str(g) for g in groups)

        response = self.api_organization.get_users(**kwargs)
        return response.json()

    def get_workspace_users(self, **kwargs) -> dict:
        """Get a list of users from the Toggl workspace.

        Returns:
            dict: The resulting JSON data of users.
        """
        response = self.api_workspace.get_users(**kwargs)
        return response.json()