Coverage report for compiler_admin/services/toggl.py: 99% of 154 statements covered.
Generated by coverage.py v7.13.5 at 2026-04-28 05:48 +0000.
1import io
2import os
3import sys
4from datetime import datetime
5from functools import cache
6from typing import TextIO
8import pandas as pd
10import compiler_admin.services.files as files
11from compiler_admin.api.toggl import TogglOrganization, TogglReports, TogglWorkspace
12from compiler_admin.services.google import GoogleAccount
13from compiler_admin.services.time import TimeSummary
# Display names for Toggl organization groups, keyed on short identifier.
GROUPS = {"contractors": "Contractors", "service_accounts": "Service Accounts", "staff": "Staff", "partners": "Partners"}
class TogglService:
    """Base class for Toggl-backed services: wires up the Toggl API clients.

    Credentials and identifiers are read from the TOGGL_API_TOKEN,
    TOGGL_WORKSPACE_ID, and TOGGL_ORGANIZATION_ID environment variables.
    """

    def __init__(self):
        api_token = os.environ.get("TOGGL_API_TOKEN")
        workspace_id = os.environ.get("TOGGL_WORKSPACE_ID")
        organization_id = os.environ.get("TOGGL_ORGANIZATION_ID")
        # one client per Toggl API surface
        self.api_organization = TogglOrganization(api_token, workspace_id, organization_id)
        self.api_reports = TogglReports(api_token, workspace_id)
        self.api_workspace = TogglWorkspace(api_token, workspace_id)
class TogglTime(TogglService):
    """Converts, downloads, locks, and summarizes Toggl time entry data."""

    # input columns needed for conversion
    TOGGL_COLUMNS = ["Email", "Project", "Task", "Client", "Start date", "Start time", "Duration", "Description"]

    # default output CSV columns for Harvest
    HARVEST_COLUMNS = ["Date", "Client", "Project", "Task", "Notes", "Hours", "First name", "Last name"]
    # default output CSV columns for Justworks
    JUSTWORKS_COLUMNS = ["First Name", "Last Name", "Work Email", "Start Date", "End Date", "Regular Hours"]

    def __init__(self):
        super().__init__()
        # registry of output-format converters, keyed on format name
        self.converters = {"harvest": self.convert_to_harvest, "justworks": self.convert_to_justworks}

    def project_info(self) -> files.JsonFileCache:
        """Cache of previously seen project information, keyed on Toggl project name.

        Created lazily per instance instead of with functools.cache, which keys
        on `self` and would keep the instance alive for the cache's lifetime (B019).
        """
        if not hasattr(self, "_project_info"):
            self._project_info = files.JsonFileCache("TOGGL_PROJECT_INFO")
        return self._project_info

    def user_info(self) -> files.JsonFileCache:
        """Cache of previously seen user information, keyed on email.

        Created lazily per instance instead of with functools.cache, which keys
        on `self` and would keep the instance alive for the cache's lifetime (B019).
        """
        if not hasattr(self, "_user_info"):
            self._user_info = files.JsonFileCache("TOGGL_USER_INFO")
        return self._user_info

    def _get_name(self, email: str, name_key: str) -> str | None:
        """Get a cached name field for `email`, querying Google on a cache miss.

        Args:
            email (str): The user's email address.
            name_key (str): The info key to read, e.g. "First Name" or "Last Name".

        Returns:
            str | None: The name value, or None when it cannot be found.
        """
        info = self.user_info()
        user = info.get(email)
        name = user.get(name_key) if user else None
        if name is None:
            user = GoogleAccount(email).get_info()
            name = user.get(name_key) if user else None
            # Only cache a successful lookup: the original code called
            # info[email].update(None) when Google returned nothing, raising TypeError.
            if user:
                if email in info:
                    info[email].update(user)
                else:
                    info[email] = user
        return name

    def _get_first_name(self, email: str) -> str | None:
        """Get cached first name or query from Google."""
        return self._get_name(email, "First Name")

    def _get_last_name(self, email: str) -> str | None:
        """Get cached last name or query from Google."""
        return self._get_name(email, "Last Name")

    def _prepare_input(self, source_path: str | TextIO, column_renames: dict | None = None) -> pd.DataFrame:
        """Parse and prepare CSV data from `source_path` into an initial `pandas.DataFrame`.

        Args:
            source_path: The path to a readable CSV file of Toggl time entries; or a readable buffer of the same.
            column_renames (dict | None): Optional mapping of input column names to output column names.
                (Replaces the original mutable `{}` default argument.)

        Returns:
            pd.DataFrame: Prepared entries, sorted by start date, start time, and email.
        """
        df = files.read_csv(source_path, usecols=self.TOGGL_COLUMNS, parse_dates=["Start date"], cache_dates=True)

        df["Start time"] = df["Start time"].apply(self._str_timedelta)
        df["Duration"] = df["Duration"].apply(self._str_timedelta)

        # assign First and Last name
        df["First name"] = df["Email"].apply(self._get_first_name)
        df["Last name"] = df["Email"].apply(self._get_last_name)

        # calculate hours as a decimal from duration timedelta
        df["Hours"] = (df["Duration"].dt.total_seconds() / 3600).round(2)

        # if there is a Task column, prepend to the Description column and remove
        if "Task" in df.columns:
            df["Description"] = df.apply(
                lambda row: (f"[{row['Task']}] {row['Description']}" if pd.notna(row["Task"]) else row["Description"]), axis=1
            )
            df.drop(columns=["Task"], inplace=True)

        df.sort_values(["Start date", "Start time", "Email"], inplace=True)

        if column_renames:
            df.rename(columns=column_renames, inplace=True)

        return df

    def _str_timedelta(self, td: str) -> pd.Timedelta:
        """Convert a string formatted duration (e.g. 01:30:00) to a timedelta."""
        return pd.to_timedelta(pd.to_datetime(td, format="%H:%M:%S").strftime("%H:%M:%S"))

    def convert_to_harvest(
        self,
        source_path: str | TextIO = sys.stdin,
        output_path: str | TextIO = sys.stdout,
        output_cols: list[str] = HARVEST_COLUMNS,
        client_name: str | None = None,
        **kwargs,
    ):
        """Convert Toggl formatted entries in source_path to equivalent Harvest formatted entries.

        Args:
            source_path: The path to a readable CSV file of Toggl time entries; or a readable buffer of the same.

            output_path: The path to a CSV file where Harvest time entries will be written; or a writeable buffer for the same.

            output_cols (list[str]): A list of column names for the output.

            client_name (str | None): The value to assign in the output "Client" field.
                Defaults to the HARVEST_CLIENT_NAME environment variable.

        Returns:
            None. Either prints the resulting CSV data or writes to output_path.
        """
        if client_name is None:
            client_name = os.environ.get("HARVEST_CLIENT_NAME")

        # NOTE: the original renames also mapped "Project" to itself, a no-op that is dropped here
        source = self._prepare_input(source_path=source_path, column_renames={"Description": "Notes", "Start date": "Date"})

        # update static calculated columns
        source["Client"] = client_name
        source["Task"] = "Project Consulting"

        # get cached project name if any, keyed on Toggl project name
        info = self.project_info()
        source["Project"] = source["Project"].apply(lambda x: info.get(key=x, default=x))

        # find duplicates based on a subset of columns
        cols = ["Date", "Hours", "First name", "Last name", "Notes"]
        is_duplicate = source.duplicated(subset=cols, keep=False)

        if is_duplicate.any():
            # number each duplicate row within its group (1-based) and record the group size
            counter = source[is_duplicate].groupby(cols).cumcount() + 1
            group_size = source[is_duplicate].groupby(cols)["Notes"].transform("size")

            # disambiguate duplicates by appending "(i/N)" to Notes
            source.loc[is_duplicate, "Notes"] = (
                source.loc[is_duplicate, "Notes"] + " (" + counter.astype(str) + "/" + group_size.astype(str) + ")"
            )

        files.write_csv(output_path, source, columns=output_cols)

    def convert_to_justworks(
        self,
        source_path: str | TextIO = sys.stdin,
        output_path: str | TextIO = sys.stdout,
        output_cols: list[str] = JUSTWORKS_COLUMNS,
        **kwargs,
    ):
        """Convert Toggl formatted entries in source_path to equivalent Justworks formatted entries.

        Args:
            source_path: The path to a readable CSV file of Toggl time entries; or a readable buffer of the same.

            output_path: The path to a CSV file where Justworks time entries will be written; or a writeable buffer for the same.

            output_cols (list[str]): A list of column names for the output.

        Returns:
            None. Either prints the resulting CSV data or writes to output_path.
        """
        source = self._prepare_input(
            source_path=source_path,
            column_renames={
                "Email": "Work Email",
                "First name": "First Name",
                "Hours": "Regular Hours",
                "Last name": "Last Name",
                "Start date": "Start Date",
            },
        )

        # aggregate hours per person per day
        cols = ["Work Email", "First Name", "Last Name", "Start Date"]
        people = source.sort_values(cols).groupby(cols, observed=False)
        people_agg = people.agg({"Regular Hours": "sum"})
        people_agg.reset_index(inplace=True)

        # aggregate hours per person and rollup to the week (starting on Sunday)
        cols = ["Work Email", "First Name", "Last Name"]
        weekly_agg = people_agg.groupby(cols).resample("W", label="left", on="Start Date")
        weekly_agg = weekly_agg["Regular Hours"].sum().reset_index()

        # calculate the week end date (the following Saturday)
        weekly_agg["End Date"] = weekly_agg["Start Date"] + pd.Timedelta(days=6)

        files.write_csv(output_path, weekly_agg, columns=output_cols)

    def download(
        self,
        start_date: datetime,
        end_date: datetime,
        output_path: str | TextIO = sys.stdout,
        output_cols: list[str] | None = TOGGL_COLUMNS,
        **kwargs,
    ):
        """Download a CSV report from Toggl of detailed time entries for the given date range.

        Args:
            start_date (datetime): The beginning of the reporting period.

            end_date (datetime): The end of the reporting period.

            output_path: The path to a CSV file where Toggl time entries will be written; or a writeable buffer for the same.

            output_cols (list[str]): A list of column names for the output.

        Extra kwargs are passed along in the POST request body.

        Returns:
            None. Either prints the resulting CSV data or writes to output_path.
        """
        response = self.api_reports.detailed_time_entries(start_date, end_date, **kwargs)
        # the raw response has these initial 3 bytes:
        #
        #   b"\xef\xbb\xbfUser,Email,Client..."
        #
        # \xef\xbb\xbf is the Byte Order Mark (BOM) sometimes used in unicode text files
        # these 3 bytes indicate a utf-8 encoded text file
        #
        # See more
        # - https://en.wikipedia.org/wiki/Byte_order_mark
        # - https://stackoverflow.com/a/50131187
        csv = response.content.decode("utf-8-sig")

        df = pd.read_csv(io.StringIO(csv))
        files.write_csv(output_path, df, columns=output_cols)

    def lock(self, lock_date: datetime):
        """Lock time entries on the given date in the Toggl workspace.

        Args:
            lock_date (datetime): The date to lock time entries.
        """
        lock_date_str = lock_date.strftime("%Y-%m-%d")
        self.api_workspace.update_preferences(report_locked_at=lock_date_str)

    def normalize_summary(self, toggl_summary: TimeSummary) -> TimeSummary:
        """Normalize a Toggl TimeSummary to match the Harvest format.

        Toggl project names are mapped through the project info cache, and user
        emails are replaced with "First Last" display names.
        """
        info = self.project_info()
        new_summary = TimeSummary(
            earliest_date=toggl_summary.earliest_date,
            latest_date=toggl_summary.latest_date,
            total_rows=toggl_summary.total_rows,
            total_hours=toggl_summary.total_hours,
        )

        for project, hours in toggl_summary.hours_per_project.items():
            harvest_project = info.get(key=project, default=project)
            new_summary.hours_per_project[harvest_project] = hours

        for email, projects in toggl_summary.hours_per_user_project.items():
            first_name = self._get_first_name(email)
            last_name = self._get_last_name(email)
            user = f"{first_name} {last_name}"
            new_summary.hours_per_user_project[user] = {}
            for project, hours in projects.items():
                harvest_project = info.get(key=project, default=project)
                new_summary.hours_per_user_project[user][harvest_project] = hours

        return new_summary

    def summarize(self, path: str | TextIO) -> TimeSummary:
        """Summarize a Toggl CSV file.

        Args:
            path (str | TextIO): The path to a readable CSV file of Toggl time entries; or a readable buffer of the same.

        Returns:
            TimeSummary: A summary of the time entries.
        """
        source = files.read_csv(path, usecols=self.TOGGL_COLUMNS, parse_dates=["Start date"], cache_dates=True)

        # calculate hours as a decimal from duration string
        source["Hours"] = (pd.to_timedelta(source["Duration"]).dt.total_seconds() / 3600).round(2)

        summary = TimeSummary(
            earliest_date=source["Start date"].min().date(),
            latest_date=source["Start date"].max().date(),
            total_rows=len(source),
            total_hours=source["Hours"].sum(),
        )

        # Group by Project to get hours per project
        project_hours = source.groupby(["Project"])["Hours"].sum().to_dict()
        summary.hours_per_project = project_hours

        # Group by User and Project to get hours per user/project
        user_project_hours = source.groupby(["Email", "Project"])["Hours"].sum().to_dict()
        # create a nested dict of the form {user: {project: hours}}
        for (email, project), hours in user_project_hours.items():
            if email not in summary.hours_per_user_project:
                summary.hours_per_user_project[email] = {}
            summary.hours_per_user_project[email][project] = hours

        return summary
class TogglUsers(TogglService):
    """Read-only queries for users and groups in the Toggl organization and workspace."""

    def get_organization_group(self, name: str) -> dict | None:
        """Get group of users from the Toggl organization.

        Args:
            name (str): The name of the group.

        Returns:
            dict | None: The resulting JSON data of the group, or None when no group matches.
        """
        response = self.api_organization.get_groups(name=name)
        json = response.json()
        return json[0] if json else None

    def get_organization_groups(self, group_names: list[str] | None = None) -> list[dict]:
        """Get a list of groups from the Toggl organization.

        Args:
            group_names (list[str] | None): Return only groups with a name matching one of the provided names.
                (Replaces the original mutable `[]` default argument.)

        Returns:
            list[dict]: The resulting JSON data of groups.
        """
        data = []

        if group_names:
            for group in group_names:
                # a name with no match appends None, matching the original extend([json]) behavior
                data.append(self.get_organization_group(group))
        else:
            response = self.api_organization.get_groups()
            data.extend(response.json())

        return data

    def get_organization_users(self, inactive: bool = False, groups: list[str] | None = None, **kwargs) -> dict:
        """Get a list of users from the Toggl organization.

        Args:
            inactive (bool): True to get inactive users. False (the default) to get only active users.
            groups (list[str] | None): An optional list of Toggl group ids, returning only users in any one of the given groups.
                (Replaces the original mutable `[]` default argument.)

        Returns:
            dict: The resulting JSON data of users.
        """
        # map the boolean flag onto Toggl's active_status filter values
        kwargs["active_status"] = "inactive,invited" if inactive else "active"

        if groups:
            kwargs["groups"] = ",".join(str(g) for g in groups)

        response = self.api_organization.get_users(**kwargs)
        return response.json()

    def get_workspace_users(self, **kwargs) -> dict:
        """Get a list of users from the Toggl workspace.

        Returns:
            dict: The resulting JSON data of users.
        """
        response = self.api_workspace.get_users(**kwargs)
        return response.json()