Commit 18615e6

added modules for activitywatch and oura with code from notebooks
ErikBjare committed Feb 27, 2020
1 parent bf98140 commit 18615e6
Showing 9 changed files with 561 additions and 336 deletions.
3 changes: 0 additions & 3 deletions .gitmodules
@@ -10,6 +10,3 @@
[submodule "mifit-data-export"]
path = vendor/mifit-data-export
url = https://github.com/Dolnor/mifit-data-export.git
-[submodule "aw-research"]
-	path = aw-research
-	url = https://github.com/ActivityWatch/aw-research.git
2 changes: 1 addition & 1 deletion Makefile
@@ -18,5 +18,5 @@ build-notebooks:

jupyter:
# From: https://stackoverflow.com/a/47296960/965332
-	poetry install --skip-lock ipykernel
+	poetry run pip3 install ipykernel
poetry run bash -c 'python -m ipykernel install --user --name=`basename $$VIRTUAL_ENV`'
1 change: 0 additions & 1 deletion aw-research
Submodule aw-research deleted from 3d8b70
279 changes: 10 additions & 269 deletions notebooks/QuantifiedMe - Dashboard.ipynb
@@ -101,6 +101,8 @@
"from aw_research.classify import _union_no_overlap\n",
"from aw_research import verify_no_overlap, split_into_weeks, split_into_days\n",
"\n",
"from quantifiedme.activitywatch import load_complete_timeline\n",
"\n",
"# Use XKCD-style plots\n",
"# FIXME: Causes the day trend plots to take forever for some unknown reason\n",
"# matplotlib.pyplot.xkcd(scale=0.8, randomness=1)"
@@ -200,155 +202,7 @@
"metadata": {},
"outputs": [],
"source": [
"events = []"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Generate fake data\n",
"\n",
"So I can show you the plots in this notebook without sacrificing my privacy!"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data_weights = [\n",
" (100, None),\n",
" (2, {'title': 'Uncategorized'}),\n",
" (5, {'title': 'ActivityWatch'}),\n",
" (4, {'title': 'Thankful'}),\n",
" (3, {'title': 'QuantifiedMe'}),\n",
" (3, {'title': 'FMAA01 - Analysis in One Variable'}),\n",
" (3, {'title': 'EDAN95 - Applied Machine Learning'}),\n",
" (2, {'title': 'Stack Overflow'}),\n",
" (2, {'title': 'phone: Brilliant'}),\n",
" (2, {'url': 'youtube.com', 'title': 'YouTube'}),\n",
" (1, {'url': 'reddit.com'}),\n",
" (1, {'url': 'facebook.com'}),\n",
" (1, {'title': 'Plex'}),\n",
" (1, {'title': 'Spotify'}),\n",
" (1, {'title': 'Fallout 4'}),\n",
"]\n",
"\n",
"def create_fake_events(start: datetime, end: datetime):\n",
" # First set RNG seeds to make the notebook reproducible\n",
" random.seed(0) \n",
" np.random.seed(0)\n",
" \n",
" pareto_alpha = 0.5\n",
" pareto_mode = 5\n",
" time_passed = timedelta()\n",
" while start + time_passed < end:\n",
" duration = timedelta(seconds=np.random.pareto(pareto_alpha) * pareto_mode)\n",
" duration = min([timedelta(hours=1), duration])\n",
" timestamp = start + time_passed\n",
" data = random.choices([d[1] for d in data_weights], [d[0] for d in data_weights])[0]\n",
" if data:\n",
" yield Event(timestamp=timestamp, duration=duration, data=data)\n",
" time_passed += duration\n",
" \n",
"if 'fake' in datasources:\n",
" fake_events = list(create_fake_events(start=since.astimezone(timezone.utc), end=now.astimezone(timezone.utc)))\n",
" events += fake_events"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load ActivityWatch data\n",
"\n",
"Retrieve events from aw-server. Queried for active windows combined with browser history and filters by AFK/audible."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"if 'activitywatch' in datasources:\n",
" # Split up into previous days and today, to take advantage of caching\n",
" # TODO: Split up into whole days\n",
" events_aw = []\n",
" for dtstart, dtend in split_into_weeks(since, now):\n",
" events_aw += aw_research.classify.get_events(since=dtstart, end=dtend, include_smartertime=False, include_toggl=False)\n",
" print(len(events_aw))\n",
" for e in events_aw:\n",
" e.data['$source'] = 'activitywatch'\n",
"\n",
" events = _union_no_overlap(events, events_aw)\n",
" verify_no_overlap(events)\n",
" \n",
"# The above code does caching using joblib, use the following if you want to clear the cache:\n",
"# aw_research.classify.memory.clear()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load SmarterTime data\n",
"\n",
"[SmarterTime](https://play.google.com/store/apps/details?id=com.smartertime&hl=en) is an Android app that tracks app usage. It was primarily used by me before I got ActivityWatch on Android working (but is still, for old data).\n",
"\n",
"The code loads an ActivityWatch bucket that I converted from the app export (so there's one step here I haven't shown)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def load_smartertime():\n",
" events_smartertime = []\n",
" for smartertime_awbucket_path in [\n",
" 'data/smartertime/smartertime_export_erb-a1_2019-02-18_bb7f26aa.awbucket.json',\n",
" 'data/smartertime/smartertime_export_erb-f1-miui_2019-10-17_6465fafb.awbucket.json'\n",
" ]:\n",
" new_events = aw_research.classify._get_events_smartertime(since, filepath=smartertime_awbucket_path)\n",
" events_smartertime = _union_no_overlap(events_smartertime, new_events)\n",
" for e in events_smartertime:\n",
" e.data['$source'] = 'smartertime'\n",
" return events_smartertime\n",
"\n",
"if 'smartertime' in datasources:\n",
" events_smartertime = load_smartertime()\n",
" verify_no_overlap(events_smartertime)\n",
" events = _union_no_overlap(events, events_smartertime)\n",
" verify_no_overlap(events)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Toggl data\n",
"\n",
"[Toggl](https://toggl.com/) is a web, desktop, and mobile app that lets you track time manually."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from aw_research import load_toggl\n",
" \n",
"if 'toggl' in datasources:\n",
" events_toggl = load_toggl(since, now)\n",
" print(f\"Oldest: {min(events_toggl, key=lambda e: e.timestamp).timestamp}\")\n",
" verify_no_overlap(events_toggl)\n",
" events = _union_no_overlap(events, events_toggl)\n",
" verify_no_overlap(events)"
"events = load_complete_timeline(datetime.now(tz=timezone.utc) - timedelta(days=90), datasources=datasources)"
]
},
{
@@ -359,41 +213,6 @@
"Just to make sure there are no bugs in underlying code."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Verify that no events are older than `since`\n",
"assert all([since <= e.timestamp for e in events])\n",
"\n",
"# Verify that no events take place in the future\n",
"# FIXME: Doesn't work with fake data, atm\n",
"if 'fake' not in datasources:\n",
" assert all([e.timestamp + e.duration <= now for e in events])\n",
"\n",
"# Verify that no events overlap\n",
"verify_no_overlap(events)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"e1 = Event(**{'data': {'title': 'event 1'},\n",
" 'duration': timedelta(seconds=1, microseconds=599000),\n",
" 'timestamp': datetime(2018, 12, 15, 16, 42, 0, 906000, tzinfo=timezone.utc)})\n",
"e2 = Event(**{'data': {'title': 'event 2'},\n",
" 'duration': timedelta(seconds=269, microseconds=602000),\n",
" 'timestamp': datetime(2018, 12, 15, 16, 42, 0, 964000, tzinfo=timezone.utc)})\n",
"\n",
"es = _union_no_overlap([e1], [e2])\n",
"verify_no_overlap(es)"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -440,86 +259,6 @@
"print(f\"Tracking coverage: {100 * tracking_cov:.3}%\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Annotate data\n",
"\n",
"Now we want to annotate our data with tags and categories.\n",
"To do so we first need to specify tagging and classification rules."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Define tagging rules\n",
"\n",
"First we need to specify rules used in categorization and tagging.\n",
"\n",
"To clarify:\n",
"\n",
" - An event can have **many** tags\n",
" - An event can have **only one** category, but will also belong to that category's parent categories (creating a category hierarchy)\n",
"\n",
"The rules are specified by a list of tuples on the format `(regex, category, parent_category)`. You can write them within the notebook or load them from a CSV file."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"classes = [\n",
" # (Social) Media\n",
" (r'Facebook|facebook.com', 'Social Media', 'Media'),\n",
" (r'Reddit|reddit.com', 'Social Media', 'Media'),\n",
" (r'Spotify|spotify.com', 'Music', 'Media'),\n",
" (r'subcategory without matching', 'Video', 'Media'),\n",
" (r'YouTube|youtube.com', 'YouTube', 'Video'),\n",
" (r'Plex|plex.tv', 'Plex', 'Video'),\n",
" (r'Fallout 4', 'Games', 'Media'),\n",
" \n",
" # Work\n",
" (r'github.com|stackoverflow.com', 'Programming', 'Work'),\n",
" (r'[Aa]ctivity[Ww]atch|aw-.*', 'ActivityWatch', 'Programming'),\n",
" (r'[Qq]uantified[Mm]e', 'QuantifiedMe', 'Programming'),\n",
" (r'[Tt]hankful', 'Thankful', 'Programming'),\n",
" \n",
" # School\n",
" (r'subcategory without matching', 'School', 'Work'),\n",
" (r'Duolingo|Brilliant|Khan Academy', 'Self-directed', 'School'),\n",
" (r'Analysis in One Variable', 'Maths', 'School'),\n",
" (r'Applied Machine Learning', 'CS', 'School'),\n",
" (r'Advanced Web Security', 'CS', 'School'),\n",
"]\n",
"\n",
"# Now load the classes from within the notebook, or from a CSV file.\n",
"load_from_file = True if personal else False\n",
"if load_from_file:\n",
" aw_research.classify._init_classes(filename=\"./aw-research-sym/categories.toml\")\n",
"else:\n",
" aw_research.classify._init_classes(new_classes=classes)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now to actually annotate the events with our defined tags/categories we will use the `classify(events)` function which categorizes events by adding the fields `$tags` and `$category_hierarchy` to the event data."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"events = aw_research.classify.classify(events)"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -554,8 +293,10 @@
"    df = categorytime_during_day(events, category, today)\n",
"    \n",
"    # FIXME: This makes the first and last hour always be 0\n",
-"    df[start_of_day(today) + day_offset - tz_offset] = 0 \n",
-"    df[end_of_day(today) + day_offset - tz_offset] = 0\n",
+"    ix = pd.DatetimeIndex(start=start_of_day(today) + day_offset - tz_offset,\n",
+"                          end=start_of_day(today) + timedelta(hours=24) + day_offset - tz_offset,\n",
+"                          freq='H')\n",
+"    df = df.reindex(ix)\n",
" df = df.sort_index().asfreq('H')\n",
" \n",
" fig = plt.figure(figsize=(18, 3))\n",
@@ -748,7 +489,7 @@
"    #\"Work\": 200,\n",
"    \"ActivityWatch\": 300,\n",
"    \"QuantifiedMe\": 300,\n",
-"    \"Thankful\": 400,\n",
+"    \"Thankful\": 300,\n",
" \"School\": 600,\n",
" #\"Finance\": 1000,\n",
" #\"Maths\": 400,\n",
@@ -897,9 +638,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "quantifiedme",
"language": "python",
"name": "python3"
"name": "quantifiedme"
},
"language_info": {
"codemirror_mode": {
(Diffs for the remaining 5 changed files, including the new quantifiedme modules, did not load.)
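Those unrendered files presumably include quantifiedme/activitywatch.py, whose load_complete_timeline the notebook now imports. Below is a minimal sketch of what it plausibly consolidates, pieced together from the notebook cells deleted above; the signature, the default arguments, and the choice to cover only the activitywatch and toggl branches are assumptions, not the committed code.

# Hypothetical sketch of quantifiedme/activitywatch.py, reconstructed from the
# notebook cells removed in this commit. Not the actual committed module.
from datetime import datetime, timezone

import aw_research.classify
from aw_research import load_toggl, split_into_weeks, verify_no_overlap
from aw_research.classify import _union_no_overlap


def load_complete_timeline(since: datetime, datasources=('activitywatch', 'toggl')):
    now = datetime.now(tz=timezone.utc)
    events = []

    if 'activitywatch' in datasources:
        # Query week by week so completed weeks are served from the
        # joblib cache (aw_research.classify.memory)
        events_aw = []
        for dtstart, dtend in split_into_weeks(since, now):
            events_aw += aw_research.classify.get_events(
                since=dtstart, end=dtend,
                include_smartertime=False, include_toggl=False)
        for e in events_aw:
            e.data['$source'] = 'activitywatch'
        events = _union_no_overlap(events, events_aw)

    if 'toggl' in datasources:
        events_toggl = load_toggl(since, now)
        verify_no_overlap(events_toggl)
        events = _union_no_overlap(events, events_toggl)

    # Same sanity checks as the notebook's "Verify data" section
    assert all(since <= e.timestamp for e in events)
    verify_no_overlap(events)
    return events

The smartertime and fake-data branches would presumably move into the module the same way, giving the notebook a single loading and overlap-checking path instead of copied cells.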
