-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 6d02be9
Showing
10 changed files
with
329 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# https://EditorConfig.org | ||
root = true | ||
|
||
[*] | ||
indent_style = space | ||
indent_size = 2 | ||
end_of_line = lf | ||
charset = utf-8 | ||
trim_trailing_whitespace = true | ||
insert_final_newline = true | ||
|
||
[*.py] | ||
indent_size = 4 | ||
|
||
[*.md] | ||
trim_trailing_whitespace = false |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Python | ||
/venv/** | ||
/__pycache__/** | ||
|
||
# Scripts | ||
/data/** | ||
!/data/.gitkeep | ||
/out/** |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
MIT License | ||
|
||
Copyright (c) 2024 Michael Horstmann | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included in all | ||
copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# Parser and Converter for Apple Health Export | ||
|
||
Scripts to parse and convert health data after exporting it from Apple's Health app | ||
|
||
## Requirements | ||
|
||
- A somewhat modern version of [Python](https://www.python.org) | ||
- If you want to use it in the CSV script: [jq](https://jqlang.github.io/jq/) | ||
- Shell scripts were written for [Zsh](https://www.zsh.org) | ||
|
||
## Installation | ||
|
||
- Clone this repository from GitHub | ||
- Then run: `./pip_install.sh` | ||
|
||
## Usage | ||
|
||
- First export all of your health data from Apple's Health app | ||
- Then unpack the exported ZIP archive | ||
- Copy the `Export.xml` file to the `data` directory | ||
- Run one of the two scripts that start with `parse_and_convert` | ||
|
||
## Helpful | ||
|
||
- [Parsing Apple Health data](https://gist.github.com/hoffa/936db2bb85e134709cd263dd358ca309) | ||
- [How to parse XML file exported from Apple iOS Health App […]](https://blog.gwlab.page/how-to-parse-xml-file-exported-from-apple-ios-health-app-and-make-a-sleep-schedule-plot-using-60c652697c81) | ||
|
||
## Contribute | ||
|
||
If you find a bug, feel free to create an issue or a pull request |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
import os | ||
|
||
|
||
# Location of the Apple Health export XML that both converter scripts read.
data_file_path = os.path.join('.', 'data', 'Export.xml')
|
||
|
||
def get_output_file_path(filename: str, suffix: str, subdirectory_name: str = '') -> str:
    """Build the output path './out/<subdir>/<filename>.<suffix>' and ensure the directory exists.

    When no subdirectory name is given, the file suffix doubles as the
    subdirectory name.
    """
    subdirectory = subdirectory_name or suffix
    target_directory = os.path.join('.', 'out', subdirectory)
    os.makedirs(target_directory, exist_ok=True)
    return os.path.join(target_directory, f'{filename}.{suffix}')
|
||
|
||
def get_argparse_description(output_description: str) -> str:
    """Compose the two-line argparse description shared by both converter scripts."""
    first_line = ('this is one of two scripts to parse and convert health data '
                  "after exporting it from Apple's Health app:")
    second_line = f'this script parses an XML file ("{data_file_path}") and converts it into {output_description}'
    return first_line + '\n' + second_line
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import xml.etree.ElementTree as ET | ||
import json | ||
import subprocess | ||
import argparse | ||
import sys | ||
|
||
import globals | ||
|
||
|
||
def parse_health_export() -> tuple[list[dict], list[str]]:
    """Stream-parse the Apple Health export XML.

    Returns every <Record> element's attribute dict plus the distinct record
    types in order of first appearance.
    """
    records: list[dict] = []
    record_types: list[str] = []

    for _, element in ET.iterparse(globals.data_file_path):
        if element.tag != 'Record':
            continue
        attributes = element.attrib
        records.append(attributes)
        record_type = attributes['type']
        if record_type not in record_types:
            record_types.append(record_type)

    return records, record_types
|
||
|
||
def print_all_record_types(record_types: list[str]):
    """Pretty-print the record type list to stdout as indented JSON."""
    serialized_types = json.dumps(record_types, indent=4)
    print(serialized_types)
|
||
|
||
def write_all_records_json_file(records: list[dict]):
    """Serialize all records into one pretty-printed JSON file (jq subdirectory)."""
    output_path = globals.get_output_file_path('all_records', 'json', 'jq')
    serialized = json.dumps(records, indent=4)
    with open(output_path, 'w') as json_file:
        json_file.write(serialized + '\n')
|
||
|
||
def write_all_records_txt_file(records: list[dict]):
    """Write one compact JSON object per line (JSON Lines), as input for jq."""
    output_path = globals.get_output_file_path('all_records', 'txt', 'jq')
    with open(output_path, 'w') as txt_file:
        txt_file.writelines(json.dumps(record) + '\n' for record in records)
|
||
|
||
def write_all_records_csv_file_with_jq():
    """Convert the JSON-Lines records file to CSV by piping it through jq.

    Requires `jq` on PATH and the text file written by
    write_all_records_txt_file() to exist already.
    """
    import shlex  # local import: only needed here for safe shell quoting

    # Hoist the path lookups out of the f-string: the original nested
    # same-quote f-string is a SyntaxError before Python 3.12 (PEP 701).
    input_file_path = globals.get_output_file_path('all_records', 'txt', 'jq')
    output_file_path = globals.get_output_file_path('all_records', 'csv', 'jq')
    jq_filter = '[.type, .creationDate, .startDate, .endDate, .value, .unit, .device, .sourceName, .sourceVersion] | @csv'
    # shell=True is required for the pipe and the output redirection; quote
    # every interpolated value so spaces/metacharacters cannot break the command.
    command = (
        f'cat {shlex.quote(input_file_path)}'
        f' | jq -r {shlex.quote(jq_filter)}'
        f' > {shlex.quote(output_file_path)}'
    )
    subprocess.run(command, shell=True)
|
||
|
||
def main() -> int:
    """CLI entry point: parse the export once, then run the selected actions."""
    parser = argparse.ArgumentParser(
        description=globals.get_argparse_description('JSON and CSV files'),
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('-p', '--print-types', help='whether all record types should be printed', action='store_true')
    parser.add_argument('-j', '--write-json', help='whether all records JSON file should be written', action='store_true')
    parser.add_argument('-c', '--write-csv', help='whether all records CSV file should be written with jq', action='store_true')

    args = parser.parse_args()
    # Without any option there is nothing to do — show usage instead.
    if not any(vars(args).values()):
        parser.print_help()
        return 0

    records, record_types = parse_health_export()

    if args.print_types:
        print('All record types:\n')
        print_all_record_types(record_types)
        return 0

    if args.write_json:
        print('All records JSON file is being written …')
        write_all_records_json_file(records)

    if args.write_csv:
        # jq reads the JSON-Lines text file, so that is written first.
        print('All records text file is being written …')
        write_all_records_txt_file(records)
        print('All records CSV file is being written with jq …')
        write_all_records_csv_file_with_jq()

    return 0
|
||
|
||
# Run the CLI when executed directly; propagate main()'s return value as the exit code.
if __name__ == '__main__':
    sys.exit(main())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import xml.etree.ElementTree as ET | ||
import pandas as pd | ||
import argparse | ||
import sys | ||
|
||
import globals | ||
|
||
|
||
# Enable pandas copy-on-write behavior (pandas 2.x opt-in) for the DataFrame
# slicing/drop operations used below.
pd.options.mode.copy_on_write = True
|
||
|
||
def parse_health_export() -> tuple[pd.DataFrame, list[str]]:
    """Load the Apple Health export into a DataFrame.

    The three date columns are parsed to datetimes and made timezone-naive.
    Also returns the list of distinct record types.
    """
    document_root = ET.parse(globals.data_file_path).getroot()
    records_df = pd.DataFrame([record.attrib for record in document_root.iter('Record')])

    for column_name in ('creationDate', 'startDate', 'endDate'):
        records_df[column_name] = pd.to_datetime(records_df[column_name]).dt.tz_localize(None)

    record_types: list[str] = records_df['type'].unique().tolist()

    return records_df, record_types
|
||
|
||
def to_snake_case(string: str) -> str:
    """Convert a CamelCase identifier to snake_case.

    Inserts an underscore before every uppercase letter, lowercases it, and
    strips a leading underscore.
    """
    pieces = []
    for character in string:
        if character.isupper():
            pieces.append('_' + character.lower())
        else:
            pieces.append(character)
    return ''.join(pieces).removeprefix('_')
|
||
|
||
def type_identifier_to_name(type_identifier: str) -> str:
    """Strip the known HealthKit identifier prefixes and snake_case the remainder."""
    known_prefixes = ('HKQuantityTypeIdentifier', 'HKCategoryTypeIdentifier', 'HKDataType')
    stripped = type_identifier
    for prefix in known_prefixes:
        stripped = stripped.removeprefix(prefix)
    return to_snake_case(stripped)
|
||
|
||
def print_all_record_types(record_types: list[str]):
    """Print a numbered mapping of each type identifier to its short snake_case name."""
    for index, record_type in enumerate(record_types, start=1):
        print(f'{index}:', record_type, '->', type_identifier_to_name(record_type))
|
||
|
||
def write_all_records_excel_file(records_df: pd.DataFrame, rearranged=False):
    """Write the full records DataFrame to one Excel file.

    The rearranged variant is saved under a distinct filename suffix.
    """
    base_name = 'all_records_rearranged' if rearranged else 'all_records'
    file_path = globals.get_output_file_path(base_name, 'xlsx')
    print(f'Write all records Excel file to: "{file_path}"')
    records_df.to_excel(file_path)
|
||
|
||
def write_blood_pressure_excel_file(rearranged_records_df: pd.DataFrame, reduce_output: bool):
    """Merge systolic and diastolic blood pressure records and write them to Excel.

    Rows are joined on creationDate so each output row carries both readings
    as valueSystolic/valueDiastolic.

    When reduce_output is False, device/sourceName/sourceVersion are dropped
    from the systolic side only; after the merge they survive once, coming
    from the diastolic side, instead of appearing twice as _x/_y columns.
    (When reduce_output is True the input frame does not contain them at all.)
    """
    systolic_df = rearranged_records_df.query('type == "HKQuantityTypeIdentifierBloodPressureSystolic"')
    if reduce_output:
        systolic_df.drop(columns=['type', 'unit'], inplace=True)
    else:
        systolic_df.drop(columns=['type', 'unit', 'device', 'sourceName', 'sourceVersion'], inplace=True)

    # Fixed: this query string is a constant — the stray f-string prefix is removed.
    diastolic_df = rearranged_records_df.query('type == "HKQuantityTypeIdentifierBloodPressureDiastolic"')
    diastolic_df.drop(columns=['type', 'startDate', 'endDate'], inplace=True)

    merged_blood_pressure_df = pd.merge(systolic_df, diastolic_df, on='creationDate')
    merged_blood_pressure_df.rename(columns={'value_x': 'valueSystolic', 'value_y': 'valueDiastolic'}, inplace=True)

    file_path = globals.get_output_file_path('blood_pressure', 'xlsx')
    print(f'Write blood pressure Excel file to: "{file_path}"')
    merged_blood_pressure_df.to_excel(file_path)
|
||
|
||
def write_all_other_excel_files(record_types: list[str], rearranged_records_df: pd.DataFrame):
    """Write one Excel file per non-blood-pressure record type.

    Blood pressure types are excluded here because they get a dedicated merged
    file from write_blood_pressure_excel_file(). The caller's record_types
    list is left unmodified (previously it was mutated via remove()).
    """
    blood_pressure_types = {
        'HKQuantityTypeIdentifierBloodPressureSystolic',
        'HKQuantityTypeIdentifierBloodPressureDiastolic',
    }
    # Fixed: filter a copy instead of remove()-ing entries from the caller's list.
    remaining_types = [record_type for record_type in record_types if record_type not in blood_pressure_types]

    for record_type in remaining_types:
        filtered_records_df = rearranged_records_df.query(f'type == "{record_type}"')
        filtered_records_df.drop(columns='type', inplace=True)

        # Category-type records additionally get their unit column dropped
        # (presumably unit-less — TODO confirm against the export data).
        if record_type.startswith('HKCategoryTypeIdentifier'):
            filtered_records_df.drop(columns='unit', inplace=True)

        record_name = type_identifier_to_name(record_type)
        file_path = globals.get_output_file_path(record_name, 'xlsx')
        # Hoisted out of the f-string: nested same-quote f-strings need Python 3.12+ (PEP 701).
        readable_name = record_name.replace('_', ' ')
        print(f'Write {readable_name} Excel file to: "{file_path}"')
        filtered_records_df.to_excel(file_path)
|
||
|
||
def main() -> int:
    """CLI entry point: parse the export once and write the requested Excel files."""
    parser = argparse.ArgumentParser(
        description=globals.get_argparse_description('Excel files'),
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('-p', '--print-types', help='whether all record types should be printed', action='store_true')
    parser.add_argument('-o', '--one-file', help='whether all records should be written to one Excel file', action='store_true')
    parser.add_argument('-s', '--separate-files', help='whether all records should be written to separate Excel files', action='store_true')
    parser.add_argument('-r', '--reduce-output', help='whether to reduce output when writing Excel files', action='store_true')

    args = parser.parse_args()
    option_values = vars(args)
    # Without any option there is nothing to do — show usage instead.
    if not any(option_values.values()):
        parser.print_help()
        return 0

    # -r as the only option selects nothing to write, so bail out with a hint.
    if args.reduce_output and sum(option_values.values()) == 1:
        print('info: the option to reduce output has no effect if no Excel files are being written')
        return 0

    records_df, record_types = parse_health_export()

    if args.print_types:
        print('All record types:\n')
        print_all_record_types(record_types)
        return 0

    # The "rearranged" frame fixes the column order; the reduced variant omits
    # the device/source metadata columns.
    wanted_columns = ['type', 'creationDate', 'startDate', 'endDate', 'value', 'unit']
    if not args.reduce_output:
        wanted_columns += ['device', 'sourceName', 'sourceVersion']
    rearranged_records_df = records_df[wanted_columns]

    if args.one_file:
        print('All records Excel files are being written:\n')
        write_all_records_excel_file(records_df)
        write_all_records_excel_file(rearranged_records_df, True)
        if args.separate_files:
            print('\n')

    if args.separate_files:
        print('All other Excel files are being written:\n')
        write_blood_pressure_excel_file(rearranged_records_df, args.reduce_output)
        write_all_other_excel_files(record_types, rearranged_records_df)

    return 0
|
||
|
||
# Run the CLI when executed directly; propagate main()'s return value as the exit code.
if __name__ == '__main__':
    sys.exit(main())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
#!/bin/zsh

# Create a local virtual environment in ./venv (if not present), activate it
# for the remainder of this script, and install the project's Python
# dependencies from requirements.txt into it.
python3 -m venv venv
source ./venv/bin/activate
python3 -m pip install -r requirements.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
pandas | ||
openpyxl |