Skip to content

Commit

Permalink
added two maps scrapers (and postgis)
Browse files Browse the repository at this point in the history
  • Loading branch information
AV committed Apr 5, 2023
1 parent 963b90d commit d912fd8
Show file tree
Hide file tree
Showing 11 changed files with 317 additions and 20 deletions.
23 changes: 21 additions & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,30 @@ services:
dockerfile: timescaledb.Dockerfile
restart: unless-stopped
ports:
- ${POSTGRES_PORT}:${POSTGRES_PORT}
- ${POSTGRES_PORT}:5432
volumes:
- timescaledb-data:/var/lib/postgresql/data
env_file:
- .env

# PGADMIN ###################################################
pgadmin:
container_name: pgadmin
image: dpage/pgadmin4:latest
ports:
- ${PGADMIN_PORT}:80
environment:
PGADMIN_DEFAULT_EMAIL: admin@admin.com
PGADMIN_DEFAULT_PASSWORD: root
# PGADMIN_CONFIG_SERVER_JSON_FILE: /pgadmin4/servers.json
# volumes:
# - ./pgadmin/servers.json:/pgadmin4/servers.json
# - ./pgadmin/pgpass:/pgadmin4/pgpass
depends_on:
- timescaledb
env_file:
- .env

# TELEGRAF ##################################################
telegraf:
container_name: telegraf
Expand Down Expand Up @@ -52,7 +70,8 @@ services:
- ./superscraper:/workspaces/superscraper

ports:
- ${SUPERSCRAPER_PORT}:${SUPERSCRAPER_PORT}
- ${SCRAPER_HEALTHKIT_PORT}:${SCRAPER_HEALTHKIT_PORT}
- ${SCRAPER_MAPS_PORT}:${SCRAPER_MAPS_PORT}

# Overrides default command so things don't shut down after the process ends.
# command: sleep infinity
Expand Down
6 changes: 6 additions & 0 deletions superscraper/ecosystem.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ module.exports = {
args: 'run start -w superscraper-healthkit',
interpreter: 'none',
},
{
name: 'maps',
script: 'npm',
args: 'run start -w superscraper-maps',
interpreter: 'none',
},
// Add more workspaces as needed
],
};
Expand Down
26 changes: 26 additions & 0 deletions superscraper/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 10 additions & 5 deletions superscraper/scrapers/_utils/db.js
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ async function doesTableExist(schema, tableName) {
}

async function createTable(schema, tableName, columns, uniqueColumns) {
// Check columns include "time"
if (!columns.find(col => col.name === 'time')) {
throw new Error('Columns must include a "time" column');
}

if (!columns || columns.length === 0) {
console.error('No columns provided to create table');
return;
Expand All @@ -58,8 +63,8 @@ async function createTable(schema, tableName, columns, uniqueColumns) {
const uniqueColumnsString = uniqueColumns.map(col => `${col}`).join(', ');
const columnDefinitions = columns.map(col => `${col.name} ${col.type}`).join(', ');
let createTableQuery = `CREATE TABLE IF NOT EXISTS ${schema}.${tableName} (
${uniqueColumnsString},
UNIQUE (${uniqueColumns})
${columnDefinitions},
UNIQUE (${uniqueColumnsString})
);`;

try {
Expand All @@ -68,7 +73,7 @@ async function createTable(schema, tableName, columns, uniqueColumns) {
console.log(createTableQuery);

// Convert table to hypertable
await pool.query(`SELECT create_hypertable('${schema}.${tableName}', 'time');`);
await pool.query(`SELECT create_hypertable('${schema}.${tableName}','time');`);
} catch (error) {
console.error(`Error occurred while creating table ${schema}.${tableName}`, error);
throw error;
Expand Down Expand Up @@ -103,8 +108,8 @@ async function saveData(schema, tableName, data, uniqueColumns) {
}

// // Create Schema
// console.log(`Creating schema ${schema}`);
// await createSchema(schema);
console.log(`Creating schema ${schema}`);
await createSchema(schema);

// Create table if it doesn't exist
await createTable(schema, tableName, data[0], uniqueColumns);
Expand Down
5 changes: 3 additions & 2 deletions superscraper/scrapers/awair/awair-scraper.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
const { getAirData, getDevices, getDeviceIdList } = require("./awair-api.js");
const db = require("../_utils/db.js");

const AIRDATA_TABLE_NAME = "awair_sensor_data";
const AIRDATA_SCHEMA_NAME = "awair";
const AIRDATA_TABLE_NAME = "awair_sensor_data";
const UNIQUE_COLUMNS = ['time','location'];

async function main() {

Expand All @@ -21,7 +22,7 @@ async function main() {
{ name: "pm25", value: pm25, type: 'INTEGER' }
];
});
// await db.saveData(AIRDATA_SCHEMA_NAME, AIRDATA_TABLE_NAME, formattedData, ['time', 'deviceid']);
await db.saveData(AIRDATA_SCHEMA_NAME, AIRDATA_TABLE_NAME, formattedData, UNIQUE_COLUMNS);
}

// read 100 rows from table (for testing)
Expand Down
4 changes: 2 additions & 2 deletions superscraper/scrapers/healthkit/healthkit-scraper.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ const express = require('express');
const fs = require('fs/promises');
const path = require('path');
import * as db from "../_utils/db.js"

const app = express();

app.use(express.json({ limit: '10mb' }));
const serverPort = process.env.SUPERSCRAPER_PORT || 4562;
const serverPort = process.env.SCRAPER_HEALTHKIT_PORT;

const SCHEMA = 'healthkit';

Expand Down
49 changes: 49 additions & 0 deletions superscraper/scrapers/maps/import.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
const fs = require('fs');
const path = require('path');
const db = require('../_utils/db.js');

// One-shot import script: reads the visited-places GeoJSON export that lives
// next to this file and stores every feature through db.saveData.

// Load the GeoJSON file
const geoJSONFile = path.resolve(__dirname, './history/visited-places-detailed.geojson');
const geoJSONData = JSON.parse(fs.readFileSync(geoJSONFile, 'utf-8'));

// Schema, table and unique column names
const schema = 'maps';
const tableName = 'google_maps_visited_places';
const uniqueColumns = ['time', 'location'];

/**
 * Converts one GeoJSON feature into the list of column descriptors
 * ({ name, value, type }) that db.saveData receives for a single row.
 *
 * Only `feature.properties` is read: `from_date` becomes the "time" column,
 * and `lon`/`lat` are combined into a `POINT(lon lat)` string for the
 * "coordinates" column.
 *
 * @param {object} feature - A single entry of the GeoJSON `features` array.
 * @returns {Array<{name: string, value: *, type: string}>} Column descriptors for one row.
 */
function featureToRow(feature) {
  const properties = feature.properties;
  const coordinates = `POINT(${properties.lon} ${properties.lat})`;
  return [
    {
      name: 'time',
      value: properties.from_date,
      type: 'TIMESTAMP WITH TIME ZONE',
    },
    {
      name: 'to_date',
      value: properties.to_date,
      type: 'TIMESTAMP WITH TIME ZONE',
    },
    {
      name: 'location',
      value: properties.location,
      type: 'TEXT',
    },
    {
      name: 'coordinates',
      value: coordinates,
      type: 'GEOGRAPHY(POINT, 4326)',
    },
  ];
}

// Fail with a readable message instead of "cannot read properties of undefined"
// when the file is valid JSON but not a feature collection.
if (!Array.isArray(geoJSONData.features)) {
  throw new TypeError(`Expected a "features" array in ${geoJSONFile}`);
}

// Extract the data from the GeoJSON features and format to match the data format for saveData function
const data = geoJSONData.features.map((feature) => featureToRow(feature));

// Save the data to the database
(async () => {
  try {
    await db.saveData(schema, tableName, data, uniqueColumns);
    console.log('Data has been successfully saved to the database.');
  } catch (error) {
    console.error('An error occurred while saving data to the database:', error);
    // Report the failure through the exit status so shells/schedulers running
    // this script do not treat a failed import as a success.
    process.exitCode = 1;
  }
})();
135 changes: 135 additions & 0 deletions superscraper/scrapers/maps/maps-scraper.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
const express = require('express');
const app = express();
const db = require('../_utils/db.js');

// Target schema/table for incoming location rows and the columns that form
// the uniqueness constraint handed to db.saveData.
const schema = 'maps';
const tableName = 'raw_visited_places';
const uniqueColumns = ['time', 'coordinates'];

app.use(express.json({ limit: '10mb' }));
// NOTE(review): there is no fallback port; when SCRAPER_MAPS_PORT is unset the
// value passed to listen() below is undefined — confirm the variable is always
// provided by the environment.
const serverPort = process.env.SCRAPER_MAPS_PORT;

// Both handlers are async functions. Their promises are forwarded to Express
// via next() so a rejection reaches the error-handling middleware instead of
// surfacing as an unhandled promise rejection.
app.post('/maps', (req, res, next) => {
  handleMapsEndpoint(req, res).catch(next);
});

app.get('/', (req, res, next) => {
  handleRootEndpoint(req, res).catch(next);
});

// Start server
app.listen(serverPort, () => {
  console.log(`Server is running on port ${serverPort}`);
});

// Endpoints handling functions
/**
 * Root route handler: answers with HTTP 200 and a fixed greeting text.
 *
 * @param {object} req - Incoming request (not read).
 * @param {object} res - Response object; `status()` and then `send()` are called on it.
 * @returns {Promise<void>}
 */
async function handleRootEndpoint(req, res) {
  const okResponse = res.status(200);
  okResponse.send('Hello from the Maps scraper!');
}

/**
 * Handles POST /maps: logs the JSON body, acknowledges the request, converts
 * every entry of `req.body.locations` into a row of column descriptors and
 * stores the rows through db.saveData.
 *
 * The handler never rejects because of a malformed payload: a body without a
 * `locations` array is ignored, and individual entries that cannot be
 * converted are logged and skipped.
 *
 * @param {object} req - Request whose parsed JSON body is expected to hold a `locations` array.
 * @param {object} res - Response object; `json()` is called on it exactly once.
 * @returns {Promise<void>}
 */
async function handleMapsEndpoint(req, res) {
  console.log(JSON.stringify(req.body, null, 2));
  // NOTE(review): the request is acknowledged before anything is persisted, so
  // a failed insert is only logged and the sender is never told about it —
  // confirm this fire-and-forget behavior is intended.
  res.json({ result: 'ok' });

  // parse location json data
  const locations = req.body && req.body.locations;
  if (!Array.isArray(locations)) {
    console.error('Ignoring payload without a "locations" array');
    return;
  }

  const data = [];
  for (const location of locations) {
    try {
      data.push(buildLocationRow(location));
    } catch (error) {
      // One broken entry must not discard the rest of the batch.
      console.error('Skipping malformed location entry', error);
    }
  }

  // Nothing to store; saveData is only called with at least one row.
  if (data.length === 0) {
    return;
  }

  try {
    // save data to timescaledb
    await db.saveData(schema, tableName, data, uniqueColumns);
  } catch (error) {
    console.error('Error inserting location data', error);
  }
}

/**
 * Builds the column descriptors ({ name, value, type }) for one location entry.
 *
 * The first two elements of `location.geometry.coordinates` are combined into
 * a `POINT(a b)` string; every other column is copied from
 * `location.properties`. `motion` is joined with commas when it is an array
 * and stored as null otherwise.
 *
 * @param {object} location - One entry of the `locations` array.
 * @returns {Array<{name: string, value: *, type: string}>} Column descriptors for one row.
 * @throws {TypeError} When `geometry`, `geometry.coordinates` or `properties` is missing.
 */
function buildLocationRow(location) {
  const properties = location.properties;
  const position = location.geometry.coordinates;
  const coordinates = `POINT(${position[0]} ${position[1]})`;
  const motion = Array.isArray(properties.motion) ? properties.motion.join(',') : null;

  // Column whose name equals the property it is read from.
  const fromProperty = (name, type) => ({ name, value: properties[name], type });

  return [
    { name: 'time', value: properties.timestamp, type: 'TIMESTAMP WITH TIME ZONE' },
    { name: 'coordinates', value: coordinates, type: 'GEOGRAPHY(POINT, 4326)' },
    fromProperty('altitude', 'DOUBLE PRECISION'),
    fromProperty('speed', 'DOUBLE PRECISION'),
    fromProperty('horizontal_accuracy', 'DOUBLE PRECISION'),
    fromProperty('vertical_accuracy', 'DOUBLE PRECISION'),
    { name: 'motion', value: motion, type: 'TEXT' },
    fromProperty('pauses', 'BOOLEAN'),
    fromProperty('activity', 'TEXT'),
    fromProperty('desired_accuracy', 'DOUBLE PRECISION'),
    fromProperty('deferred', 'DOUBLE PRECISION'),
    fromProperty('significant_change', 'TEXT'),
    fromProperty('locations_in_payload', 'INTEGER'),
    fromProperty('battery_state', 'TEXT'),
    fromProperty('battery_level', 'DOUBLE PRECISION'),
    fromProperty('device_id', 'TEXT'),
    fromProperty('wifi', 'TEXT'),
  ];
}
19 changes: 19 additions & 0 deletions superscraper/scrapers/maps/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"name": "superscraper-maps",
"version": "1.0.0",
"description": "",
"scripts": {
"start": "node maps-scraper.js",
"clean": "rm -rf node_modules",
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "",
"license": "ISC",
"dependencies": {
"axios": "^1.3.3",
"dotenv": "^16.0.3",
"express": "^4.18.2",
"pg": "^8.9.0"
}
}

Loading

0 comments on commit d912fd8

Please sign in to comment.