Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unified data access interface #352

Merged
merged 4 commits into from
Dec 28, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
break up make_columnar_data into normalize_value
  • Loading branch information
sc1f committed Dec 22, 2018
commit 50487983ff9bc104844c793cde5b0de38a64a30a
173 changes: 173 additions & 0 deletions packages/perspective/src/js/Parser/DataAccessor.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
/******************************************************************************
*
* Copyright (c) 2018, the Perspective Authors.
*
* This file is part of the Perspective library, distributed under the terms of
* the Apache License 2.0. The full license can be found in the LICENSE file.
*
*/

import {DateParser, DATE_PARSE_CANDIDATES} from "./DateParser.js";
import {get_column_type} from "../utils.js";
import moment from "moment";

/**
 * Uniform accessor over a dataset supplied either as an array of row objects,
 * as an object of column arrays, or as a schema (column name -> type).
 *
 * Call `init()` to attach a dataset, then `get()` for the raw cell or
 * `marshal()` for the cell coerced to a column type.
 */
export class DataAccessor {
    constructor() {
        // Codes returned by `is_format()` and stored in `this.format`.
        this.data_formats = {
            row: 0,
            column: 1,
            schema: 2
        };
        this.format = undefined;
        this.data = undefined;
        this.column_names = undefined;
        this.data_types = undefined;
        this.row_count = undefined;
        // One DateParser per column name, created lazily by `marshal()`.
        this.date_parsers = {};
        // TODO: optimize and refactor out
        this.moment = moment;
        this.candidates = DATE_PARSE_CANDIDATES;
    }

    /**
     * Copy the entries of a vector-like object into a plain array.
     *
     * @param {object} typevec - object exposing `size()` and `get(index)`.
     * @returns {Array} elements `0 .. size() - 2` of `typevec`.
     */
    extract_typevec(typevec) {
        // NOTE(review): the loop bound deliberately mirrors the original and
        // skips the final element - confirm against the producer of `typevec`
        // whether the last entry is really meant to be dropped.
        const limit = typevec.size() - 1;
        const types = [];
        for (let i = 0; i < limit; i++) {
            types.push(typevec.get(i));
        }
        return types;
    }

    /**
     * Detect the layout of `data`.
     *
     * @param {Array|object} data
     * @returns {number} one of the `this.data_formats` codes: `row` for an
     *     array, `column` when the first property holds an array, `schema`
     *     when the first property holds a string or a function.
     * @throws {string} "Unknown data format!" when none of the above match.
     */
    is_format(data) {
        if (Array.isArray(data)) {
            return this.data_formats.row;
        }

        // Only the first property is inspected to classify the object.
        const first = data[Object.keys(data)[0]];
        if (Array.isArray(first)) {
            return this.data_formats.column;
        }
        if (typeof first === "string" || typeof first === "function") {
            return this.data_formats.schema;
        }
        throw "Unknown data format!";
    }

    /**
     * Count the rows in `data` according to the current `this.format`.
     *
     * @param {Array|object} data
     * @returns {number} array length for row data, length of the first column
     *     for column data, 0 for any other format.
     */
    get_row_count(data) {
        if (this.format === this.data_formats.row) {
            return data.length;
        }
        if (this.format === this.data_formats.column) {
            return data[Object.keys(data)[0]].length;
        }
        return 0;
    }

    /**
     * Read the raw value at (`column_name`, `row_index`) from `this.data`.
     *
     * @param {string} column_name
     * @param {number} row_index
     * @returns {*} the stored value, or `undefined` when the row or column is
     *     absent or the data is a schema.
     * @throws {string} "Unknown data format!" when `this.format` is not one of
     *     the known format codes.
     */
    get(column_name, row_index) {
        // Call through Object.prototype so that a column literally named
        // "hasOwnProperty", or data built with Object.create(null), cannot
        // break the ownership check.
        const owns = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

        if (this.format === this.data_formats.row) {
            const row = this.data[row_index];
            // A missing or null row is treated as having no value.
            if (row !== undefined && row !== null && owns(row, column_name)) {
                return row[column_name];
            }
            return undefined;
        }

        if (this.format === this.data_formats.column) {
            return owns(this.data, column_name) ? this.data[column_name][row_index] : undefined;
        }

        if (this.format === this.data_formats.schema) {
            // A schema carries no cell values.
            return undefined;
        }

        throw "Unknown data format!";
    }

    /**
     * Read a cell and coerce it to the column type described by `type`.
     *
     * @param {string} column_name
     * @param {number} row_index
     * @param {object} type - object whose `value` is passed to
     *     `get_column_type()` to obtain the type name.
     * @returns {*} `null` / `undefined` when the cleaned cell is null /
     *     undefined; otherwise a Number for "float" and "integer", a boolean
     *     for "boolean", the result of `DateParser.parse()` for "date" and
     *     "datetime", and the value concatenated with "" for any other type.
     */
    marshal(column_name, row_index, type) {
        let val = clean_data(this.get(column_name, row_index));

        if (val === null) {
            return null;
        }
        if (typeof val === "undefined") {
            return undefined;
        }

        // A parser is cached per column name the first time a non-empty
        // value of that column is marshalled.
        if (this.date_parsers[column_name] === undefined) {
            this.date_parsers[column_name] = new DateParser();
        }
        const date_parser = this.date_parsers[column_name];

        switch (get_column_type(type.value)) {
            case "float": {
                val = Number(val);
                break;
            }
            case "integer": {
                val = Number(val);
                // FIXME: bring this back in. Values outside the 32-bit range
                // (val > 2147483647 || val < -2147483648) used to promote the
                // column to float. This handles cases where a long sequence of
                // e.g. 0 precedes a clearly float value in an inferred column.
                // Would not be needed if the type inference checked the entire
                // column, or we could reset parsing.
                break;
            }
            case "boolean": {
                // Only the case-insensitive string "true" is true; any other
                // string is false. Non-strings use ordinary truthiness.
                val = typeof val === "string" ? val.toLowerCase() === "true" : !!val;
                break;
            }
            case "datetime":
            case "date": {
                val = date_parser.parse(val);
                break;
            }
            default: {
                // TODO this is not right - might not be a string. Need a data cleaner
                val += "";
            }
        }

        return val;
    }

    /**
     * Links the accessor to a package of data for processing,
     * calculating its format and size.
     *
     * Nothing is returned; the results are stored on the instance as
     * `this.data`, `this.format` and `this.row_count`.
     *
     * @private
     * @param {object} __MODULE__: the Module object generated by Emscripten
     *     (not used by this method).
     * @param {object} data
     * @throws {string} "Unknown data format!" when `is_format()` cannot
     *     classify `data`.
     */
    init(__MODULE__, data) {
        this.data = data;
        this.format = this.is_format(this.data);
        this.row_count = this.get_row_count(this.data);
    }
}

/**
 * Normalise textual placeholders for missing values.
 *
 * The strings "null" and "undefined" are mapped to the actual `null` and
 * `undefined` values; every other input is handed back untouched.
 *
 * @private
 * @param {*} value
 * @returns {*} `null`, `undefined`, or `value` itself.
 */
export function clean_data(value) {
    switch (value) {
        case null:
        case "null":
            return null;
        case undefined:
        case "undefined":
            return undefined;
        default:
            return value;
    }
}
188 changes: 0 additions & 188 deletions packages/perspective/src/js/parse_data.js

This file was deleted.

Loading