Commit d60c4929 authored by S Anand's avatar S Anand
Browse files

ENH: add g1.types(). Fixes #14

parent 5d5898ea
Pipeline #45248 failed with stage
in 0 seconds
...@@ -42,6 +42,8 @@ Utilities: ...@@ -42,6 +42,8 @@ Utilities:
- [$.template](#template) renders lodash templates. Requires [lodash](https://lodash.com/) - [$.template](#template) renders lodash templates. Requires [lodash](https://lodash.com/)
- [event.min.js](dist/event.min.js): event library - [event.min.js](dist/event.min.js): event library
- [$.dispatch](#dispatch) is like [trigger](https://api.jquery.com/trigger/) but sends a native event (triggers non-jQuery events too) - [$.dispatch](#dispatch) is like [trigger](https://api.jquery.com/trigger/) but sends a native event (triggers non-jQuery events too)
- [types.min.js](dist/types.min.js): type detection library
- [g1.types](#types) returns the data types of columns in a DataFrame
## $.urlfilter ## $.urlfilter
...@@ -633,6 +635,46 @@ g1.url.parse('/?a=1&b=2&c=3&d=4') // Update this URL ...@@ -633,6 +635,46 @@ g1.url.parse('/?a=1&b=2&c=3&d=4') // Update this URL
// Returns /?b=3&c=3&c=6&d=7 // Returns /?b=3&c=3&c=6&d=7
``` ```
## types
`g1.types(data)` returns the column data types. For example:
```js
var data = [
{a: 1, b: 1.1, c: 'c', d: new Date('2014-04-04'), e: true},
{a: 2, b: 2},
]
g1.types(data) // Returns {"a": "number", "b": "number", "c": "string", "d": "date", "e": "boolean"}
```
### types options
`types()` accepts 2 parameters:
- `data`: a list of objects
- `options`: a dictionary that may contain these keys:
- `convert`: converts values to the right type. For example, "1" is converted to 1. default: `false`
- `limit`: number of rows to evaluate. default: 1000
- `ignore`: list of values that should be ignored. default: `[null, undefined]`
Rules:
- Evaluate up to `limit` rows
- Ignore values that are listed in the `ignore` option. Only consider the rest
- If `convert` is `false`, then for each column:
- If all values are Date objects -> `date`
- Else if all values are numbers -> `number`
- Else if all values are strings -> `string`
- Else if all values are bools -> `boolean`
- Else if there are no values, or all values are undefined or null -> `null`
- Else -> `mixed`
- Else if `convert` is `true`, then for each column:
- If all values can be converted to Date -> `date`
- Else if all values can be converted to numbers -> `number`
- Else if all values are bools -> `boolean`
- Else if there are no values, or all values are undefined or null -> `null`
- Else -> `string`
## Interaction conventions ## Interaction conventions
All interaction components use this naming convention: All interaction components use this naming convention:
......
export { version } from './src/package.js' export { version } from './src/package.js'
export { types } from './src/types.js'
export { url } from './index-urlfilter.js' export { url } from './index-urlfilter.js'
export { datafilter } from './src/datafilter.js' export { datafilter } from './src/datafilter.js'
export { scale } from './src/scale.js' export { scale } from './src/scale.js'
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
"dev": "rimraf dist && json2module package.json > src/package.js && rollup -c -w", "dev": "rimraf dist && json2module package.json > src/package.js && rollup -c -w",
"pretest": "npm run build && browserify -s tape -r tape -o test/tape.js", "pretest": "npm run build && browserify -s tape -r tape -o test/tape.js",
"server": "npm run pretest && npm run lint && node test/server.js", "server": "npm run pretest && npm run lint && node test/server.js",
"test": "npm run lint && tape test/test-*.js | faucet && node test/server.js run | tap-merge | faucet", "test": "tape test/test-*.js | faucet && node test/server.js run | tap-merge | faucet",
"prepublishOnly": "npm test" "prepublishOnly": "npm test"
}, },
"devDependencies": { "devDependencies": {
......
// state_transitions[current_type][old_state] -> { state: new_state, end: stop scanning }
// Values that are undefined or null are mapped to the type 'null', i.e. missing values.
// 'default' is used when the old state has no explicit entry for the current type.
// The 'mixed' row doubles as the fallback for value types that have no row of their own.
var state_transitions = {
  'null': {
    'null': { state: 'null' },
    'date': { state: 'date' },
    'number': { state: 'number' },
    'boolean': { state: 'boolean' },
    'string': { state: 'string' },
    'object': { state: 'object' },
    'mixed': { state: 'mixed', end: true }
  },
  'date': {
    'null': { state: 'date' },
    'date': { state: 'date' },
    'default': { state: 'mixed', end: true }
  },
  'number': {
    'null': { state: 'number' },
    'number': { state: 'number' },
    'default': { state: 'mixed', end: true }
  },
  'boolean': {
    'null': { state: 'boolean' },
    'boolean': { state: 'boolean' },
    'default': { state: 'mixed', end: true }
  },
  'string': {
    'null': { state: 'string' },
    'string': { state: 'string' },
    'default': { state: 'mixed', end: true }
  },
  'object': {
    'null': { state: 'object' },
    'object': { state: 'object' },
    'default': { state: 'mixed', end: true }
  },
  'mixed': {
    'default': { state: 'mixed', end: true }
  }
}

// Returns true if `row` can hold columns, i.e. it is a non-null object.
function is_row(row) {
  return row !== null && typeof row == 'object'
}

// Returns the keys of the first `limit` rows of `data`, in order of first appearance.
function collect_columns(data, limit) {
  var columns = []
  var seen = Object.create(null)    // no prototype, so any key name is safe
  for (var index = 0; index < limit; index++) {
    var row = data[index]
    if (!is_row(row))
      continue
    Object.keys(row).forEach(function (key) {
      if (!seen[key]) {
        seen[key] = true
        columns.push(key)
      }
    })
  }
  return columns
}

// Returns the type of a single value: 'null', 'date', or its typeof.
// If `convert` is truthy, values that look like numbers, dates or booleans
// are reported as 'number', 'date' or 'boolean' respectively.
function value_type(value, convert) {
  if (value === undefined || value === null)
    return 'null'
  var type = typeof value
  // Date.parse() stringifies its argument, so a valid Date object parses to a timestamp
  if (type == 'object' && !isNaN(Date.parse(value)))
    return 'date'
  // Symbols and BigInts make parseFloat() / isFinite() throw, so never try to convert them
  if (!convert || type == 'symbol' || type == 'bigint')
    return type
  // We use parseFloat AND isFinite because
  // parseFloat('2018-01') is 2018 but isFinite('2018-01') is false
  // Also, 'NaN', 'Infinity' and '-Infinity' should be treated as numbers
  if ((!isNaN(parseFloat(value)) && isFinite(value)) || ['NaN', 'Infinity', '-Infinity'].indexOf(value) >= 0)
    return 'number'
  if (!isNaN(Date.parse(value)))
    return 'date'
  if (['true', 'false'].indexOf(value) != -1)
    return 'boolean'
  return type
}

/**
 * Detects the data type of every column in a list of row objects.
 *
 * @param {Array<Object>} data - rows to inspect. Anything falsy or without a
 *   length returns {}. Rows that are not objects are skipped.
 * @param {Object} [options] - read only, never modified:
 *   - convert: if truthy, values that look like numbers, dates or booleans
 *     count as those types. default: false
 *   - limit: number of rows to evaluate. default: 1000
 *   - ignore: list of values to skip. default: [null, undefined]
 * @returns {Object} maps each column name found in the evaluated rows to one of
 *   'null', 'date', 'number', 'boolean', 'string', 'object' or 'mixed'.
 */
export function types(data, options) {
  var result = {}
  if (!data || !data.length)
    return result
  options = options || {}
  var convert = options.convert || false
  var max_rows = options.limit || 1000
  var limit = (max_rows < data.length) ? max_rows : data.length
  var ignore = options.ignore || [null, undefined]

  // Find all columns up front. Discovering them while typing the first column
  // would miss columns whenever that scan stops early on a 'mixed' result.
  var columns = collect_columns(data, limit)
  columns.forEach(function (column) {
    var result_type = 'null'
    for (var index = 0; index < limit; index++) {
      var row = data[index]
      // Ignore if the row is unusable or the value is missing
      if (!is_row(row) || !(column in row))
        continue
      var value = row[column]
      // Ignore values that are in the ignore list
      if (ignore.indexOf(value) >= 0)
        continue
      var type = value_type(value, convert)
      // Types without a row of their own (function, symbol, bigint) always lead to 'mixed'
      var state_transition = state_transitions[type] || state_transitions['mixed']
      var change = state_transition[result_type] || state_transition['default']
      result_type = change['state']
      if (change['end'])
        break
    }
    result[column] = result_type
  })
  return result
}
const test = require('tape')
const g1 = require('../dist/g1')
// Test suite for g1.types(): empty inputs, the convert / limit / ignore options,
// and the type-detection rules listed in the README.
test('types test', (t) => {
  t.test('types([]) returns {}', (t) => {
    // Anything falsy or without a length yields an empty result
    for (const input of [[], {}, null, 0, false, '', undefined]) {
      t.deepEquals(g1.types(input), {})
    }
    t.deepEquals(g1.types(), {})
    t.end()
  })

  t.test('types(convert=false) works', (t) => {
    const rows = [
      { a: 1, b: 1.1, c: 'c', d: new Date(), e: true, f: null, g: 1 },
      { a: 2, b: -1, c: 's', d: new Date(), e: false, f: undefined, g: '', h: 'w' },
      { a: 0 },
      { b: 0 }
    ]
    const expected = {
      a: 'number',
      b: 'number',
      c: 'string',
      d: 'date',
      e: 'boolean',
      f: 'null',
      g: 'mixed',
      h: 'string'
    }
    t.deepEquals(g1.types(rows), expected)
    t.end()
  })

  // Fixture shared by the two convert=true tests below.
  // Only the `g` value of the second row differs between them.
  const convertible_rows = (second_g) => [
    { a: '1', b: 1.1, c: 'c', d: '2018-02-01', e: true, f: null, g: 1 },
    { a: 2, b: '-1', c: 's', d: new Date(), e: false, f: undefined, g: second_g },
    { a: 0 },
    { b: 0 }
  ]
  const convertible_expected = () => ({
    a: 'number',
    b: 'number',
    c: 'string',
    d: 'date',
    e: 'boolean',
    f: 'null',
    g: 'mixed' // TODO: check if this is what we want
  })

  t.test('types(convert=true) comparing number and string works', (t) => {
    const actual = g1.types(convertible_rows(''), { convert: true })
    t.deepEquals(actual, convertible_expected())
    t.end()
  })

  t.test('types(convert=true) comparing number and date works', (t) => {
    const actual = g1.types(convertible_rows('2018-02-01'), { convert: true })
    t.deepEquals(actual, convertible_expected())
    t.end()
  })

  t.test('types(limit=2) works', (t) => {
    const rows = [
      { a: 1 },
      { a: 2 },
      { a: 'string' } // limit excludes this
    ]
    t.deepEquals(g1.types(rows, { limit: 2 }), { a: 'number' })
    t.end()
  })

  t.test('types(ignore={}) works', (t) => {
    const rows = [
      { a: null },
      { a: undefined },
      { a: '' },
      { a: 0 },
      {}
    ]
    // Each case pairs an ignore list with the type expected for column `a`
    const cases = [
      [[null, undefined, 0], 'string'],
      [[null, undefined, ''], 'number'],
      [[null, undefined], 'mixed'],
      [['', 0], 'null']
    ]
    cases.forEach(([ignore, expected]) => {
      t.deepEquals(g1.types(rows, { ignore: ignore }), { a: expected })
    })
    t.end()
  })

  // Appends dummy rows that should be ignored, then detects the types
  const padded_types = (rows, options) => {
    rows.push({ a: null }, { a: undefined }, {})
    return g1.types(rows, options)
  }

  // Asserts the detected type of column `a` for every [rows, expected] pair, in order
  const assert_column_a = (t, convert, cases) => {
    cases.forEach(([rows, expected]) => {
      t.deepEquals(padded_types(rows, { convert: convert }), { a: expected })
    })
    t.end()
  }

  t.test('types() follows README guidelines if convert is false', (t) => {
    assert_column_a(t, false, [
      [[{ a: new Date() }, { a: new Date() }], 'date'],
      [[{ a: 0 }, { a: NaN }, { a: Infinity }], 'number'],
      [[{ a: '' }, { a: 's' }], 'string'],
      [[{ a: false }, { a: true }], 'boolean'],
      [[], 'null'],
      [[{ a: new Date() }, { a: 'a' }], 'mixed'],
      [[{ a: '' }, { a: 0 }], 'mixed'],
      [[{ a: '' }, { a: false }], 'mixed'],
      [[{ a: 0 }, { a: false }], 'mixed'],
      [[{ a: new Date() }, { a: false }], 'mixed']
    ])
  })

  t.test('types() follows README guidelines if convert is true', (t) => {
    assert_column_a(t, true, [
      [[{ a: '2018-01-01' }, { a: '2018-01-02' }], 'date'],
      [[{ a: '0' }, { a: 'NaN' }, { a: 'Infinity' }], 'number'],
      [[{ a: '' }, { a: 's' }], 'string'],
      [[{ a: 'false' }, { a: 'true' }], 'boolean'],
      [[], 'null'],
      [[{ a: '2018-01-01' }, { a: 'a' }], 'mixed'],
      [[{ a: '' }, { a: '0' }], 'mixed'],
      [[{ a: '' }, { a: 'false' }], 'mixed'],
      [[{ a: 0 }, { a: 'false' }], 'mixed'],
      [[{ a: '2018-01-01' }, { a: 'false' }], 'mixed']
    ])
  })
})
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment