Why is this context failing validation?

I have a problem with a context I’m trying to log. This error is coming up in Elasticsearch:

[{"level":"error","message":"error: instance type (array) does not match any allowed primitive type (allowed: [\"object\"])\n level: \"error\"\n schema: {\"loadingURI\":\"#\",\"pointer\":\"/items\"}\n instance: {\"pointer\":\"/0\"}\n domain: \"validation\"\n keyword: \"type\"\n found: \"array\"\n expected: [\"object\"]\n"}]

However, my decoded context parameter (the cx= value in the log line) is:

{
  "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0",
  "data": [
    [
      {
        "schema": "iglu:org.ietf/http_cookie/jsonschema/1-0-0",
        "data": {
          "name": "webgainsUrlData",
          "value": "https://track.webgains.com/transaction.html?wgver=1.2&wgprotocol=https&wgsubdomain=track&wgslang=javascript-client&wglang=en_EN"
        }
      }
    ]
  ]
}

which looks right?

What am I missing here?

Looking at this again, I’ve just realised it has an extra [], which means I must be pushing an array onto the contexts array in the tracker. I will look for a bug in my JS code!
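In case anyone else hits this, here’s a minimal sketch of how the extra nesting can creep in. It assumes the standard sp.js tracker snippet, and cookieContext is just an illustrative name:

// Illustrative only - cookieContext is a hypothetical variable name
var cookieContext = {
    schema: 'iglu:org.ietf/http_cookie/jsonschema/1-0-0',
    data: {
        name: 'webgainsUrlData',
        value: document.location.href
    }
};

var contexts = [];
// Buggy: wrapping the context in its own array before pushing,
// which produces "data": [[{ ... }]]
contexts.push([cookieContext]);
// Correct: push the self-describing JSON directly,
// which produces "data": [{ ... }]
// contexts.push(cookieContext);

window.snowplow('trackPageView', null, contexts);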

@iain Looks like the schema expects each entry in the data array to be an object but finds an array instead (hence found: "array", expected: ["object"] in the error). The JSON for the context should look like this to pass validation:

{
	"schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0",
	"data": [{
		"schema": "iglu:org.ietf/http_cookie/jsonschema/1-0-0",
		"data": {
			"name": "webgainsUrlData",
			"value": "https://track.webgains.com/transaction.html?wgver=1.2&wgprotocol=https&wgsubdomain=track&wgslang=javascript-client&wglang=en_EN"
		}
	}]
}

This way the data field is an array containing, in this case, a single object with two properties: “schema” and “data”.

You can easily check event JSON against a schema using this handy [schema validator](http://www.jsonschemavalidator.net/).

It also shows that your nested object validates against the http_cookie schema without issues.
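If you’d rather check locally in Node, something like ajv (npm install ajv) works too. This is just a sketch; paste the full http_cookie 1-0-0 schema from Iglu Central in place of the abbreviated stub:

var Ajv = require('ajv');
var ajv = new Ajv();

// Abbreviated stub - replace with the full schema from Iglu Central
var cookieSchema = {
    "type": "object",
    "properties": {
        "name": { "type": "string" },
        "value": { "type": ["string", "null"] }
    },
    "required": ["name", "value"]
};

var data = {
    name: 'webgainsUrlData',
    value: 'https://track.webgains.com/transaction.html?wgver=1.2&wgprotocol=https&wgsubdomain=track&wgslang=javascript-client&wglang=en_EN'
};

// ajv.validate returns true on success; otherwise inspect ajv.errors
console.log(ajv.validate(cookieSchema, data) || ajv.errors);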

Hope this helps.

Thank you, yes that’s the exact problem.

The schema validator will be very handy.

Would I be able to use Hadoop Event Recovery to reprocess the bad rows and remove the extra array from the context?

Hi @iain - yes sure, this should work.

That worked brilliantly. Thank you!!

Something that might be worth clarifying in the docs for Hadoop Event Recovery: if you’re looking at errors in Elasticsearch, each entry in the errors array has level and message fields, whereas the process() function just expects an array of error message strings.

I tested my process() function in the Node runtime first, using the following code to set up a test errors array from a bad row JSON copied out of Kibana.

var fs = require('fs');

// Load a bad row JSON copied straight out of Kibana
var contents = fs.readFileSync(__dirname + '/bad_row_example.json', 'utf8');
var badRow = JSON.parse(contents);
var errors = [];

// Elasticsearch stores each error as {level, message}, but process()
// expects a plain array of message strings
for (var i = 0; i < badRow.errors.length; i++) {
    errors.push(badRow.errors[i].message);
}

badRow.errors = errors;

Then I added Node versions of your built-in functions at the top of my file:

// Helper functions mirroring the Hadoop Event Recovery built-ins

function tsvToArray(event) {
    // A negative limit is coerced via ToUint32, so no trailing fields are dropped
    return event.split('\t', -1);
}

function arrayToTsv(fields) {
    return fields.join('\t');
}

function parseQuerystring(qstr) {
    var query = {};
    var pairs = qstr.split('&');
    for (var i = 0; i < pairs.length; i++) {
        var pair = pairs[i].split('=');
        query[decodeURIComponent(pair[0])] = decodeURIComponent(pair[1] || '');
    }
    return query;
}

function buildQuerystring(dict) {
    var parts = [];
    for (var key in dict) {
        if (dict.hasOwnProperty(key)) {
            parts.push(encodeURIComponent(key) + '=' + encodeURIComponent(dict[key]));
        }
    }
    return parts.join('&');
}

function decodeBase64(encodedString) {
    // Buffer.from replaces the deprecated new Buffer() constructor
    return Buffer.from(encodedString, 'base64').toString();
}

function encodeBase64(unencodedString) {
    return Buffer.from(unencodedString).toString('base64');
}

function parseJson(jsonString) {
    return JSON.parse(jsonString);
}

function stringifyJson(jsonObject) {
    return JSON.stringify(jsonObject);
}

This made testing in WebStorm very quick and simple.
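For anyone curious, a process() function built on these helpers might look roughly like the sketch below. The querystring index (11) and the error pattern it matches are assumptions from my setup, so verify them against your own raw events before running a real recovery; the concat line is the actual fix for the extra [].

function process(event, errors) {
    // Only touch rows that failed with this particular validation error;
    // returning null leaves the row unrecovered
    for (var i = 0; i < errors.length; i++) {
        if (!/does not match any allowed primitive type/.test(errors[i])) {
            return null;
        }
    }
    var fields = tsvToArray(event);
    // Assumption: the collector querystring sits at index 11 of the raw
    // event TSV; check your own collector format
    var querystring = parseQuerystring(fields[11]);
    if (querystring['cx']) {
        // Note: cx is URL-safe base64; Node's base64 decoder accepts it,
        // but depending on your pipeline you may need to swap '+' and '/'
        // back to '-' and '_' after re-encoding
        var contexts = parseJson(decodeBase64(querystring['cx']));
        // Flatten one level: [[{...}]] becomes [{...}]; rows that are
        // already correct pass through unchanged
        contexts.data = [].concat.apply([], contexts.data);
        querystring['cx'] = encodeBase64(stringifyJson(contexts));
        fields[11] = buildQuerystring(querystring);
    }
    return arrayToTsv(fields);
}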


Great approach @iain - thanks for sharing!