Shredder - Loader Error - Repository not Found

Hey,

We are shredding data from the enriched bucket into the shredded bucket. However, the events end up in the shredded-bucket/bad folder with the following error:

{
   "schema":"iglu:com.snowplowanalytics.snowplow.badrows/loader_iglu_error/jsonschema/2-0-0",
   "data":{
      "processor":{
         "artifact":"snowplow-rdb-shredder",
         "version":"0.19.0"
      },
      "failure":[
         {
            "schemaCriterion":"iglu:com.myapp/minimal_tracking_event/jsonschema/1-*-*",
            "error":{
               "error":"ResolutionError",
               "lookupHistory":[
                  {
                     "repository":"Iglu Central",
                     "errors":[
                        {
                           "error":"NotFound"
                        }
                     ],
                     "attempts":5,
                     "lastAttempt":"2021-02-15T09:05:04.659Z"
                  },
                  {
                     "repository":"Iglu Client Embedded",
                     "errors":[
                        {
                           "error":"NotFound"
                        }
                     ],
                     "attempts":1,
                     "lastAttempt":"2021-02-15T09:04:59.733Z"
                  },
                  {
                     "repository":"S3-schemas-registry",
                     "errors":[
                        {
                           "error":"NotFound"
                        }
                     ],
                     "attempts":5,
                     "lastAttempt":"2021-02-15T09:05:03.914Z"
                  }
               ]
            }
         }
      ],
      "payload":{
         "app_id":null,
         "platform":"pc",
         "etl_tstamp":"2021-02-10T14:59:05.524Z",
         "collector_tstamp":"2021-02-10T14:59:03.570Z",
         "dvce_created_tstamp":"2021-02-10T14:58:36.201Z",
         "event":"unstruct",
         "event_id":"b6667045-7c66-4e99-9e4b-6affa0fb761e",
         "txn_id":null,
         "name_tracker":null,
         "v_tracker":"py-0.8.4",
         "v_collector":"ssc-2.0.1-kinesis",
         "v_etl":"stream-enrich-1.4.2-common-1.4.2",
         "user_id":null,
         "user_ipaddress":"x.x.x.x",
         "user_fingerprint":null,
         "domain_userid":null,
         "domain_sessionidx":null,
         "network_userid":"7ea843ba-0a53-42e2-b4ca-c3ce7eb1bab2",
         "geo_country":null,
         "geo_region":null,
         "geo_city":null,
         "geo_zipcode":null,
         "geo_latitude":null,
         "geo_longitude":null,
         "geo_region_name":null,
         "ip_isp":null,
         "ip_organization":null,
         "ip_domain":null,
         "ip_netspeed":null,
         "page_url":null,
         "page_title":null,
         "page_referrer":null,
         "page_urlscheme":null,
         "page_urlhost":null,
         "page_urlport":null,
         "page_urlpath":null,
         "page_urlquery":null,
         "page_urlfragment":null,
         "refr_urlscheme":null,
         "refr_urlhost":null,
         "refr_urlport":null,
         "refr_urlpath":null,
         "refr_urlquery":null,
         "refr_urlfragment":null,
         "refr_medium":null,
         "refr_source":null,
         "refr_term":null,
         "mkt_medium":null,
         "mkt_source":null,
         "mkt_term":null,
         "mkt_content":null,
         "mkt_campaign":null,
         "contexts":{
            
         },
         "se_category":null,
         "se_action":null,
         "se_label":null,
         "se_property":null,
         "se_value":null,
         "unstruct_event":{
            "schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0",
            "data":{
               "schema":"iglu:com.myapp/minimal_tracking_event/jsonschema/1-0-0",
               "data":{
                  "user":"soak_test_user",
                  "platform":"soak_test_plattform",
                  "url":"soak_test_url"
               }
            }
         },
         "tr_orderid":null,
         "tr_affiliation":null,
         "tr_total":null,
         "tr_tax":null,
         "tr_shipping":null,
         "tr_city":null,
         "tr_state":null,
         "tr_country":null,
         "ti_orderid":null,
         "ti_sku":null,
         "ti_name":null,
         "ti_category":null,
         "ti_price":null,
         "ti_quantity":null,
         "pp_xoffset_min":null,
         "pp_xoffset_max":null,
         "pp_yoffset_min":null,
         "pp_yoffset_max":null,
         "useragent":"python-requests/2.25.1",
         "br_name":null,
         "br_family":null,
         "br_version":null,
         "br_type":null,
         "br_renderengine":null,
         "br_lang":null,
         "br_features_pdf":null,
         "br_features_flash":null,
         "br_features_java":null,
         "br_features_director":null,
         "br_features_quicktime":null,
         "br_features_realplayer":null,
         "br_features_windowsmedia":null,
         "br_features_gears":null,
         "br_features_silverlight":null,
         "br_cookies":null,
         "br_colordepth":null,
         "br_viewwidth":null,
         "br_viewheight":null,
         "os_name":null,
         "os_family":null,
         "os_manufacturer":null,
         "os_timezone":null,
         "dvce_type":null,
         "dvce_ismobile":null,
         "dvce_screenwidth":null,
         "dvce_screenheight":null,
         "doc_charset":null,
         "doc_width":null,
         "doc_height":null,
         "tr_currency":null,
         "tr_total_base":null,
         "tr_tax_base":null,
         "tr_shipping_base":null,
         "ti_currency":null,
         "ti_price_base":null,
         "base_currency":null,
         "geo_timezone":null,
         "mkt_clickid":null,
         "mkt_network":null,
         "etl_tags":null,
         "dvce_sent_tstamp":"2021-02-10T14:59:03Z",
         "refr_domain_userid":null,
         "refr_dvce_tstamp":null,
         "derived_contexts":{
            
         },
         "domain_sessionid":null,
         "derived_tstamp":"2021-02-10T14:58:36.771Z",
         "event_vendor":"com.myapp",
         "event_name":"minimal_tracking_event",
         "event_format":"jsonschema",
         "event_version":"1-0-0",
         "event_fingerprint":null,
         "true_tstamp":null
      }
   }
}

We host our custom schemas (minimal_tracking_event) in an S3 bucket in the same account. In addition, we point to the official Iglu repo. This is our resolver.json; we are using the same one for the enrichment module:

{
  "schema": "iglu:com.snowplowanalytics.iglu/resolver-config/jsonschema/1-0-0",
  "data": {
    "cacheSize": 500,
    "repositories": [
      {
        "name": "S3-schemas-registry",
        "priority": 0,
        "vendorPrefixes": ["com.myapp"],
        "connection": {
          "http": {
            "uri": "SP_SCHEMA_URI" 
          }
        }
      }, 
      {
        "name": "Iglu Central",
        "priority": 1,
        "vendorPrefixes": [ "com.snowplowanalytics" ],
        "connection": {
          "http": {
            "uri": "http://iglucentral.com"
          }
        }
      },
      {
        "name": "Iglu Central - Mirror 01",
        "priority": 2,
        "vendorPrefixes": ["com.snowplowanalytics"],
        "connection": {
          "http": {
            "uri": "http://mirror01.iglucentral.com"
          }
        }
      }
    ]
  }
}

What’s the URI for your S3 schema repository? Is it accessible over HTTP / HTTPS from the enricher?

1 Like

Hey @mgloel ,

Is there any chance that the shredder started to work after the schema was made available in the schema registry? The Iglu resolver caches responses from the registry, and this might be the reason.

Whether or not that is the reason, I also noticed that your resolver configuration points to version 1-0-0. Note that the resolver config has supported a TTL for the cache since 1-0-2. The latest version is 1-0-3 and is available here. The top-level cacheTtl property lets you specify a duration so that a schema update/patch etc. wouldn’t require a restart.

Please let us know if it is the case or not and then we can dive deeper if necessary.

1 Like

Hey @mgloel,

So this bad row (specifically, a ResolutionError with a schemaCriterion) implies that the shredder is trying to resolve a schema criterion, not a single schema. Only Iglu Server 0.6.0 is capable of hosting this.

This is necessary if you want to shred data into TSV and don’t want to use JSONPath files. It was part of the R32 migration: R32 Upgrade Guide - Snowplow Docs

If you just want to use R35 with JSONPaths, as before R32, you can change formats.default from TSV to JSON in your HOCON config.

3 Likes