Contract error (Contract violation) when running EmrEtlRunner


#1

Hey,
I have been getting a contract violation error when running emr-etl-runner.
Below is the error traceback:

ReturnContractError (Contract violation for return value:
        Expected: {:aws=>{:access_key_id=>String, :secret_access_key=>String, :s3=>{:region=>String, :buckets=>{:assets=>String, :jsonpath_assets=>(String or nil), :log=>String, :raw=>{:in=>(a collection Array of String), :processing=>String, :archive=>String}, :enriched=>{:good=>String, :bad=>String, :errors=>(String or nil), :archive=>(String or nil)}, :shredded=>{:good=>String, :bad=>String, :errors=>(String or nil), :archive=>(String or nil)}}}, :emr=>{:ami_version=>String, :region=>String, :jobflow_role=>String, :service_role=>String, :placement=>(String or nil), :ec2_subnet_id=>(String or nil), :ec2_key_name=>String, :bootstrap=>((a collection Array of String) or nil), :software=>{:hbase=>(String or nil), :lingual=>(String or nil)}, :jobflow=>{:master_instance_type=>String, :core_instance_count=>Num, :core_instance_type=>String, :core_instance_ebs=>#<Contracts::Maybe:0x1dadd172 @vals=[{:volume_size=>#<Proc:0x5dc120ab@uri:classloader:/emr-etl-runner/lib/snowplow-emr-etl-runner/contracts.rb:26 (lambda)>, :volume_type=>#<Proc:0x49c4118b@uri:classloader:/emr-etl-runner/lib/snowplow-emr-etl-runner/contracts.rb:25 (lambda)>, :volume_iops=>#<Contracts::Maybe:0x7ef7f414 @vals=[#<Proc:0x5dc120ab@uri:classloader:/emr-etl-runner/lib/snowplow-emr-etl-runner/contracts.rb:26 (lambda)>, nil]>, :ebs_optimized=>#<Contracts::Maybe:0x4c3d72fd @vals=[Contracts::Bool, nil]>}, nil]>, :task_instance_count=>Num, :task_instance_type=>String, :task_instance_bid=>(Num or nil)}, :additional_info=>(String or nil), :bootstrap_failure_tries=>Num}}, :collectors=>{:format=>String}, :enrich=>{:job_name=>String, :versions=>{:hadoop_enrich=>String, :hadoop_shred=>String}, :continue_on_unexpected_error=>Bool, :output_compression=>#<Proc:0xabf1816@uri:classloader:/emr-etl-runner/lib/snowplow-emr-etl-runner/contracts.rb:24 (lambda)>}, :storage=>{:download=>{:folder=>(String or nil)}}, :monitoring=>{:tags=>(Hash<Symbol, String>), :logging=>{:level=>String}, :snowplow=>({:method=>String, 
:collector=>String, :app_id=>String} or nil)}},
        Actual: {:aws=>{:access_key_id=>"XXXXXXX", :secret_access_key=>"XXXXXX", :s3=>{:region=>"us-west-2", :buckets=>{:assets=>"s3://snowplow-hosted-assets", :jsonpath_assets=>nil, :log=>"s3://XXX", :raw=>{:in=>["s3://XXX"], :processing=>"s3://XXX", :archive=>"s3://XXX"}, :enriched=>{:good=>"s3://XXX", :bad=>"s3://XXX", :errors=>nil, :archive=>"s3://XXX"}, :shredded=>{:good=>"s3://XXX", :bad=>"s3://XXX", :errors=>nil, :archive=>"s3://XXX"}}}, :emr=>{:ami_version=>"5.5.0", :region=>"us-west-2", :jobflow_role=>"EMR_EC2_DefaultRole", :service_role=>"EMR_DefaultRole", :placement=>nil, :ec2_subnet_id=>"subnet-XXX", :ec2_key_name=>"Snowplow-EMR", :bootstrap=>[], :software=>{:hbase=>nil, :lingual=>nil}, :jobflow=>{:job_name=>"Snowplow ETL", :master_instance_type=>"m4.large", :core_instance_count=>2, :core_instance_type=>"m4.large", :core_instance_ebs=>{:volume_size=>100, :volume_type=>"gp2", :volume_iops=>400, :ebs_optimized=>false}, :task_instance_count=>0, :task_instance_type=>"m4.large", :task_instance_bid=>0.015}, :bootstrap_failure_tries=>3, :additional_info=>nil}}, :collectors=>{:format=>"cloudfront"}, :enrich=>{:versions=>{:spark_enrich=>"1.9.0"}, :continue_on_unexpected_error=>false, :output_compression=>"NONE"}, :storage=>{:versions=>{:rdb_shredder=>"0.12.0", :hadoop_elasticsearch=>"0.1.0"}, :download=>{:folder=>nil}}, :monitoring=>{:tags=>{:name=>"EMRETLRunner"}, :logging=>{:level=>"DEBUG"}}}
        Value guarded in: Snowplow::EmrEtlRunner::Cli::load_config
        With Contract: Maybe, String => Hash
        At: uri:classloader:/emr-etl-runner/lib/snowplow-emr-etl-runner/cli.rb:137 ):
    uri:classloader:/gems/contracts-0.11.0/lib/contracts.rb:45:in `block in Contract'
    uri:classloader:/gems/contracts-0.11.0/lib/contracts.rb:154:in `failure_callback'
    uri:classloader:/gems/contracts-0.11.0/lib/contracts/call_with.rb:80:in `call_with'
    uri:classloader:/gems/contracts-0.11.0/lib/contracts/method_handler.rb:138:in `block in load_config'
    uri:classloader:/emr-etl-runner/lib/snowplow-emr-etl-runner/cli.rb:108:in `process_options'
    uri:classloader:/emr-etl-runner/lib/snowplow-emr-etl-runner/cli.rb:94:in `get_args_config_enrichments_resolver'
    uri:classloader:/gems/contracts-0.11.0/lib/contracts/method_reference.rb:43:in `send_to'
    uri:classloader:/gems/contracts-0.11.0/lib/contracts/call_with.rb:76:in `call_with'
    uri:classloader:/gems/contracts-0.11.0/lib/contracts/method_handler.rb:138:in `block in get_args_config_enrichments_resolver'
    uri:classloader:/emr-etl-runner/bin/snowplow-emr-etl-runner:37:in `<main>'
    org/jruby/RubyKernel.java:973:in `load'
    uri:classloader:/META-INF/main.rb:1:in `<main>'
    org/jruby/RubyKernel.java:955:in `require'
    uri:classloader:/META-INF/main.rb:1:in `(root)'
    uri:classloader:/META-INF/jruby.home/lib/ruby/stdlib/rubygems/core_ext/kernel_require.rb:1:in `<main>'

Here is the config file I am using:

aws:
  # Credentials can be hardcoded or set in environment variables
  access_key_id: XXX
  secret_access_key: XXX
  s3:
    region: us-west-2
    buckets:
      assets: s3://snowplow-hosted-assets # DO NOT CHANGE unless you are hosting the jarfiles etc yourself in your own bucket
      jsonpath_assets: # If you have defined your own JSON Schemas, add the s3:// path to your own JSON Path files in your own bucket here
      log: s3://XXX
      raw:
        in:                  # This is a YAML array of one or more in buckets - you MUST use hyphens before each entry in the array, as below
          - s3://XXX        # e.g. s3://my-old-collector-bucket
       #   - ADD HERE         # e.g. s3://my-new-collector-bucket
        processing: s3://XXX
        archive: s3://XXX    # e.g. s3://my-archive-bucket/raw
      enriched:
        good: s3://XXX       # e.g. s3://my-out-bucket/enriched/good
        bad: s3://XXX        # e.g. s3://my-out-bucket/enriched/bad
        errors:     # Leave blank unless :continue_on_unexpected_error: set to true below
        archive: s3://XXX    # Where to archive enriched events to, e.g. s3://my-archive-bucket/enriched
      shredded:
        good: s3://XXX       # e.g. s3://my-out-bucket/shredded/good
        bad: s3://XXX        # e.g. s3://my-out-bucket/shredded/bad
        errors:      # Leave blank unless :continue_on_unexpected_error: set to true below
        archive: s3://XXX    # Where to archive shredded events to, e.g. s3://my-archive-bucket/shredded
  emr:
    ami_version: 5.5.0
    region: us-west-2        # Always set this
    jobflow_role: EMR_EC2_DefaultRole # Created using $ aws emr create-default-roles
    service_role: EMR_DefaultRole     # Created using $ aws emr create-default-roles
    placement:      # Set this if not running in VPC. Leave blank otherwise
    ec2_subnet_id:  subnet-XXX # Set this if running in VPC. Leave blank otherwise
    ec2_key_name: Snowplow-EMR
    bootstrap: []           # Set this to specify custom boostrap actions. Leave empty otherwise
    software:
      hbase:                # Optional. To launch on cluster, provide version, "0.92.0", keep quotes. Leave empty otherwise.
      lingual:              # Optional. To launch on cluster, provide version, "1.1", keep quotes. Leave empty otherwise.
    # Adjust your Hadoop cluster below
    jobflow:
      job_name: Snowplow ETL # Give your job a name
      master_instance_type: m4.large
      core_instance_count: 2
      core_instance_type: m4.large
      core_instance_ebs:    # Optional. Attach an EBS volume to each core instance.
        volume_size: 100    # Gigabytes
        volume_type: "gp2"
        volume_iops: 400    # Optional. Will only be used if volume_type is "io1"
        ebs_optimized: false # Optional. Will default to true
      task_instance_count: 0 # Increase to use spot instances
      task_instance_type: m4.large
      task_instance_bid: 0.015 # In USD. Adjust bid, or leave blank for non-spot-priced (i.e. on-demand) task instances
    bootstrap_failure_tries: 3 # Number of times to attempt the job in the event of bootstrap failures
    additional_info:        # Optional JSON string for selecting additional features
collectors:
  format: cloudfront # For example: 'clj-tomcat' for the Clojure Collector, 'thrift' for Thrift records, 'tsv/com.amazon.aws.cloudfront/wd_access_log' for Cloudfront access logs or 'ndjson/urbanairship.connect/v1' for UrbanAirship Connect events
enrich:
  versions:
    spark_enrich: 1.9.0 # Version of the Spark Enrichment process
  continue_on_unexpected_error: false # Set to 'true' (and set :out_errors: above) if you don't want any exceptions thrown from ETL
  output_compression: NONE # Compression only supported with Redshift, set to NONE if you have Postgres targets. Allowed formats: NONE, GZIP
storage:
  versions:
    rdb_shredder: 0.12.0        # Version of the Spark Shredding process
    hadoop_elasticsearch: 0.1.0 # Version of the Hadoop to Elasticsearch copying process
  download:
    folder: # Postgres-only config option. Where to store the downloaded files. Leave blank for Redshift
monitoring:
  tags: {'name':'EMRETLRunner'} # Name-value pairs describing this job
  logging:
    level: DEBUG # You can optionally switch to INFO for production
  #snowplow:
   # method: get
   # app_id: snowplow # e.g. snowplow
   # collector: ADD HERE # e.g. d3rkrsqld9gmqf.cloudfront.net

I tried looking at solutions which other people tried for a similar issue, but none of them have worked so far. Can anyone help me figure out what I am missing here?
Thanks for all your help in advance!


#2

If you pretty print and compare:

Expected: {:aws=>{:access_key_id=>String, :secret_access_key=>String, :s3=>{:region=>String, :buckets=>{:assets=>String, :jsonpath_assets=>(String or nil), :log=>String, :raw=>{:in=>(a collection Array of String), :processing=>String, :archive=>String}, :enriched=>{:good=>String, :bad=>String, :errors=>(String or nil), :archive=>(String or nil)}, :shredded=>{:good=>String, :bad=>String, :errors=>(String or nil), :archive=>(String or nil)}}}, :emr=>{:ami_version=>String, :region=>String, :jobflow_role=>String, :service_role=>String, :placement=>(String or nil), :ec2_subnet_id=>(String or nil), :ec2_key_name=>String, :bootstrap=>((a collection Array of String) or nil), :software=>{:hbase=>(String or nil), :lingual=>(String or nil)}, :jobflow=>{:master_instance_type=>String, :core_instance_count=>Num, :core_instance_type=>String, :core_instance_ebs=>#<Contracts::Maybe:0x1dadd172 @vals=[{:volume_size=>#<Proc:0x5dc120ab@uri:classloader:/emr-etl-runner/lib/snowplow-emr-etl-runner/contracts.rb:26 (lambda)>, :volume_type=>#<Proc:0x49c4118b@uri:classloader:/emr-etl-runner/lib/snowplow-emr-etl-runner/contracts.rb:25 (lambda)>, :volume_iops=>#<Contracts::Maybe:0x7ef7f414 @vals=[#<Proc:0x5dc120ab@uri:classloader:/emr-etl-runner/lib/snowplow-emr-etl-runner/contracts.rb:26 (lambda)>, nil]>, :ebs_optimized=>#<Contracts::Maybe:0x4c3d72fd @vals=[Contracts::Bool, nil]>}, nil]>, :task_instance_count=>Num, :task_instance_type=>String, :task_instance_bid=>(Num or nil)}, :additional_info=>(String or nil), :bootstrap_failure_tries=>Num}}, :collectors=>{:format=>String}, :enrich=>{:job_name=>String, :versions=>{:hadoop_enrich=>String, :hadoop_shred=>String}, :continue_on_unexpected_error=>Bool, :output_compression=>#<Proc:0xabf1816@uri:classloader:/emr-etl-runner/lib/snowplow-emr-etl-runner/contracts.rb:24 (lambda)>}, :storage=>{:download=>{:folder=>(String or nil)}}, :monitoring=>{:tags=>(Hash<Symbol, String>), :logging=>{:level=>String}, :snowplow=>({:method=>String, :collector=>String, 
:app_id=>String} or nil)}},

with:

Actual: {:aws=>{:access_key_id=>"XXXXXXX", :secret_access_key=>"XXXXXX", :s3=>{:region=>"us-west-2", :buckets=>{:assets=>"s3://snowplow-hosted-assets", :jsonpath_assets=>nil, :log=>"s3://XXX", :raw=>{:in=>["s3://XXX"], :processing=>"s3://XXX", :archive=>"s3://XXX"}, :enriched=>{:good=>"s3://XXX", :bad=>"s3://XXX", :errors=>nil, :archive=>"s3://XXX"}, :shredded=>{:good=>"s3://XXX", :bad=>"s3://XXX", :errors=>nil, :archive=>"s3://XXX"}}}, :emr=>{:ami_version=>"5.5.0", :region=>"us-west-2", :jobflow_role=>"EMR_EC2_DefaultRole", :service_role=>"EMR_DefaultRole", :placement=>nil, :ec2_subnet_id=>"subnet-XXX", :ec2_key_name=>"Snowplow-EMR", :bootstrap=>[], :software=>{:hbase=>nil, :lingual=>nil}, :jobflow=>{:job_name=>"Snowplow ETL", :master_instance_type=>"m4.large", :core_instance_count=>2, :core_instance_type=>"m4.large", :core_instance_ebs=>{:volume_size=>100, :volume_type=>"gp2", :volume_iops=>400, :ebs_optimized=>false}, :task_instance_count=>0, :task_instance_type=>"m4.large", :task_instance_bid=>0.015}, :bootstrap_failure_tries=>3, :additional_info=>nil}}, :collectors=>{:format=>"cloudfront"}, :enrich=>{:versions=>{:spark_enrich=>"1.9.0"}, :continue_on_unexpected_error=>false, :output_compression=>"NONE"}, :storage=>{:versions=>{:rdb_shredder=>"0.12.0", :hadoop_elasticsearch=>"0.1.0"}, :download=>{:folder=>nil}}, :monitoring=>{:tags=>{:name=>"EMRETLRunner"}, :logging=>{:level=>"DEBUG"}}}

that should tell you what is wrong.


#3

Hi @apoorva007,

You seem to be using EmrEtlRunner from the R88 release while the configuration file is for R89. There’s a slight difference in the format of the configuration file between the two versions.

Here the output for the expected structure says `:jobflow => {:master_instance_type => String, ...}`. The parameter `job_name` is not expected there, but it is expected in `:enrich => {:job_name => String, ...}`. This is an indication of a runner prior to R89.

However, the actual config is for R89.

Does it make sense?


#4

Guys, I’m having a similar issue for no apparent reason, since it was running fine previously and the configuration hasn’t changed. Tried pretty printing it, and the only difference I can see is that I have `hadoop_elasticsearch => "0.1.0"`, and it’s not present in the expected JSON. However, commenting it out does not help.

Actual:

{
aws => {
		access_key_id => nil,
			secret_access_key => nil,
			s3 => {
				region => "eu-central-1",
				buckets => {
					assets => "s3//snowplow-hosted-assets",
						jsonpath_assets => "s3//my-snowplow-assets",
						log => "s3//my-snowplow-etl/logs",
						raw => {
						  in => ["s3//in-bucket"],
							processing => "s3//my-snowplow-etl/processing",
							archive => "s3//my-snowplow-archive/raw"
						},
						enriched => {
							good => "s3//my-snowplow-data/enriched/good",
							bad => "s3//my-snowplow-data/enriched/bad",
							errors => "s3//my-snowplow-data/enriched/errors",
							archive => "s3//my-snowplow-data/enriched/archive"
						},
						shredded => {
							good => "s3//my-snowplow-data/shredded/good",
							bad => "s3//my-snowplow-data/shredded/bad",
							errors => "s3//my-snowplow-data/shredded/errors",
							archive => "s3//my-snowplow-data/shredded/archive"
						}
				}
			}
			emr => {
				ami_version => "4.5.0",
					region => "eu-central-1",
					jobflow_role => "EMR_EC2_DefaultRole",
					service_role => "EMR_DefaultRole",
					placement => nil,
					ec2_subnet_id => "subnet-XXXXX",
					ec2_key_name => "keyname",
					bootstrap => [],
					software => {
						hbase => nil,
						lingual => nil
					},
					jobflow => {
						master_instance_type => "m4.large",
						core_instance_count => 4,
						core_instance_type => "c3.4xlarge",
						task_instance_count => 0,
						task_instance_type => "c4.large",
						task_instance_bid => nil
					},
					bootstrap_failure_tries => 3,
					additional_info => nil
			}
	},
	collectors => {
		format => "clj-tomcat"
	},
	enrich => {
		job_name => "Snowplow ETL",
			versions => {
				hadoop_enrich => "1.8.0",
					hadoop_shred => "0.11.0",
					hadoop_elasticsearch => "0.1.0"
			},
			continue_on_unexpected_error => false,
			output_compression => "GZIP"
	},
	storage => {
		download => {
			folder => nil
		}
	},
	monitoring => {
		tags => {
				Name => "snowplow-enrichment"
			},
			logging => {
				level => "DEBUG"
			}
	}

}

Expected:

{
aws => {
	access_key_id => String,
secret_access_key => String,
s3 => {
		region => String,
  buckets => {
			assets => String,
    jsonpath_assets => (String or nil),
    log => String,
    raw => {
      in => (a collection Array of String),
      processing => String,
      archive => String
			},
    enriched => {
				good => String,
      bad => String,
      errors => (String or nil),
      archive => (String or nil)
			},
    shredded => {
				good => String,
      bad => String,
      errors => (String or nil),
      archive => (String or nil)
			}
		}
	}, emr => {
		ami_version => String, region => String, jobflow_role => String, service_role => String, placement => (String or nil), ec2_subnet_id => (String or nil), ec2_key_name => String, bootstrap => ((a collection Array of String) or nil), software => {
			hbase => (String or nil), lingual => (String or nil)
		}, jobflow => {
			master_instance_type => String, core_instance_count => Num, core_instance_type => String, core_instance_ebs => # < ContractsMaybe0x2ed84be9 @vals = [{
				volume_size => # < Proc0x1e288c76 @uriclassloader / emr - etl - runner / lib / snowplow - emr - etl - runner / contracts.rb26(lambda) > ,
				volume_type => # < Proc0x738ed8f5 @uriclassloader / emr - etl - runner / lib / snowplow - emr - etl - runner / contracts.rb25(lambda) > ,
				volume_iops => # < ContractsMaybe0x1cfb7450 @vals = [# < Proc0x1e288c76 @uriclassloader / emr - etl - runner / lib / snowplow - emr - etl - runner / contracts.rb26(lambda) > , nil] > ,
				ebs_optimized => # < ContractsMaybe0x1d1deb11 @vals = [ContractsBool, nil] >
			}, nil] > , task_instance_count => Num, task_instance_type => String, task_instance_bid => (Num or nil)
		}, additional_info => (String or nil), bootstrap_failure_tries => Num
	}
}, collectors => {
	format => String
}, enrich => {
	job_name => String, versions => {
		hadoop_enrich => String, hadoop_shred => String
	}, continue_on_unexpected_error => Bool, output_compression => # < Proc0x77988c45 @uriclassloader / emr - etl - runner / lib / snowplow - emr - etl - runner / contracts.rb24(lambda) >
}, storage => {
	download => {
		folder => (String or nil)
	}
}, monitoring => {
	tags => (Hash < Symbol, String > ), logging => {
		level => String
	}, snowplow => ({
			method => String,
			collector => String,
			app_id => String
		}
		or nil)
}}

I’m running r88 with elasticsearch and redshift targets. This is very confusing, as it literally ran fine just a few hours ago. Any ideas?


#5

Hi @kazgurs1,

Could you please print the full stacktrace? I have a feeling that the problem is not in the configuration contract, but somewhere in the code, as Ruby Contracts checks values at runtime and value types can vary if, for example, something threw an exception. This could signal that we’re not handling some situation correctly.


#6

Here it is:

F, [2017-07-28T08:26:30.137000 #11334] FATAL -- : 
10:26:30 
10:26:30 ReturnContractError (Contract violation for return value:
10:26:30         Expected: {:aws=>{:access_key_id=>String, :secret_access_key=>String, :s3=>{:region=>String, :buckets=>{:assets=>String, :jsonpath_assets=>(String or nil), :log=>String, :raw=>{:in=>(a collection Array of String), :processing=>String, :archive=>String}, :enriched=>{:good=>String, :bad=>String, :errors=>(String or nil), :archive=>(String or nil)}, :shredded=>{:good=>String, :bad=>String, :errors=>(String or nil), :archive=>(String or nil)}}}, :emr=>{:ami_version=>String, :region=>String, :jobflow_role=>String, :service_role=>String, :placement=>(String or nil), :ec2_subnet_id=>(String or nil), :ec2_key_name=>String, :bootstrap=>((a collection Array of String) or nil), :software=>{:hbase=>(String or nil), :lingual=>(String or nil)}, :jobflow=>{:master_instance_type=>String, :core_instance_count=>Num, :core_instance_type=>String, :core_instance_ebs=>#<Contracts::Maybe:0x35744f8 @vals=[{:volume_size=>#<Proc:0x738ed8f5@uri:classloader:/emr-etl-runner/lib/snowplow-emr-etl-runner/contracts.rb:26 (lambda)>, :volume_type=>#<Proc:0x1cfb7450@uri:classloader:/emr-etl-runner/lib/snowplow-emr-etl-runner/contracts.rb:25 (lambda)>, :volume_iops=>#<Contracts::Maybe:0x478c84aa @vals=[#<Proc:0x738ed8f5@uri:classloader:/emr-etl-runner/lib/snowplow-emr-etl-runner/contracts.rb:26 (lambda)>, nil]>, :ebs_optimized=>#<Contracts::Maybe:0x18d1d137 @vals=[Contracts::Bool, nil]>}, nil]>, :task_instance_count=>Num, :task_instance_type=>String, :task_instance_bid=>(Num or nil)}, :additional_info=>(String or nil), :bootstrap_failure_tries=>Num}}, :collectors=>{:format=>String}, :enrich=>{:job_name=>String, :versions=>{:hadoop_enrich=>String, :hadoop_shred=>String}, :continue_on_unexpected_error=>Bool, :output_compression=>#<Proc:0xf5f8de2@uri:classloader:/emr-etl-runner/lib/snowplow-emr-etl-runner/contracts.rb:24 (lambda)>}, :storage=>{:download=>{:folder=>(String or nil)}}, :monitoring=>{:tags=>(Hash<Symbol, String>), :logging=>{:level=>String}, :snowplow=>({:method=>String, 
:collector=>String, :app_id=>String} or nil)}},
10:26:30         Actual: {:aws=>{:access_key_id=>nil, :secret_access_key=>nil, :s3=>{:region=>"eu-central-1", :buckets=>{:assets=>"s3://snowplow-hosted-assets", :jsonpath_assets=>"s3://my-snowplow-assets", :log=>"s3://my-snowplow-etl/logs", :raw=>{:in=>["s3://elasticbeanstalk-eu-central-1-890672996299/resources/environments/logs/publish/e-3vymgtnbsi"], :processing=>"s3://my-snowplow-etl/processing", :archive=>"s3://my-snowplow-archive/raw"}, :enriched=>{:good=>"s3://my-snowplow-data/enriched/good", :bad=>"s3://my-snowplow-data/enriched/bad", :errors=>"s3://my-snowplow-data/enriched/errors", :archive=>"s3://my-snowplow-data/enriched/archive"}, :shredded=>{:good=>"s3://my-snowplow-data/shredded/good", :bad=>"s3://my-snowplow-data/shredded/bad", :errors=>"s3://my-snowplow-data/shredded/errors", :archive=>"s3://my-snowplow-data/shredded/archive"}}}, :emr=>{:ami_version=>"4.5.0", :region=>"eu-central-1", :jobflow_role=>"EMR_EC2_DefaultRole", :service_role=>"EMR_DefaultRole", :placement=>nil, :ec2_subnet_id=>"subnet-XXXXXX", :ec2_key_name=>"keyname", :bootstrap=>[], :software=>{:hbase=>nil, :lingual=>nil}, :jobflow=>{:master_instance_type=>"m4.large", :core_instance_count=>3, :core_instance_type=>"c3.2xlarge", :task_instance_count=>0, :task_instance_type=>"c4.large", :task_instance_bid=>nil}, :bootstrap_failure_tries=>3, :additional_info=>nil}}, :collectors=>{:format=>"clj-tomcat"}, :enrich=>{:job_name=>"Snowplow ETL", :versions=>{:hadoop_enrich=>"1.8.0", :hadoop_shred=>"0.11.0", :hadoop_elasticsearch=>"0.1.0"}, :continue_on_unexpected_error=>false, :output_compression=>"GZIP"}, :storage=>{:download=>{:folder=>nil}}, :monitoring=>{:tags=>{:Name=>"snowplow-etl"}, :logging=>{:level=>"DEBUG"}}}
10:26:30         Value guarded in: Snowplow::EmrEtlRunner::Cli::load_config
10:26:30         With Contract: Maybe, String => Hash
10:26:30         At: uri:classloader:/emr-etl-runner/lib/snowplow-emr-etl-runner/cli.rb:137 ):
10:26:30     uri:classloader:/gems/contracts-0.11.0/lib/contracts.rb:45:in `block in Contract'
10:26:30     uri:classloader:/gems/contracts-0.11.0/lib/contracts.rb:154:in `failure_callback'
10:26:30     uri:classloader:/gems/contracts-0.11.0/lib/contracts/call_with.rb:80:in `call_with'
10:26:30     uri:classloader:/gems/contracts-0.11.0/lib/contracts/method_handler.rb:138:in `block in redefine_method'
10:26:30     uri:classloader:/emr-etl-runner/lib/snowplow-emr-etl-runner/cli.rb:108:in `process_options'
10:26:30     uri:classloader:/emr-etl-runner/lib/snowplow-emr-etl-runner/cli.rb:94:in `get_args_config_enrichments_resolver'
10:26:30     uri:classloader:/gems/contracts-0.11.0/lib/contracts/method_reference.rb:43:in `send_to'
10:26:30     uri:classloader:/gems/contracts-0.11.0/lib/contracts/call_with.rb:76:in `call_with'
10:26:30     uri:classloader:/gems/contracts-0.11.0/lib/contracts/method_handler.rb:138:in `block in redefine_method'
10:26:30     uri:classloader:/emr-etl-runner/bin/snowplow-emr-etl-runner:37:in `<main>'
10:26:30     org/jruby/RubyKernel.java:973:in `load'
10:26:30     uri:classloader:/META-INF/main.rb:1:in `<main>'
10:26:30     org/jruby/RubyKernel.java:955:in `require'
10:26:30     uri:classloader:/META-INF/main.rb:1:in `(root)'
10:26:30     uri:classloader:/META-INF/jruby.home/lib/ruby/stdlib/rubygems/core_ext/kernel_require.rb:1:in `<main>'
10:26:30 
10:26:30

#7

@kazgurs1 if I remember correctly, even if you’re using env vars for AWS credentials they should always be String, not nil, which is what I see in your message. Can you confirm the AWS creds are available and you didn’t change anything regarding their retrieval?


#8

Damn it, that was it :) Thanks for your help. I take AWS credentials from env vars, and this somehow got knocked out.