Can't start EmrEtlRunner


#1

Hi guys,

I’ve tried multiple tweaks to the config file but keep getting stuck on the error below:

F, [2016-04-22T15:07:00.715000 #25771] FATAL -- :

ContractError (Contract violation for return value:
Expected: {:aws=>{:access_key_id=>String, :secret_access_key=>String, :s3=>{:region=>String, :buckets=>{:assets=>String, :jsonpath_assets=>#<Contracts::Maybe:0x67ef639d @vals=[String, nil]>, :log=>String, :raw=>{:in=>#<Contracts::ArrayOf:0x15750dba @contract=String>, :processing=>String, :archive=>String}, :enriched=>{:good=>String, :bad=>String, :errors=>#<Contracts::Maybe:0x37d83957 @vals=[String, nil]>, :archive=>#<Contracts::Maybe:0x549c9b65 @vals=[String, nil]>}, :shredded=>{:good=>String, :bad=>String, :errors=>#<Contracts::Maybe:0x759e2f48 @vals=[String, nil]>, :archive=>#<Contracts::Maybe:0x64325c4b @vals=[String, nil]>}}}, :emr=>{:ami_version=>String, :region=>String, :jobflow_role=>String, :service_role=>String, :placement=>#<Contracts::Maybe:0x26497351 @vals=[String, nil]>, :ec2_subnet_id=>#<Contracts::Maybe:0x2031de7a @vals=[String, nil]>, :ec2_key_name=>String, :bootstrap=>#<Contracts::Maybe:0x2a052c19 @vals=[#<Contracts::ArrayOf:0x42f14c75 @contract=String>, nil]>, :software=>{:hbase=>#<Contracts::Maybe:0x399232dc @vals=[String, nil]>, :lingual=>#<Contracts::Maybe:0x2d49270b @vals=[String, nil]>}, :jobflow=>{:master_instance_type=>String, :core_instance_count=>Contracts::Num, :core_instance_type=>String, :task_instance_count=>Contracts::Num, :task_instance_type=>String, :task_instance_bid=>#<Contracts::Maybe:0x1da25a67 @vals=[Contracts::Num, nil]>}, :additional_info=>#<Contracts::Maybe:0xddd2f64 @vals=[String, nil]>, :bootstrap_failure_tries=>Contracts::Num}}, :collectors=>{:format=>String}, :enrich=>{:job_name=>String, :versions=>{:hadoop_enrich=>String, :hadoop_shred=>String}, :continue_on_unexpected_error=>Contracts::Bool, :output_compression=>#<Proc:0x44befb0@/home/ec2-user/snowplow-emr-etl-runner!/emr-etl-runner/lib/snowplow-emr-etl-runner/contracts.rb:23 (lambda)>}, :storage=>{:download=>{:folder=>#<Contracts::Maybe:0x9611f3e @vals=[String, nil]>}, :targets=>#<Contracts::ArrayOf:0x474a2441 @contract={:name=>String, :type=>String, :host=>String, 
:database=>String, :port=>Contracts::Num, :ssl_mode=>#<Contracts::Maybe:0x19e089f0 @vals=[String, nil]>, :table=>String, :username=>#<Contracts::Maybe:0xda99b7f @vals=[String, nil]>, :password=>#<Contracts::Maybe:0x37dc39d9 @vals=[String, nil]>, :es_nodes_wan_only=>#<Contracts::Maybe:0x6e2ca46d @vals=[Contracts::Bool, nil]>, :maxerror=>#<Contracts::Maybe:0x26e37689 @vals=[Contracts::Num, nil]>, :comprows=>#<Contracts::Maybe:0x50a125fc @vals=[Contracts::Num, nil]>}>}, :monitoring=>{:tags=>#<Contracts::HashOf:0x54924bf7 @value=String, @key=Symbol>, :logging=>{:level=>String}, :snowplow=>#<Contracts::Maybe:0x50b97081 @vals=[{:method=>String, :collector=>String, :app_id=>String}, nil]>}},
Actual: {:aws=>{:access_key_id=>"#####", :secret_access_key=>"#####", :s3=>{:region=>“eu-west-1”, :buckets=>{:assets=>“s3://snowplow-hosted-assets”, :jsonpath_assets=>nil, :log=>“s3n://snowplow-bucket-data/logs/”, :raw=>{:in=>[“s3n://elasticbeanstalk-eu-west-1-602232737466/resources/environments/logs/publish/e-bxv4qkd84p”], :processing=>“s3n://snowplow-bucket-data/processing/”, :archive=>“s3://snowplow-bucket-data/raw”}, :enriched=>{:good=>“s3://snowplow-bucket-data/enriched/good”, :bad=>“s3://snowplow-bucket-data/enriched/bad”, :errors=>nil, :archive=>“s3://snowplow-bucket-data/enriched/archive”}, :shredded=>{:good=>“s3://snowplow-bucket-data/shredded/good”, :bad=>“s3://snowplow-bucket-data/shredded/bad”, :errors=>nil, :archive=>“s3://snowplow-bucket-data/shredded/archive”}}}, :emr=>{:ami_version=>“4.3.0”, :region=>“eu-west-1”, :jobflow_role=>“EMR_EC2_DefaultRole”, :service_role=>“EMR_DefaultRole”, :placement=>nil, :ec2_subnet_id=>“subnet-ccb0a1ae”, :ec2_key_name=>“snowplow-ec2”, :bootstrap=>[], :software=>{:hbase=>nil, :lingual=>nil}, :jobflow=>{:master_instance_type=>“m1.medium”, :core_instance_count=>2, :core_instance_type=>“m1.medium”, :task_instance_count=>0, :task_instance_type=>“m1.medium”, :task_instance_bid=>0.015}, :bootstrap_failure_tries=>3, :additional_info=>nil}}, :collectors=>{:format=>“clj-tomcat”}, :enrich=>{:job_name=>“Snowplow ETL”, :versions=>{:hadoop_enrich=>“1.6.0”, :hadoop_shred=>“0.8.0”, :hadoop_elasticsearch=>“0.1.0”}, :continue_on_unexpected_error=>false, :output_compression=>“NONE”}, :storage=>{:download=>{:folder=>{:targets=>[]}}, :targets=>nil}, :monitoring=>{:tags=>{}, :logging=>{:level=>“DEBUG”}}}

The config.yml used:

aws:
access_key_id: #####
secret_access_key: #####
s3:
region: eu-west-1
buckets:
assets: s3://snowplow-hosted-assets
jsonpath_assets:
log: s3n://snowplow-bucket-data/logs/
raw:
in:
- "s3n://elasticbeanstalk-eu-west-1-602232737466/resources/environments/logs/publish/e-bxv4qkd84p"
processing: s3n://snowplow-bucket-data/processing/
archive: s3://snowplow-bucket-data/raw
enriched:
good: s3://snowplow-bucket-data/enriched/good
bad: s3://snowplow-bucket-data/enriched/bad
errors:
archive: s3://snowplow-bucket-data/enriched/archive
shredded:
good: s3://snowplow-bucket-data/shredded/good
bad: s3://snowplow-bucket-data/shredded/bad
errors:
archive: s3://snowplow-bucket-data/shredded/archive
emr:
ami_version: 4.3.0
region: eu-west-1 # Always set this
jobflow_role: EMR_EC2_DefaultRole # Created using aws emr create-default-roles
service_role: EMR_DefaultRole # Created using aws emr create-default-roles
placement: # Set this if not running in VPC. Leave blank otherwise
ec2_subnet_id: subnet-ccb0a1ae # Set this if running in VPC. Leave blank otherwise
ec2_key_name: snowplow-ec2
bootstrap: [] # Set this to specify custom bootstrap actions. Leave empty otherwise
software:
hbase: # Optional. To launch on cluster, provide version, "0.92.0", keep quotes. Leave empty otherwise.
lingual: # Optional. To launch on cluster, provide version, "1.1", keep quotes. Leave empty otherwise.
# Adjust your Hadoop cluster below
jobflow:
master_instance_type: m1.medium
core_instance_count: 2
core_instance_type: m1.medium
task_instance_count: 0 # Increase to use spot instances
task_instance_type: m1.medium
task_instance_bid: 0.015 # In USD. Adjust bid, or leave blank for non-spot-priced (i.e. on-demand) task instances
bootstrap_failure_tries: 3 # Number of times to attempt the job in the event of bootstrap failures
additional_info: # Optional JSON string for selecting additional features
collectors:
format: clj-tomcat
enrich:
job_name: Snowplow ETL # Give your job a name
versions:
hadoop_enrich: 1.6.0 # Version of the Hadoop Enrichment process
hadoop_shred: 0.8.0 # Version of the Hadoop Shredding process
hadoop_elasticsearch: 0.1.0 # Version of the Hadoop to Elasticsearch copying process
continue_on_unexpected_error: false
output_compression: NONE
storage:
download:
folder: # Postgres-only config option. Where to store the downloaded files. Leave blank for Redshift
targets: []
targets:
monitoring:
tags: {} # Name-value pairs describing this job
logging:
level: DEBUG # You can optionally switch to INFO for production


#2

Hi @T_P,

I can see you listed targets: twice in your config.yml:

  targets: [] 
  targets:

Can you remove the second `targets:` entry and try again?

Regards,
Ihor


#3

Thank you @ihor :wink:
Now it works!