Storage Loader successful but not loading into Redshift or Postgres DB

Hello everyone. After a few different config.yml attempts I finally have everything enriching and shredding. Everything runs successfully, and I can even see the data files in my enriched/archive S3 bucket, but for some reason nothing is loading into my Redshift DB. I also tried loading into a local Postgres DB and it did not load there either.

Any ideas?

Thanks.

Do you want to post the error(s) you're getting, as well as the config you are using for Storage Loader, with the sensitive credentials redacted?

That's sort of the problem: there are no errors being thrown. I will upload the config once I have cleansed it.

Here is my config file. The problem I am having is that everything runs, and the files end up in my shredded and enriched archive directories, but no data is loaded into my database. Thank you again for any help.

I run them using these commands:
./snowplow-emr-etl-runner --config config/config.yml --resolver config/iglu_resolver.json --enrichments enrichments

./snowplow-storage-loader --config config/config.yml -t /Users/XXXX/Code/snowplow-installation/Enrich\ Config/downloads -r config/iglu_resolver.json
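
As a sanity check I can list the shredded output that Storage Loader will try to load and confirm the atomic-events files are there (rough sketch only; the bucket name and run folder below are placeholders, adjust to your own values):

# Placeholder bucket name and run folder; atomic-events files should be listed here
aws s3 ls s3://XXXXXXXXX-data/shredded/good/run=2017-03-27-18-43-45/ --recursive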

aws:
  # Credentials can be hardcoded or set in environment variables
  access_key_id: XXXXXXXXX
  secret_access_key: XXXXXXXXX
  s3:
    region: us-west-2
    buckets:
      assets: s3://snowplow-hosted-assets # DO NOT CHANGE unless you are hosting the jarfiles etc yourself in your own bucket
      jsonpath_assets: # If you have defined your own JSON Schemas, add the s3:// path to your own JSON Path files in your own bucket here
      log: s3://elasticbeanstalk-us-west-2-XXXXXXXXX/resources/environments/emrlog
      raw:
        in:                  # This is a YAML array of one or more in buckets - you MUST use hyphens before each entry in the array, as below
          - s3://elasticbeanstalk-us-west-2-XXXXXXXXX/resources/environments/logs/publish/e-cftpjpq6vh         # e.g. s3://my-old-collector-bucket
        processing: s3://XXXXXXXXX-etl/processing
        archive: s3://XXXXXXXXX-archive/raw    # e.g. s3://my-archive-bucket/raw
      enriched:
        good: s3://XXXXXXXXX-data/enriched/good       # e.g. s3://my-out-bucket/enriched/good
        bad: s3://XXXXXXXXX-data/enriched/bad        # e.g. s3://my-out-bucket/enriched/bad
        errors:      # Leave blank unless :continue_on_unexpected_error: set to true below
        archive: s3://XXXXXXXXX-data/enriched/archive    # Where to archive enriched events to, e.g. s3://my-archive-bucket/enriched
      shredded:
        good: s3://XXXXXXXXX-data/shredded/good       # e.g. s3://my-out-bucket/shredded/good
        bad: s3://XXXXXXXXX-data/shredded/bad        # e.g. s3://my-out-bucket/shredded/bad
        errors:      # Leave blank unless :continue_on_unexpected_error: set to true below
        archive: s3://XXXXXXXXX-data/shredded/archive    # Where to archive shredded events to, e.g. s3://my-archive-bucket/shredded
  emr:
    ami_version: 4.5.0
    region: us-west-2        # Always set this
    jobflow_role: EMR_EC2_DefaultRole # Created using $ aws emr create-default-roles
    service_role: EMR_DefaultRole     # Created using $ aws emr create-default-roles
    placement: us-west-2b     # Set this if not running in VPC. Leave blank otherwise
    ec2_subnet_id:  # Set this if running in VPC. Leave blank otherwise
    ec2_key_name: spacestation_new
    bootstrap: []           # Set this to specify custom bootstrap actions. Leave empty otherwise
    software:
      hbase:                # Optional. To launch on cluster, provide version, "0.92.0", keep quotes. Leave empty otherwise.
      lingual:              # Optional. To launch on cluster, provide version, "1.1", keep quotes. Leave empty otherwise.
    # Adjust your Hadoop cluster below
    jobflow:
      master_instance_type: m1.medium
      core_instance_count: 2
      core_instance_type: m1.medium
      core_instance_ebs:    # Optional. Attach an EBS volume to each core instance.
        volume_size: 100    # Gigabytes
        volume_type: "gp2"
        volume_iops: 400    # Optional. Will only be used if volume_type is "io1"
        ebs_optimized: false # Optional. Will default to true
      task_instance_count: 0 # Increase to use spot instances
      task_instance_type: m1.medium
      task_instance_bid: 0.015 # In USD. Adjust bid, or leave blank for non-spot-priced (i.e. on-demand) task instances
    bootstrap_failure_tries: 3 # Number of times to attempt the job in the event of bootstrap failures
    additional_info:        # Optional JSON string for selecting additional features
collectors:
  format: cloudfront # For example: 'clj-tomcat' for the Clojure Collector, 'thrift' for Thrift records, 'tsv/com.amazon.aws.cloudfront/wd_access_log' for Cloudfront access logs or 'ndjson/urbanairship.connect/v1' for UrbanAirship Connect events
enrich:
  job_name: Snowplow ETL # Give your job a name
  versions:
    hadoop_enrich: 1.8.0 # Version of the Hadoop Enrichment process
    hadoop_shred: 0.10.0 # Version of the Hadoop Shredding process
    hadoop_elasticsearch: 0.1.0 # Version of the Hadoop to Elasticsearch copying process
  continue_on_unexpected_error: false # Set to 'true' (and set :out_errors: above) if you don't want any exceptions thrown from ETL
  output_compression: NONE # Compression only supported with Redshift, set to NONE if you have Postgres targets. Allowed formats: NONE, GZIP
storage:
  download:
    folder: # Postgres-only config option. Where to store the downloaded files. Leave blank for Redshift
  targets:
    - name: "XXXXXXXXX-shredded"
      type: redshift
      host: XXXXXXXXX-shredded.XXXXXXXXX.us-west-2.redshift.amazonaws.com # The endpoint as shown in the Redshift console
      database: XXXXXXXXX # Name of database
      port: 5439 # Default Redshift port
      ssl_mode: disable # One of disable (default), require, verify-ca or verify-full
      table: atomic.events
      username: XXXXXXXXX
      password: XXXXXXXXX
      maxerror: 1 # Stop loading on first error, or increase to permit more load errors
      comprows: 200000 # Default for a 1 XL node cluster. Not used unless --include compupdate specified
monitoring:
  tags: {} # Name-value pairs describing this job
  logging:
    level: DEBUG # You can optionally switch to INFO for production
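
To check whether the Redshift target above actually has the tables the loader expects, the atomic schema can be listed over the same connection details (sketch only; host, port, database, and username are the redacted placeholders from the config, and psql will prompt for the password):

# Placeholder connection details from the target above
psql -h XXXXXXXXX-shredded.XXXXXXXXX.us-west-2.redshift.amazonaws.com -p 5439 -U XXXXXXXXX -d XXXXXXXXX \
  -c "SELECT tablename FROM pg_tables WHERE schemaname = 'atomic' ORDER BY tablename;"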

Interesting: I used an older Storage Loader (v77) and it finally spat out an error. Here it is:

Loading Snowplow events and shredded types into XXXXXXXXX-shredded (Redshift cluster)...
Unexpected error: Java::OrgPostgresqlUtil::PSQLException error executing COPY statements: BEGIN;
COPY atomic.events FROM 's3://XXXXXXXXX-data/shredded/good/run=2017-03-27-18-43-45/atomic-events' CREDENTIALS 'aws_access_key_id=xxxxxxxxxxxxxxxxxxxx;aws_secret_access_key=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' REGION AS 'us-west-2' DELIMITER '\t' MAXERROR 1 EMPTYASNULL FILLRECORD TRUNCATECOLUMNS TIMEFORMAT 'auto' ACCEPTINVCHARS ;
COPY atomic.com_snowplowanalytics_snowplow_change_form_1 FROM 's3://XXXXXXXXX-data/shredded/good/run=2017-03-27-18-43-45/com.snowplowanalytics.snowplow/change_form/jsonschema/1-' CREDENTIALS 'aws_access_key_id=xxxxxxxxxxxxxxxxxxxx;aws_secret_access_key=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' JSON AS 's3://snowplow-hosted-assets-us-west-2/4-storage/redshift-storage/jsonpaths/com.snowplowanalytics.snowplow/change_form_1.json' REGION AS 'us-west-2' MAXERROR 1 TRUNCATECOLUMNS TIMEFORMAT 'auto' ACCEPTINVCHARS ;
COPY atomic.com_snowplowanalytics_snowplow_link_click_1 FROM 's3://XXXXXXXXX-data/shredded/good/run=2017-03-27-18-43-45/com.snowplowanalytics.snowplow/link_click/jsonschema/1-' CREDENTIALS 'aws_access_key_id=xxxxxxxxxxxxxxxxxxxx;aws_secret_access_key=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' JSON AS 's3://snowplow-hosted-assets-us-west-2/4-storage/redshift-storage/jsonpaths/com.snowplowanalytics.snowplow/link_click_1.json' REGION AS 'us-west-2' MAXERROR 1 TRUNCATECOLUMNS TIMEFORMAT 'auto' ACCEPTINVCHARS ;
COPY atomic.com_snowplowanalytics_snowplow_submit_form_1 FROM 's3://XXXXXXXXX-data/shredded/good/run=2017-03-27-18-43-45/com.snowplowanalytics.snowplow/submit_form/jsonschema/1-' CREDENTIALS 'aws_access_key_id=xxxxxxxxxxxxxxxxxxxx;aws_secret_access_key=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' JSON AS 's3://snowplow-hosted-assets-us-west-2/4-storage/redshift-storage/jsonpaths/com.snowplowanalytics.snowplow/submit_form_1.json' REGION AS 'us-west-2' MAXERROR 1 TRUNCATECOLUMNS TIMEFORMAT 'auto' ACCEPTINVCHARS ;
COPY atomic.com_snowplowanalytics_snowplow_ua_parser_context_1 FROM 's3://XXXXXXXXX-data/shredded/good/run=2017-03-27-18-43-45/com.snowplowanalytics.snowplow/ua_parser_context/jsonschema/1-' CREDENTIALS 'aws_access_key_id=xxxxxxxxxxxxxxxxxxxx;aws_secret_access_key=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' JSON AS 's3://snowplow-hosted-assets-us-west-2/4-storage/redshift-storage/jsonpaths/com.snowplowanalytics.snowplow/ua_parser_context_1.json' REGION AS 'us-west-2' MAXERROR 1 TRUNCATECOLUMNS TIMEFORMAT 'auto' ACCEPTINVCHARS ;
COMMIT;: ERROR: Cannot COPY into nonexistent table com_snowplowanalytics_snowplow_change_form_1
uri:classloader:/storage-loader/lib/snowplow-storage-loader/redshift_loader.rb:89:in `load_events_and_shredded_types'
uri:classloader:/gems/contracts-0.11.0/lib/contracts/method_reference.rb:43:in `send_to'
uri:classloader:/gems/contracts-0.11.0/lib/contracts/call_with.rb:76:in `call_with'
uri:classloader:/gems/contracts-0.11.0/lib/contracts/method_handler.rb:138:in `block in redefine_method'
uri:classloader:/storage-loader/bin/snowplow-storage-loader:54:in `block in (root)'
uri:classloader:/storage-loader/bin/snowplow-storage-loader:51:in
org/jruby/RubyKernel.java:973:in `load'
uri:classloader:/META-INF/main.rb:1:in
org/jruby/RubyKernel.java:955:in `require'
uri:classloader:/META-INF/main.rb:1:in `(root)'
uri:classloader:/META-INF/jruby.home/lib/ruby/stdlib/rubygems/core_ext/kernel_require.rb:1:in `'
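
The error itself points at the likely cause: the COPY targets a shredded-type table, atomic.com_snowplowanalytics_snowplow_change_form_1, that does not exist in the cluster, and because all the COPY statements run inside one BEGIN/COMMIT block the whole load is rolled back, so atomic.events ends up empty as well. A rough sketch of the kind of fix, assuming you have local copies of the Redshift DDL that Snowplow publishes for each shredded type (the file names and connection placeholders below are assumptions, not verified paths):

# Create the missing shredded-type tables before re-running Storage Loader.
# The .sql files are assumed local copies of the published Redshift DDL for each type;
# host, port, database, and username are the redacted placeholders from the config.
for ddl in com_snowplowanalytics_snowplow_change_form_1.sql \
           com_snowplowanalytics_snowplow_link_click_1.sql \
           com_snowplowanalytics_snowplow_submit_form_1.sql \
           com_snowplowanalytics_snowplow_ua_parser_context_1.sql; do
  psql -h XXXXXXXXX-shredded.XXXXXXXXX.us-west-2.redshift.amazonaws.com -p 5439 -U XXXXXXXXX -d XXXXXXXXX -f "$ddl"
done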