Hi @alex,
I need some help: we are unable to process these logs because of a Java out-of-memory error:
2016-12-06 12:54:29,122 ERROR [main] cascading.flow.stream.TrapHandler: caught OutOfMemoryException, will not trap, rethrowing
java.lang.OutOfMemoryError: Java heap space
at java.util.Arrays.copyOfRange(Arrays.java:2694)
at java.lang.String.<init>(String.java:203)
at java.lang.StringBuilder.toString(StringBuilder.java:405)
at com.fasterxml.jackson.core.util.TextBuffer.contentsAsString(TextBuffer.java:349)
at com.fasterxml.jackson.core.io.SegmentedStringWriter.getAndClear(SegmentedStringWriter.java:83)
at com.fasterxml.jackson.databind.ObjectMapper.writeValueAsString(ObjectMapper.java:2344)
at org.json4s.jackson.JsonMethods$class.compact(JsonMethods.scala:34)
at org.json4s.jackson.JsonMethods$.compact(JsonMethods.scala:50)
at com.snowplowanalytics.snowplow.enrich.common.outputs.BadRow.toCompactJson(BadRow.scala:86)
at com.snowplowanalytics.snowplow.enrich.hadoop.EtlJob$$anonfun$13$$anonfun$apply$1.apply(EtlJob.scala:189)
at com.snowplowanalytics.snowplow.enrich.hadoop.EtlJob$$anonfun$13$$anonfun$apply$1.apply(EtlJob.scala:188)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
at scala.collection.immutable.List.foreach(List.scala:318)
at scala.collection.TraversableLike$class.map(TraversableLike.scala:244)
at scala.collection.AbstractTraversable.map(Traversable.scala:105)
at com.snowplowanalytics.snowplow.enrich.hadoop.EtlJob$$anonfun$13.apply(EtlJob.scala:188)
at com.snowplowanalytics.snowplow.enrich.hadoop.EtlJob$$anonfun$13.apply(EtlJob.scala:182)
at com.twitter.scalding.FlatMapFunction.operate(Operations.scala:46)
at cascading.flow.stream.FunctionEachStage.receive(FunctionEachStage.java:99)
at cascading.flow.stream.FunctionEachStage.receive(FunctionEachStage.java:39)
at cascading.flow.stream.FunctionEachStage$1.collect(FunctionEachStage.java:80)
at cascading.tuple.TupleEntryCollector.safeCollect(TupleEntryCollector.java:145)
at cascading.tuple.TupleEntryCollector.add(TupleEntryCollector.java:133)
at com.twitter.scalding.MapFunction.operate(Operations.scala:59)
at cascading.flow.stream.FunctionEachStage.receive(FunctionEachStage.java:99)
at cascading.flow.stream.FunctionEachStage.receive(FunctionEachStage.java:39)
at cascading.flow.stream.SourceStage.map(SourceStage.java:102)
at cascading.flow.stream.SourceStage.run(SourceStage.java:58)
at cascading.flow.hadoop.FlowMapper.run(FlowMapper.java:130)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:455)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:344)
2016-12-06 12:54:29,131 ERROR [main] cascading.flow.stream.TrapHandler: caught OutOfMemoryException, will not trap, rethrowing
java.lang.OutOfMemoryError: Java heap space
[stack trace identical to the one above]
2016-12-06 12:54:29,136 INFO [main] com.amazon.ws.emr.hadoop.fs.s3n.MultipartUploadOutputStream: abort closed:false s3://udmd-a-storage/udmd-a-enriched/enrich/bad/run=2016-12-06-11-06-19/part-00005
2016-12-06 12:54:29,138 INFO [s3n-worker-2] com.amazonaws.latency: Exception=[com.amazonaws.AbortedException: ], ServiceName=[Amazon S3], ServiceEndpoint=[https://udmd-a-storage.s3.amazonaws.com], Exception=1, HttpClientPoolLeasedCount=1, RequestCount=1, HttpClientPoolPendingCount=0, HttpClientPoolAvailableCount=1, ClientExecuteTime=[15325.284], HttpRequestTime=[15323.544], RequestSigningTime=[0.617], CredentialsRequestTime=[0.006], HttpClientSendRequestTime=[15186.65],
2016-12-06 12:54:29,597 INFO [s3n-worker-2] com.amazon.ws.emr.hadoop.fs.s3n.MultipartUploadOutputStream: uploadPart error com.amazonaws.AbortedException:
2016-12-06 12:54:29,695 INFO [main] com.amazonaws.latency: StatusCode=[204], ServiceName=[Amazon S3], AWSRequestID=[D711B721672C29BC], ServiceEndpoint=[https://udmd-a-storage.s3.amazonaws.com], HttpClientPoolLeasedCount=0, RequestCount=1, HttpClientPoolPendingCount=0, HttpClientPoolAvailableCount=2, ClientExecuteTime=[557.278], HttpRequestTime=[549.627], HttpClientReceiveResponseTime=[26.882], RequestSigningTime=[6.532], CredentialsRequestTime=[0.005], ResponseProcessingTime=[0.015], HttpClientSendRequestTime=[0.34],
2016-12-06 12:54:29,695 WARN [main] org.apache.hadoop.hdfs.DFSClient: DFSInputStream has been closed already
2016-12-06 12:54:29,700 INFO [main] com.amazon.ws.emr.hadoop.fs.s3n.MultipartUploadOutputStream: close closed:true s3://udmd-a-storage/udmd-a-enriched/enrich/bad/run=2016-12-06-11-06-19/part-00005
2016-12-06 12:54:29,701 FATAL [main] org.apache.hadoop.mapred.YarnChild: Error running child : java.lang.OutOfMemoryError: Java heap space
[stack trace identical to the one above]
From the stack trace it looks like the mapper runs out of heap while serialising bad rows to JSON (BadRow.toCompactJson). Please tell me how we can increase the Java heap space on the cluster. There is a script in the Google group for increasing the heap space, but it does not work with AMI version 4.3.0. Any pointers would be much appreciated.
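For reference, my understanding is that on the 4.x AMIs Hadoop settings are supplied through the EMR configurations JSON rather than through a bootstrap script. Below is a minimal sketch of what I believe the equivalent mapred-site configuration would look like; the memory values are placeholders I have not verified, and would need tuning for the instance type:

[
  {
    "Classification": "mapred-site",
    "Properties": {
      "mapreduce.map.java.opts": "-Xmx2048m",
      "mapreduce.map.memory.mb": "2560",
      "mapreduce.reduce.java.opts": "-Xmx4096m",
      "mapreduce.reduce.memory.mb": "5120"
    }
  }
]

This would be passed at cluster creation with something like aws emr create-cluster --release-label emr-4.3.0 --configurations file://./hadoop-config.json (the file name is just an example). Is that the right approach for 4.3.0, and is there a way to have EmrEtlRunner apply it?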
Thanks!
DB