
A roundup of problems encountered in day-to-day Hadoop cluster maintenance

(2015-08-20 14:43:23)
Tags: interruptedexception, kryoexception, broken pipe, eofexception, semanticexception
Category: hadoop
Exception 1: java.lang.InterruptedException
While running a Hive script on the cluster, hive.log reported the following error:
java.lang.InterruptedException
        at java.util.concurrent.FutureTask.awaitDone(FutureTask.java:400)
        at java.util.concurrent.FutureTask.get(FutureTask.java:187)
        at org.apache.hadoop.ipc.Client$Connection.sendRpcRequest(Client.java:1030)
        at org.apache.hadoop.ipc.Client.call(Client.java:1384)
        at org.apache.hadoop.ipc.Client.call(Client.java:1364)
        at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:206)
        at com.sun.proxy.$Proxy33.getTaskReports(Unknown Source)
        at org.apache.hadoop.mapreduce.v2.api.impl.pb.client.MRClientProtocolPBClientImpl.getTaskReports(MRClientProtocolPBClientImpl.java:188)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:606)
        at org.apache.hadoop.mapred.ClientServiceDelegate.invoke(ClientServiceDelegate.java:320)
        at org.apache.hadoop.mapred.ClientServiceDelegate.getTaskReports(ClientServiceDelegate.java:444)
        at org.apache.hadoop.mapred.YARNRunner.getTaskReports(YARNRunner.java:572)
        at org.apache.hadoop.mapreduce.Job$3.run(Job.java:543)
        at org.apache.hadoop.mapreduce.Job$3.run(Job.java:541)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:415)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1642)
        at org.apache.hadoop.mapreduce.Job.getTaskReports(Job.java:541)
        at org.apache.hadoop.mapred.JobClient.getTaskReports(JobClient.java:639)
        at org.apache.hadoop.mapred.JobClient.getMapTaskReports(JobClient.java:629)
        at org.apache.hadoop.hive.ql.exec.mr.HadoopJobExecHelper.progress(HadoopJobExecHelper.java:259)
        at org.apache.hadoop.hive.ql.exec.mr.HadoopJobExecHelper.progress(HadoopJobExecHelper.java:547)
        at org.apache.hadoop.hive.ql.exec.mr.ExecDriver.execute(ExecDriver.java:426)
        at org.apache.hadoop.hive.ql.exec.mr.MapRedTask.execute(MapRedTask.java:136)
        at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:153)
        at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:85)
        at org.apache.hadoop.hive.ql.exec.TaskRunner.run(TaskRunner.java:72)
2015-08-05 06:30:51,988 WARN  [Thread-854]: mapred.ClientServiceDelegate (ClientServiceDelegate.java:invoke(338)) - ClientServiceDelegate invoke call interrupted
java.lang.InterruptedException: sleep interrupted
        at java.lang.Thread.sleep(Native Method)
        at org.apache.hadoop.mapred.ClientServiceDelegate.invoke(ClientServiceDelegate.java:336)
        at org.apache.hadoop.mapred.ClientServiceDelegate.getTaskReports(ClientServiceDelegate.java:444)
        at org.apache.hadoop.mapred.YARNRunner.getTaskReports(YARNRunner.java:572)
        at org.apache.hadoop.mapreduce.Job$3.run(Job.java:543)
        at org.apache.hadoop.mapreduce.Job$3.run(Job.java:541)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:415)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1642)
        at org.apache.hadoop.mapreduce.Job.getTaskReports(Job.java:541)
        at org.apache.hadoop.mapred.JobClient.getTaskReports(JobClient.java:639)
        at org.apache.hadoop.mapred.JobClient.getMapTaskReports(JobClient.java:629)
        at org.apache.hadoop.hive.ql.exec.mr.HadoopJobExecHelper.progress(HadoopJobExecHelper.java:259)
        at org.apache.hadoop.hive.ql.exec.mr.HadoopJobExecHelper.progress(HadoopJobExecHelper.java:547)
        at org.apache.hadoop.hive.ql.exec.mr.ExecDriver.execute(ExecDriver.java:426)
        at org.apache.hadoop.hive.ql.exec.mr.MapRedTask.execute(MapRedTask.java:136)
        at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:153)
        at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:85)
        at org.apache.hadoop.hive.ql.exec.TaskRunner.run(TaskRunner.java:72)
2015-08-05 06:30:51,990 ERROR [Thread-854]: exec.Task (SessionState.java:printError(567)) - Job Submission failed with exception 'org.apache.hadoop.yarn.exceptions.YarnRuntimeException(java.lang.InterruptedException: sleep interrupted)'
org.apache.hadoop.yarn.exceptions.YarnRuntimeException: java.lang.InterruptedException: sleep interrupted
        at org.apache.hadoop.mapred.ClientServiceDelegate.invoke(ClientServiceDelegate.java:339)
        at org.apache.hadoop.mapred.ClientServiceDelegate.getTaskReports(ClientServiceDelegate.java:444)
        at org.apache.hadoop.mapred.YARNRunner.getTaskReports(YARNRunner.java:572)
        at org.apache.hadoop.mapreduce.Job$3.run(Job.java:543)
        at org.apache.hadoop.mapreduce.Job$3.run(Job.java:541)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:415)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1642)
        at org.apache.hadoop.mapreduce.Job.getTaskReports(Job.java:541)
        at org.apache.hadoop.mapred.JobClient.getTaskReports(JobClient.java:639)
        at org.apache.hadoop.mapred.JobClient.getMapTaskReports(JobClient.java:629)
        at org.apache.hadoop.hive.ql.exec.mr.HadoopJobExecHelper.progress(HadoopJobExecHelper.java:259)
        at org.apache.hadoop.hive.ql.exec.mr.HadoopJobExecHelper.progress(HadoopJobExecHelper.java:547)
        at org.apache.hadoop.hive.ql.exec.mr.ExecDriver.execute(ExecDriver.java:426)
        at org.apache.hadoop.hive.ql.exec.mr.MapRedTask.execute(MapRedTask.java:136)
        at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:153)
        at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:85)
        at org.apache.hadoop.hive.ql.exec.TaskRunner.run(TaskRunner.java:72)
Caused by: java.lang.InterruptedException: sleep interrupted
        at java.lang.Thread.sleep(Native Method)
        at org.apache.hadoop.mapred.ClientServiceDelegate.invoke(ClientServiceDelegate.java:336)
        ... 17 more
The cause: the thread waited too long without obtaining resources and was terminated. This can be mitigated by tuning the following settings, as shown in the sketch after this list:
dfs.datanode.handler.count (increase): the number of DataNode service threads. These threads only receive requests and process commands.
dfs.namenode.handler.count (increase): the number of NameNode service threads, used to handle RPC requests.
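A minimal sketch of the corresponding hdfs-site.xml entries (the values below are illustrative, not recommendations from this post; both settings default to 10, and suitable values depend on cluster size and load; the DataNode and NameNode daemons must be restarted to pick up the change):

  <property>
    <name>dfs.datanode.handler.count</name>
    <!-- DataNode service threads; default 10 -->
    <value>20</value>
  </property>
  <property>
    <name>dfs.namenode.handler.count</name>
    <!-- NameNode RPC handler threads; default 10 -->
    <value>64</value>
  </property>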



Exception 2: KryoException
hive.log threw the following exception:
Error: java.lang.RuntimeException: org.apache.hive.com.esotericsoftware.kryo.KryoException: Encountered unregistered class ID: 73
Serialization trace:
colExprMap (org.apache.hadoop.hive.ql.exec.TableScanOperator)
aliasToWork (org.apache.hadoop.hive.ql.plan.MapWork)
        at org.apache.hadoop.hive.ql.exec.Utilities.getBaseWork(Utilities.java:366)
        at org.apache.hadoop.hive.ql.exec.Utilities.getMapWork(Utilities.java:277)
        at org.apache.hadoop.hive.ql.io.HiveInputFormat.init(HiveInputFormat.java:258)
        at org.apache.hadoop.hive.ql.io.HiveInputFormat.pushProjectionsAndFilters(HiveInputFormat.java:451)
        at org.apache.hadoop.hive.ql.io.HiveInputFormat.pushProjectionsAndFilters(HiveInputFormat.java:444)
        at org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.getRecordReader(CombineHiveInputFormat.java:588)
        at org.apache.hadoop.mapred.MapTask$TrackedRecordReader.<init>(MapTask.java:169)
        at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:429)
        at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
        at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:415)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1642)
        at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
Caused by: org.apache.hive.com.esotericsoftware.kryo.KryoException: Encountered unregistered class ID: 73
Serialization trace:
colExprMap (org.apache.hadoop.hive.ql.exec.TableScanOperator)
aliasToWork (org.apache.hadoop.hive.ql.plan.MapWork)
        at org.apache.hive.com.esotericsoftware.kryo.util.DefaultClassResolver.readClass(DefaultClassResolver.java:119)
        at org.apache.hive.com.esotericsoftware.kryo.Kryo.readClass(Kryo.java:656)
        at org.apache.hive.com.esotericsoftware.kryo.serializers.ObjectField.read(ObjectField.java:99)
        at org.apache.hive.com.esotericsoftware.kryo.serializers.FieldSerializer.read(FieldSerializer.java:507)
        at org.apache.hive.com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:776)
        at org.apache.hive.com.esotericsoftware.kryo.serializers.MapSerializer.read(MapSerializer.java:139)
        at org.apache.hive.com.esotericsoftware.kryo.serializers.MapSerializer.read(MapSerializer.java:17)
        at org.apache.hive.com.esotericsoftware.kryo.Kryo.readObject(Kryo.java:694)
        at org.apache.hive.com.esotericsoftware.kryo.serializers.ObjectField.read(ObjectField.java:106)
        at org.apache.hive.com.esotericsoftware.kryo.serializers.FieldSerializer.read(FieldSerializer.java:507)
        at org.apache.hive.com.esotericsoftware.kryo.Kryo.readObject(Kryo.java:672)
        at org.apache.hadoop.hive.ql.exec.Utilities.deserializeObjectByKryo(Utilities.java:943)
The cause is a bug in Hive 0.13.1: org.apache.hadoop.hive.ql.exec.Utilities in the hive-exec package uses cloningQueryPlanKryo by default to store plan data, and concurrent threads can overwrite each other's state, producing sporadic serialization errors that are very hard to reproduce. The bug is fixed in 0.14. Setting hive.plan.serialization.format to javaXML (the default is kryo) avoids the Kryo path; we did not try this ourselves since the problem is difficult to reproduce (a configuration sketch follows the references). References:
https://issues.apache.org/jira/browse/HIVE-8688
https://issues.apache.org/jira/browse/HIVE-7711
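If you do want to try the workaround, a minimal hive-site.xml sketch (assuming Hive 0.13.x, where javaXML is still an accepted value):

  <property>
    <name>hive.plan.serialization.format</name>
    <!-- default is kryo; javaXML bypasses the Kryo code path at some serialization cost -->
    <value>javaXML</value>
  </property>

The setting should also be applicable per session with set hive.plan.serialization.format=javaXML; before running the query.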



Exception 3: java.io.IOException: Broken pipe
The following errors were reported in the background while executing Hive QL. The cause: while a file was being written to DataNodes, the transfer pipeline broke and the write failed.
2015-08-10 02:46:16,576 WARN  [Thread-80]: mapreduce.JobSubmitter (JobSubmitter.java:copyAndConfigureFiles(151)) - Hadoop command-line option parsing not performed. Implement the Tool interface and execute your application with ToolRunner to remedy this.
2015-08-10 02:46:18,035 WARN  [DataStreamer for file /tmp/hadoop-yarn/staging/hhive/.staging/job_1439027917379_19258/job.jar block BP-1797264656-192.168.4.128-1431244532842:blk_1094532254_20796956]: hdfs.DFSClient (DFSOutputStream.java:run(639)) - DataStreamer Exception
java.io.IOException: Broken pipe
        at sun.nio.ch.FileDispatcherImpl.write0(Native Method)
        at sun.nio.ch.SocketDispatcher.write(SocketDispatcher.java:47)
        at sun.nio.ch.IOUtil.writeFromNativeBuffer(IOUtil.java:93)
        at sun.nio.ch.IOUtil.write(IOUtil.java:65)
        at sun.nio.ch.SocketChannelImpl.write(SocketChannelImpl.java:487)
        at org.apache.hadoop.net.SocketOutputStream$Writer.performIO(SocketOutputStream.java:63)
        at org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:142)
        at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:159)
        at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:117)
        at java.io.BufferedOutputStream.write(BufferedOutputStream.java:122)
        at java.io.DataOutputStream.write(DataOutputStream.java:107)
        at org.apache.hadoop.hdfs.DFSOutputStream$Packet.writeTo(DFSOutputStream.java:285)
        at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:591)
2015-08-10 02:46:18,037 WARN  [DataStreamer for file /tmp/hadoop-yarn/staging/hhive/.staging/job_1439027917379_19261/job.jar block BP-1797264656-192.168.4.128-1431244532842:blk_1094532251_20796953]: hdfs.DFSClient (DFSOutputStream.java:run(639)) - DataStreamer Exception
java.io.IOException: Broken pipe
        at sun.nio.ch.FileDispatcherImpl.write0(Native Method)
        at sun.nio.ch.SocketDispatcher.write(SocketDispatcher.java:47)
        at sun.nio.ch.IOUtil.writeFromNativeBuffer(IOUtil.java:93)
        at sun.nio.ch.IOUtil.write(IOUtil.java:65)
        at sun.nio.ch.SocketChannelImpl.write(SocketChannelImpl.java:487)
        at org.apache.hadoop.net.SocketOutputStream$Writer.performIO(SocketOutputStream.java:63)
        at org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:142)
        at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:159)
        at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:117)
        at java.io.BufferedOutputStream.write(BufferedOutputStream.java:122)
        at java.io.DataOutputStream.write(DataOutputStream.java:107)
        at org.apache.hadoop.hdfs.DFSOutputStream$Packet.writeTo(DFSOutputStream.java:285)
        at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:591)
2015-08-10 02:46:18,044 WARN  [DataStreamer for file /tmp/hadoop-yarn/staging/hhive/.staging/job_1439027917379_19258/job.jar block BP-1797264656-192.168.4.128-1431244532842:blk_1094532254_20796956]: hdfs.DFSClient (DFSOutputStream.java:setupPipelineForAppendOrRecovery(1152)) - Error Recovery for block BP-1797264656-192.168.4.128-1431244532842:blk_1094532254_20796956 in pipeline 192.168.16.71:50010, 192.168.4.146:50010, 192.168.16.70:50010: bad datanode 192.168.16.71:50010
2015-08-10 02:46:18,089 WARN  [DataStreamer for file /tmp/hadoop-yarn/staging/hhive/.staging/job_1439027917379_19260/job.jar block BP-1797264656-192.168.4.128-1431244532842:blk_1094532257_20796959]: hdfs.DFSClient (DFSOutputStream.java:run(639)) - DataStreamer Exception
java.io.IOException: Broken pipe
        at sun.nio.ch.FileDispatcherImpl.write0(Native Method)
        at sun.nio.ch.SocketDispatcher.write(SocketDispatcher.java:47)
        at sun.nio.ch.IOUtil.writeFromNativeBuffer(IOUtil.java:93)
        at sun.nio.ch.IOUtil.write(IOUtil.java:65)
        at sun.nio.ch.SocketChannelImpl.write(SocketChannelImpl.java:487)
        at org.apache.hadoop.net.SocketOutputStream$Writer.performIO(SocketOutputStream.java:63)
        at org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:142)
        at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:159)
        at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:117)
        at java.io.BufferedOutputStream.write(BufferedOutputStream.java:122)
        at java.io.DataOutputStream.write(DataOutputStream.java:107)
        at org.apache.hadoop.hdfs.DFSOutputStream$Packet.writeTo(DFSOutputStream.java:285)
        at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:591)
2015-08-10 02:46:18,127 WARN  [Thread-13]: hdfs.DFSClient (DFSOutputStream.java:waitForAckedSeqno(2074)) - Slow waitForAckedSeqno took 73275ms (threshold=30000ms)
2015-08-10 02:46:18,140 WARN  [Thread-10]: hdfs.DFSClient (DFSOutputStream.java:waitForAckedSeqno(2074)) - Slow waitForAckedSeqno took 73884ms (threshold=30000ms)
2015-08-10 02:46:18,190 WARN  [DataStreamer for file /tmp/hadoop-yarn/staging/hhive/.staging/job_1439027917379_19257/job.jar block BP-1797264656-192.168.4.128-1431244532842:blk_1094532259_20796961]: hdfs.DFSClient (DFSOutputStream.java:run(639)) - DataStreamer Exception
java.io.IOException: Broken pipe
        at sun.nio.ch.FileDispatcherImpl.write0(Native Method)
        at sun.nio.ch.SocketDispatcher.write(SocketDispatcher.java:47)
        at sun.nio.ch.IOUtil.writeFromNativeBuffer(IOUtil.java:93)
        at sun.nio.ch.IOUtil.write(IOUtil.java:65)
        at sun.nio.ch.SocketChannelImpl.write(SocketChannelImpl.java:487)
        at org.apache.hadoop.net.SocketOutputStream$Writer.performIO(SocketOutputStream.java:63)
        at org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:142)
        at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:159)
        at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:117)
        at java.io.BufferedOutputStream.write(BufferedOutputStream.java:122)
        at java.io.DataOutputStream.write(DataOutputStream.java:107)
        at org.apache.hadoop.hdfs.DFSOutputStream$Packet.writeTo(DFSOutputStream.java:285)
        at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:591)
2015-08-10 02:46:18,190 WARN  [DataStreamer for file /tmp/hadoop-yarn/staging/hhive/.staging/job_1439027917379_19256/job.jar block BP-1797264656-192.168.4.128-1431244532842:blk_1094532258_20796960]: hdfs.DFSClient (DFSOutputStream.java:run(639)) - DataStreamer Exception
java.io.IOException: Broken pipe
        at sun.nio.ch.FileDispatcherImpl.write0(Native Method)
        at sun.nio.ch.SocketDispatcher.write(SocketDispatcher.java:47)
        at sun.nio.ch.IOUtil.writeFromNativeBuffer(IOUtil.java:93)
        at sun.nio.ch.IOUtil.write(IOUtil.java:65)
        at sun.nio.ch.SocketChannelImpl.write(SocketChannelImpl.java:487)
        at org.apache.hadoop.net.SocketOutputStream$Writer.performIO(SocketOutputStream.java:63)
        at org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:142)
        at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:159)
        at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:117)
        at java.io.BufferedOutputStream.write(BufferedOutputStream.java:122)
        at java.io.DataOutputStream.write(DataOutputStream.java:107)
        at org.apache.hadoop.hdfs.DFSOutputStream$Packet.writeTo(DFSOutputStream.java:285)
        at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:591)
2015-08-10 02:46:18,258 WARN  [Thread-11]: hdfs.DFSClient (DFSOutputStream.java:waitForAckedSeqno(2074)) - Slow waitForAckedSeqno took 73906ms (threshold=30000ms)
2015-08-10 02:46:18,295 WARN  [Thread-17]: hdfs.DFSClient (DFSOutputStream.java:waitForAckedSeqno(2074)) - Slow waitForAckedSeqno took 74019ms (threshold=30000ms)
2015-08-10 02:46:18,961 WARN  [ResponseProcessor for block BP-1797264656-192.168.4.128-1431244532842:blk_1094532257_20796959]: hdfs.DFSClient (DFSOutputStream.java:run(814)) - Slow ReadProcessor read fields took 68627ms (threshold=30000ms); ack: seqno: -2 status: SUCCESS status: SUCCESS status: ERROR downstreamAckTimeNanos: 0, targets: [192.168.4.119:50010, 192.168.4.118:50010, 192.168.4.147:50010]
Cluster read/write performance can be tuned via the following parameters (see the sketch after this list):
dfs.datanode.handler.count (increase): the number of DataNode service threads. These threads only receive requests and process commands.
dfs.namenode.handler.count (increase): the number of NameNode service threads, used to handle RPC requests.
dfs.namenode.avoid.read.stale.datanode (true): whether to avoid reading from stale DataNodes, i.e. nodes from which no heartbeat has been received within a configured interval. Stale DataNodes are moved to the end of the list of nodes eligible for reads. Worth enabling.
dfs.namenode.avoid.write.stale.datanode (true): similar to the above, avoids writing to stale DataNodes.
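A sketch of the corresponding hdfs-site.xml entries (values illustrative; note that what counts as "stale" is governed by dfs.namenode.stale.datanode.interval, 30000 ms by default):

  <property>
    <name>dfs.namenode.avoid.read.stale.datanode</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.namenode.avoid.write.stale.datanode</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.datanode.handler.count</name>
    <!-- default 10 -->
    <value>20</value>
  </property>
  <property>
    <name>dfs.namenode.handler.count</name>
    <!-- default 10 -->
    <value>64</value>
  </property>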



Exception 4: java.io.EOFException: Premature EOF
java.io.EOFException: Premature EOF: no length prefix available
        at org.apache.hadoop.hdfs.protocolPB.PBHelper.vintPrefixed(PBHelper.java:2109)
        at org.apache.hadoop.hdfs.protocol.datatransfer.PipelineAck.readFields(PipelineAck.java:176)
        at org.apache.hadoop.hdfs.server.datanode.BlockReceiver$PacketResponder.run(BlockReceiver.java:1151)
        at java.lang.Thread.run(Thread.java:745)
2015-08-05 06:28:53,165 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Exception for BP-1797264656-192.168.4.128-1431244532842:blk_1093571274_19834793
java.io.IOException: Premature EOF from inputStream
        at org.apache.hadoop.io.IOUtils.readFully(IOUtils.java:194)
        at org.apache.hadoop.hdfs.protocol.datatransfer.PacketReceiver.doReadFully(PacketReceiver.java:213)
        at org.apache.hadoop.hdfs.protocol.datatransfer.Receiver.processOp(Receiver.java:72)
        at org.apache.hadoop.hdfs.server.datanode.DataXceiver.run(DataXceiver.java:226)
        at java.lang.Thread.run(Thread.java:745)
2015-08-05 06:28:53,165 WARN org.apache.hadoop.hdfs.server.datanode.DataNode: IOException in BlockReceiver.run():
The file operation outlived its lease: in effect, the file was deleted while the data stream was still operating on it. Increasing dfs.datanode.max.transfer.threads can mitigate this (see the sketch after the references).
References:
https://issues.apache.org/jira/browse/HDFS-4723
http://www.sharpcloud.cn/thread-4927-1-1.html
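A sketch of the hdfs-site.xml change (dfs.datanode.max.transfer.threads supersedes the older dfs.datanode.max.xcievers; the default is 4096 and the value below is illustrative):

  <property>
    <name>dfs.datanode.max.transfer.threads</name>
    <!-- upper bound on threads a DataNode may use for block transfer; default 4096 -->
    <value>8192</value>
  </property>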



Exception 5: Connection reset by peer
java.io.IOException: Connection reset by peer
        at sun.nio.ch.FileDispatcherImpl.write0(Native Method)
        at sun.nio.ch.SocketDispatcher.write(SocketDispatcher.java:47)
        at sun.nio.ch.IOUtil.writeFromNativeBuffer(IOUtil.java:93)
        at sun.nio.ch.IOUtil.write(IOUtil.java:65)
        at sun.nio.ch.SocketChannelImpl.write(SocketChannelImpl.java:487)
        at org.apache.hadoop.net.SocketOutputStream$Writer.performIO(SocketOutputStream.java:63)
        at org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:142)
        at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:159)
        at java.io.BufferedOutputStream.flush(BufferedOutputStream.java:140)
        at java.io.DataOutputStream.flush(DataOutputStream.java:123)
        at org.apache.hadoop.hdfs.server.datanode.BlockReceiver$PacketResponder.sendAckUpstreamUnprotected(BlockReceiver.java:1396)
        at org.apache.hadoop.hdfs.server.datanode.BlockReceiver$PacketResponder.sendAckUpstream(BlockReceiver.java:1335)
        at org.apache.hadoop.hdfs.server.datanode.BlockReceiver$PacketResponder.run(BlockReceiver.java:1256)
        at java.lang.Thread.run(Thread.java:745)
java.io.IOException: Connection reset by peer
        at sun.nio.ch.FileDispatcherImpl.write0(Native Method)
        at sun.nio.ch.SocketDispatcher.write(SocketDispatcher.java:47)
        at sun.nio.ch.IOUtil.writeFromNativeBuffer(IOUtil.java:93)
        at sun.nio.ch.IOUtil.write(IOUtil.java:65)
        at sun.nio.ch.SocketChannelImpl.write(SocketChannelImpl.java:487)
        at org.apache.hadoop.net.SocketOutputStream$Writer.performIO(SocketOutputStream.java:63)
        at org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:142)
        at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:159)
        at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:117)
The DataNode reset the connection: the client is stuck in an RPC to the NameNode, and RPCs can currently wait a long time when the server is busy.
The following parameters can be tuned (see the sketch after this list):
dfs.namenode.handler.count (increase): the number of NameNode service threads, used to handle RPC requests.
dfs.namenode.replication.interval (decrease): how frequently, in seconds, the NameNode recomputes DataNode replication status.
dfs.client.failover.connection.retries (increase recommended): expert setting. The number of IPC client retries on connection failure; increase it on unstable networks.
dfs.client.failover.connection.retries.on.timeouts (increase on unstable networks): expert setting. The number of IPC client retries on timeout failures specifically; increase it on unstable networks.
Reference: https://issues.apache.org/jira/browse/HADOOP-3657
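A sketch of the corresponding hdfs-site.xml entries (values illustrative; defaults are 10 handler threads, a 3-second replication interval, and 0 retries for both failover settings; the two dfs.client.* settings belong in the client-side configuration):

  <property>
    <name>dfs.namenode.handler.count</name>
    <value>64</value>
  </property>
  <property>
    <name>dfs.namenode.replication.interval</name>
    <!-- seconds between replication-work computations; default 3 -->
    <value>1</value>
  </property>
  <property>
    <name>dfs.client.failover.connection.retries</name>
    <!-- default 0; raise on unstable networks -->
    <value>3</value>
  </property>
  <property>
    <name>dfs.client.failover.connection.retries.on.timeouts</name>
    <!-- default 0; raise on unstable networks -->
    <value>3</value>
  </property>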



Exception 6: SemanticException Schema of both sides of union should match
2015-08-10 17:22:35,390 ERROR [main]: ql.Driver (SessionState.java:printError(567)) - FAILED: SemanticException Schema of both sides of union should match.
org.apache.hadoop.hive.ql.parse.SemanticException: Schema of both sides of union should match.
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genUnionPlan(SemanticAnalyzer.java:8430)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:9004)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:9025)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:9011)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:9397)
        at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:204)
        at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:437)
        at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:335)
        at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1026)
        at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1091)
        at org.apache.hadoop.hive.ql.Driver.run(Driver.java:962)
        at org.apache.hadoop.hive.ql.Driver.run(Driver.java:952)
        at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:269)
        at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:221)
        at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:431)
        at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:800)
        at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:694)
        at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:633)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:606)
        at org.apache.hadoop.util.RunJar.main(RunJar.java:212)
This is a syntax error: the two sides of the UNION select different schemas. It came from a test SQL statement, not a cluster job; review and correct the Hive QL.
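A hypothetical reproduction and fix (table and column names are invented; in Hive 0.13, UNION ALL must additionally sit inside a subquery):

  -- Fails with "Schema of both sides of union should match":
  -- the two branches select different column counts and types.
  SELECT * FROM (
    SELECT id, name FROM t_a
    UNION ALL
    SELECT id, amount, dt FROM t_b
  ) u;

  -- Fix: give both branches the same column list with matching types.
  SELECT * FROM (
    SELECT id, name FROM t_a
    UNION ALL
    SELECT id, CAST(amount AS STRING) AS name FROM t_b
  ) u;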



Exception 7: java.net.SocketTimeoutException
java.io.IOException: Bad response ERROR for block BP-1797264656-192.168.4.128-1431244532842:blk_1094409843_20674430 from datanode 192.168.4.118:50010
        at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer$ResponseProcessor.run(DFSOutputStream.java:840)
java.net.SocketTimeoutException: 60000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/192.168.16.70:50010 remote=/192.168.4.143:52416]
        at org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:164)
        at org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:161)
        at org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:131)
        at java.io.BufferedInputStream.fill(BufferedInputStream.java:235)
        at java.io.BufferedInputStream.read1(BufferedInputStream.java:275)
        at java.io.BufferedInputStream.read(BufferedInputStream.java:334)
        at java.io.DataInputStream.read(DataInputStream.java:149)
        at org.apache.hadoop.io.IOUtils.readFully(IOUtils.java:192)
        at org.apache.hadoop.hdfs.protocol.datatransfer.PacketReceiver.doReadFully(PacketReceiver.java:213)
        at org.apache.hadoop.hdfs.protocol.datatransfer.PacketReceiver.doRead(PacketReceiver.java:134)
        at org.apache.hadoop.hdfs.protocol.datatransfer.PacketReceiver.receiveNextPacket(PacketReceiver.java:109)
        at org.apache.hadoop.hdfs.server.datanode.BlockReceiver.receivePacket(BlockReceiver.java:468)
        at org.apache.hadoop.hdfs.server.datanode.BlockReceiver.receiveBlock(BlockReceiver.java:772)
        at org.apache.hadoop.hdfs.server.datanode.DataXceiver.writeBlock(DataXceiver.java:724)
        at org.apache.hadoop.hdfs.protocol.datatransfer.Receiver.opWriteBlock(Receiver.java:126)
        at org.apache.hadoop.hdfs.protocol.datatransfer.Receiver.processOp(Receiver.java:72)
        at org.apache.hadoop.hdfs.server.datanode.DataXceiver.run(DataXceiver.java:226)
        at java.lang.Thread.run(Thread.java:745)
This is an I/O timeout; increasing the following two parameters resolves it (see the sketch after the reference):
dfs.datanode.socket.write.timeout (increase): timeout for writing data to a DataNode.
dfs.client.socket-timeout (increase): timeout for network communication between DFS clients and the cluster.
Reference: http://www.sharpcloud.cn/thread-4927-1-1.html
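A sketch of the two settings in hdfs-site.xml (both in milliseconds; defaults are 480000 for the write timeout and 60000 for the client socket timeout, matching the "60000 millis timeout" in the log above; raised values are illustrative):

  <property>
    <name>dfs.datanode.socket.write.timeout</name>
    <value>600000</value>
  </property>
  <property>
    <name>dfs.client.socket-timeout</name>
    <value>120000</value>
  </property>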
