Exception 1: java.lang.InterruptedException
When a Hive script is executed on the cluster, hive.log reports the following error:
java.lang.InterruptedException
    at java.util.concurrent.FutureTask.awaitDone(FutureTask.java:400)
    at java.util.concurrent.FutureTask.get(FutureTask.java:187)
    at org.apache.hadoop.ipc.Client$Connection.sendRpcRequest(Client.java:1030)
    at org.apache.hadoop.ipc.Client.call(Client.java:1384)
    at org.apache.hadoop.ipc.Client.call(Client.java:1364)
    at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:206)
    at com.sun.proxy.$Proxy33.getTaskReports(Unknown Source)
    at org.apache.hadoop.mapreduce.v2.api.impl.pb.client.MRClientProtocolPBClientImpl.getTaskReports(MRClientProtocolPBClientImpl.java:188)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at org.apache.hadoop.mapred.ClientServiceDelegate.invoke(ClientServiceDelegate.java:320)
    at org.apache.hadoop.mapred.ClientServiceDelegate.getTaskReports(ClientServiceDelegate.java:444)
    at org.apache.hadoop.mapred.YARNRunner.getTaskReports(YARNRunner.java:572)
    at org.apache.hadoop.mapreduce.Job$3.run(Job.java:543)
    at org.apache.hadoop.mapreduce.Job$3.run(Job.java:541)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1642)
    at org.apache.hadoop.mapreduce.Job.getTaskReports(Job.java:541)
    at org.apache.hadoop.mapred.JobClient.getTaskReports(JobClient.java:639)
    at org.apache.hadoop.mapred.JobClient.getMapTaskReports(JobClient.java:629)
    at org.apache.hadoop.hive.ql.exec.mr.HadoopJobExecHelper.progress(HadoopJobExecHelper.java:259)
    at org.apache.hadoop.hive.ql.exec.mr.HadoopJobExecHelper.progress(HadoopJobExecHelper.java:547)
    at org.apache.hadoop.hive.ql.exec.mr.ExecDriver.execute(ExecDriver.java:426)
    at org.apache.hadoop.hive.ql.exec.mr.MapRedTask.execute(MapRedTask.java:136)
    at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:153)
    at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:85)
    at org.apache.hadoop.hive.ql.exec.TaskRunner.run(TaskRunner.java:72)
2015-08-05 06:30:51,988 WARN [Thread-854]: mapred.ClientServiceDelegate (ClientServiceDelegate.java:invoke(338)) - ClientServiceDelegate invoke call interrupted
java.lang.InterruptedException: sleep interrupted
    at java.lang.Thread.sleep(Native Method)
    at org.apache.hadoop.mapred.ClientServiceDelegate.invoke(ClientServiceDelegate.java:336)
    at org.apache.hadoop.mapred.ClientServiceDelegate.getTaskReports(ClientServiceDelegate.java:444)
    at org.apache.hadoop.mapred.YARNRunner.getTaskReports(YARNRunner.java:572)
    at org.apache.hadoop.mapreduce.Job$3.run(Job.java:543)
    at org.apache.hadoop.mapreduce.Job$3.run(Job.java:541)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1642)
    at org.apache.hadoop.mapreduce.Job.getTaskReports(Job.java:541)
    at org.apache.hadoop.mapred.JobClient.getTaskReports(JobClient.java:639)
    at org.apache.hadoop.mapred.JobClient.getMapTaskReports(JobClient.java:629)
    at org.apache.hadoop.hive.ql.exec.mr.HadoopJobExecHelper.progress(HadoopJobExecHelper.java:259)
    at org.apache.hadoop.hive.ql.exec.mr.HadoopJobExecHelper.progress(HadoopJobExecHelper.java:547)
    at org.apache.hadoop.hive.ql.exec.mr.ExecDriver.execute(ExecDriver.java:426)
    at org.apache.hadoop.hive.ql.exec.mr.MapRedTask.execute(MapRedTask.java:136)
    at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:153)
    at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:85)
    at org.apache.hadoop.hive.ql.exec.TaskRunner.run(TaskRunner.java:72)
2015-08-05 06:30:51,990 ERROR [Thread-854]: exec.Task (SessionState.java:printError(567)) - Job Submission failed with exception 'org.apache.hadoop.yarn.exceptions.YarnRuntimeException(java.lang.InterruptedException: sleep interrupted)'
org.apache.hadoop.yarn.exceptions.YarnRuntimeException: java.lang.InterruptedException: sleep interrupted
    at org.apache.hadoop.mapred.ClientServiceDelegate.invoke(ClientServiceDelegate.java:339)
    at org.apache.hadoop.mapred.ClientServiceDelegate.getTaskReports(ClientServiceDelegate.java:444)
    at org.apache.hadoop.mapred.YARNRunner.getTaskReports(YARNRunner.java:572)
    at org.apache.hadoop.mapreduce.Job$3.run(Job.java:543)
    at org.apache.hadoop.mapreduce.Job$3.run(Job.java:541)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1642)
    at org.apache.hadoop.mapreduce.Job.getTaskReports(Job.java:541)
    at org.apache.hadoop.mapred.JobClient.getTaskReports(JobClient.java:639)
    at org.apache.hadoop.mapred.JobClient.getMapTaskReports(JobClient.java:629)
    at org.apache.hadoop.hive.ql.exec.mr.HadoopJobExecHelper.progress(HadoopJobExecHelper.java:259)
    at org.apache.hadoop.hive.ql.exec.mr.HadoopJobExecHelper.progress(HadoopJobExecHelper.java:547)
    at org.apache.hadoop.hive.ql.exec.mr.ExecDriver.execute(ExecDriver.java:426)
    at org.apache.hadoop.hive.ql.exec.mr.MapRedTask.execute(MapRedTask.java:136)
    at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:153)
    at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:85)
    at org.apache.hadoop.hive.ql.exec.TaskRunner.run(TaskRunner.java:72)
Caused by: java.lang.InterruptedException: sleep interrupted
    at java.lang.Thread.sleep(Native Method)
    at org.apache.hadoop.mapred.ClientServiceDelegate.invoke(ClientServiceDelegate.java:336)
    ... 17 more
The cause is that the client thread waited a long time without obtaining the resources it needed and was interrupted. The following settings can be increased to relieve the pressure (a sketch of the corresponding hdfs-site.xml entries follows):
dfs.datanode.handler.count (increase): number of DataNode service threads; these threads only receive requests and process commands.
dfs.namenode.handler.count (increase): number of NameNode service threads used to handle RPC requests.
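A minimal hdfs-site.xml sketch of these two settings. The values are illustrative only (both default to 10 in stock Hadoop 2.x) and should be sized to the cluster; the NameNode/DataNode must be restarted for them to take effect.

<!-- hdfs-site.xml: illustrative handler-thread counts; tune to your cluster -->
<property>
  <name>dfs.namenode.handler.count</name>
  <value>64</value>  <!-- NameNode RPC handler threads -->
</property>
<property>
  <name>dfs.datanode.handler.count</name>
  <value>16</value>  <!-- DataNode service threads -->
</property>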
Exception 2: KryoException
hive.log throws the following exception:
Error: java.lang.RuntimeException: org.apache.hive.com.esotericsoftware.kryo.KryoException: Encountered unregistered class ID: 73
Serialization trace:
colExprMap (org.apache.hadoop.hive.ql.exec.TableScanOperator)
aliasToWork (org.apache.hadoop.hive.ql.plan.MapWork)
    at org.apache.hadoop.hive.ql.exec.Utilities.getBaseWork(Utilities.java:366)
    at org.apache.hadoop.hive.ql.exec.Utilities.getMapWork(Utilities.java:277)
    at org.apache.hadoop.hive.ql.io.HiveInputFormat.init(HiveInputFormat.java:258)
    at org.apache.hadoop.hive.ql.io.HiveInputFormat.pushProjectionsAndFilters(HiveInputFormat.java:451)
    at org.apache.hadoop.hive.ql.io.HiveInputFormat.pushProjectionsAndFilters(HiveInputFormat.java:444)
    at org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.getRecordReader(CombineHiveInputFormat.java:588)
    at org.apache.hadoop.mapred.MapTask$TrackedRecordReader.<init>(MapTask.java:169)
    at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:429)
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
    at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1642)
    at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
Caused by: org.apache.hive.com.esotericsoftware.kryo.KryoException: Encountered unregistered class ID: 73
Serialization trace:
colExprMap (org.apache.hadoop.hive.ql.exec.TableScanOperator)
aliasToWork (org.apache.hadoop.hive.ql.plan.MapWork)
    at org.apache.hive.com.esotericsoftware.kryo.util.DefaultClassResolver.readClass(DefaultClassResolver.java:119)
    at org.apache.hive.com.esotericsoftware.kryo.Kryo.readClass(Kryo.java:656)
    at org.apache.hive.com.esotericsoftware.kryo.serializers.ObjectField.read(ObjectField.java:99)
    at org.apache.hive.com.esotericsoftware.kryo.serializers.FieldSerializer.read(FieldSerializer.java:507)
    at org.apache.hive.com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:776)
    at org.apache.hive.com.esotericsoftware.kryo.serializers.MapSerializer.read(MapSerializer.java:139)
    at org.apache.hive.com.esotericsoftware.kryo.serializers.MapSerializer.read(MapSerializer.java:17)
    at org.apache.hive.com.esotericsoftware.kryo.Kryo.readObject(Kryo.java:694)
    at org.apache.hive.com.esotericsoftware.kryo.serializers.ObjectField.read(ObjectField.java:106)
    at org.apache.hive.com.esotericsoftware.kryo.serializers.FieldSerializer.read(FieldSerializer.java:507)
    at org.apache.hive.com.esotericsoftware.kryo.Kryo.readObject(Kryo.java:672)
    at org.apache.hadoop.hive.ql.exec.Utilities.deserializeObjectByKryo(Utilities.java:943)
This is caused by a bug in Hive 0.13.1: org.apache.hadoop.hive.ql.exec.Utilities in the hive-exec jar uses cloningQueryPlanKryo by default to hold plan data, and under concurrent use one thread can overwrite another's state, producing sporadic serialization errors that are hard to reproduce. The bug is fixed in Hive 0.14. One workaround is to change hive.plan.serialization.format from its default (kryo) to javaXML; we have not verified this, because the problem is difficult to reproduce. A sketch of the setting follows, and the JIRA references are listed after it.
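A sketch of the workaround in hive-site.xml (untested here, as noted above; on Hive 0.13 the property may also be settable per session with set hive.plan.serialization.format=javaXML if your deployment allows changing it at runtime):

<!-- hive-site.xml: switch query-plan serialization from the default kryo to javaXML -->
<property>
  <name>hive.plan.serialization.format</name>
  <value>javaXML</value>
</property>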
References:
https://issues.apache.org/jira/browse/HIVE-8688
https://issues.apache.org/jira/browse/HIVE-7711
Exception 3: java.io.IOException: Broken pipe (断开的管道)
The following error was reported in the background while executing Hive QL. The cause is that the transfer pipeline was broken while writing a file to a DataNode, so the write failed.
2015-08-10 02:46:16,576 WARN [Thread-80]: mapreduce.JobSubmitter (JobSubmitter.java:copyAndConfigureFiles(151)) - Hadoop command-line option parsing not performed. Implement the Tool interface and execute your application with ToolRunner to remedy this.
2015-08-10 02:46:18,035 WARN [DataStreamer for file /tmp/hadoop-yarn/staging/hhive/.staging/job_1439027917379_19258/job.jar block BP-1797264656-192.168.4.128-1431244532842:blk_1094532254_20796956]: hdfs.DFSClient (DFSOutputStream.java:run(639)) - DataStreamer Exception
java.io.IOException: 断开的管道
    at sun.nio.ch.FileDispatcherImpl.write0(Native Method)
    at sun.nio.ch.SocketDispatcher.write(SocketDispatcher.java:47)
    at sun.nio.ch.IOUtil.writeFromNativeBuffer(IOUtil.java:93)
    at sun.nio.ch.IOUtil.write(IOUtil.java:65)
    at sun.nio.ch.SocketChannelImpl.write(SocketChannelImpl.java:487)
    at org.apache.hadoop.net.SocketOutputStream$Writer.performIO(SocketOutputStream.java:63)
    at org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:142)
    at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:159)
    at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:117)
    at java.io.BufferedOutputStream.write(BufferedOutputStream.java:122)
    at java.io.DataOutputStream.write(DataOutputStream.java:107)
    at org.apache.hadoop.hdfs.DFSOutputStream$Packet.writeTo(DFSOutputStream.java:285)
    at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:591)
2015-08-10 02:46:18,037 WARN [DataStreamer for file /tmp/hadoop-yarn/staging/hhive/.staging/job_1439027917379_19261/job.jar block BP-1797264656-192.168.4.128-1431244532842:blk_1094532251_20796953]: hdfs.DFSClient (DFSOutputStream.java:run(639)) - DataStreamer Exception
java.io.IOException: 断开的管道
    at sun.nio.ch.FileDispatcherImpl.write0(Native Method)
    at sun.nio.ch.SocketDispatcher.write(SocketDispatcher.java:47)
    at sun.nio.ch.IOUtil.writeFromNativeBuffer(IOUtil.java:93)
    at sun.nio.ch.IOUtil.write(IOUtil.java:65)
    at sun.nio.ch.SocketChannelImpl.write(SocketChannelImpl.java:487)
    at org.apache.hadoop.net.SocketOutputStream$Writer.performIO(SocketOutputStream.java:63)
    at org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:142)
    at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:159)
    at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:117)
    at java.io.BufferedOutputStream.write(BufferedOutputStream.java:122)
    at java.io.DataOutputStream.write(DataOutputStream.java:107)
    at org.apache.hadoop.hdfs.DFSOutputStream$Packet.writeTo(DFSOutputStream.java:285)
    at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:591)
2015-08-10 02:46:18,044 WARN [DataStreamer for file /tmp/hadoop-yarn/staging/hhive/.staging/job_1439027917379_19258/job.jar block BP-1797264656-192.168.4.128-1431244532842:blk_1094532254_20796956]: hdfs.DFSClient (DFSOutputStream.java:setupPipelineForAppendOrRecovery(1152)) - Error Recovery for block BP-1797264656-192.168.4.128-1431244532842:blk_1094532254_20796956 in pipeline 192.168.16.71:50010, 192.168.4.146:50010, 192.168.16.70:50010: bad datanode 192.168.16.71:50010
2015-08-10 02:46:18,089 WARN [DataStreamer for file /tmp/hadoop-yarn/staging/hhive/.staging/job_1439027917379_19260/job.jar block BP-1797264656-192.168.4.128-1431244532842:blk_1094532257_20796959]: hdfs.DFSClient (DFSOutputStream.java:run(639)) - DataStreamer Exception
java.io.IOException: 断开的管道
    at sun.nio.ch.FileDispatcherImpl.write0(Native Method)
    at sun.nio.ch.SocketDispatcher.write(SocketDispatcher.java:47)
    at sun.nio.ch.IOUtil.writeFromNativeBuffer(IOUtil.java:93)
    at sun.nio.ch.IOUtil.write(IOUtil.java:65)
    at sun.nio.ch.SocketChannelImpl.write(SocketChannelImpl.java:487)
    at org.apache.hadoop.net.SocketOutputStream$Writer.performIO(SocketOutputStream.java:63)
    at org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:142)
    at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:159)
    at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:117)
    at java.io.BufferedOutputStream.write(BufferedOutputStream.java:122)
    at java.io.DataOutputStream.write(DataOutputStream.java:107)
    at org.apache.hadoop.hdfs.DFSOutputStream$Packet.writeTo(DFSOutputStream.java:285)
    at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:591)
2015-08-10 02:46:18,127 WARN [Thread-13]: hdfs.DFSClient (DFSOutputStream.java:waitForAckedSeqno(2074)) - Slow waitForAckedSeqno took 73275ms (threshold=30000ms)
2015-08-10 02:46:18,140 WARN [Thread-10]: hdfs.DFSClient (DFSOutputStream.java:waitForAckedSeqno(2074)) - Slow waitForAckedSeqno took 73884ms (threshold=30000ms)
2015-08-10 02:46:18,190 WARN [DataStreamer for file /tmp/hadoop-yarn/staging/hhive/.staging/job_1439027917379_19257/job.jar block BP-1797264656-192.168.4.128-1431244532842:blk_1094532259_20796961]: hdfs.DFSClient (DFSOutputStream.java:run(639)) - DataStreamer Exception
java.io.IOException: 断开的管道
    at sun.nio.ch.FileDispatcherImpl.write0(Native Method)
    at sun.nio.ch.SocketDispatcher.write(SocketDispatcher.java:47)
    at sun.nio.ch.IOUtil.writeFromNativeBuffer(IOUtil.java:93)
    at sun.nio.ch.IOUtil.write(IOUtil.java:65)
    at sun.nio.ch.SocketChannelImpl.write(SocketChannelImpl.java:487)
    at org.apache.hadoop.net.SocketOutputStream$Writer.performIO(SocketOutputStream.java:63)
    at org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:142)
    at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:159)
    at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:117)
    at java.io.BufferedOutputStream.write(BufferedOutputStream.java:122)
    at java.io.DataOutputStream.write(DataOutputStream.java:107)
    at org.apache.hadoop.hdfs.DFSOutputStream$Packet.writeTo(DFSOutputStream.java:285)
    at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:591)
2015-08-10 02:46:18,190 WARN [DataStreamer for file /tmp/hadoop-yarn/staging/hhive/.staging/job_1439027917379_19256/job.jar block BP-1797264656-192.168.4.128-1431244532842:blk_1094532258_20796960]: hdfs.DFSClient (DFSOutputStream.java:run(639)) - DataStreamer Exception
java.io.IOException: 断开的管道
    at sun.nio.ch.FileDispatcherImpl.write0(Native Method)
    at sun.nio.ch.SocketDispatcher.write(SocketDispatcher.java:47)
    at sun.nio.ch.IOUtil.writeFromNativeBuffer(IOUtil.java:93)
    at sun.nio.ch.IOUtil.write(IOUtil.java:65)
    at sun.nio.ch.SocketChannelImpl.write(SocketChannelImpl.java:487)
    at org.apache.hadoop.net.SocketOutputStream$Writer.performIO(SocketOutputStream.java:63)
    at org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:142)
    at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:159)
    at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:117)
    at java.io.BufferedOutputStream.write(BufferedOutputStream.java:122)
    at java.io.DataOutputStream.write(DataOutputStream.java:107)
    at org.apache.hadoop.hdfs.DFSOutputStream$Packet.writeTo(DFSOutputStream.java:285)
    at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:591)
2015-08-10 02:46:18,258 WARN [Thread-11]: hdfs.DFSClient (DFSOutputStream.java:waitForAckedSeqno(2074)) - Slow waitForAckedSeqno took 73906ms (threshold=30000ms)
2015-08-10 02:46:18,295 WARN [Thread-17]: hdfs.DFSClient (DFSOutputStream.java:waitForAckedSeqno(2074)) - Slow waitForAckedSeqno took 74019ms (threshold=30000ms)
2015-08-10 02:46:18,961 WARN [ResponseProcessor for block BP-1797264656-192.168.4.128-1431244532842:blk_1094532257_20796959]: hdfs.DFSClient (DFSOutputStream.java:run(814)) - Slow ReadProcessor read fields took 68627ms (threshold=30000ms); ack: seqno: -2 status: SUCCESS status: SUCCESS status: ERROR downstreamAckTimeNanos: 0, targets: [192.168.4.119:50010, 192.168.4.118:50010, 192.168.4.147:50010]
The following parameters can be tuned to improve cluster read/write performance (see the hdfs-site.xml sketch below):
dfs.datanode.handler.count (increase): number of DataNode service threads; these threads only receive requests and process commands.
dfs.namenode.handler.count (increase): number of NameNode service threads used to handle RPC requests.
dfs.namenode.avoid.read.stale.datanode (true): whether to avoid reading from stale DataNodes, i.e. DataNodes that have not sent a heartbeat within the configured interval; stale nodes are moved to the end of the candidate list for reads. Worth enabling.
dfs.namenode.avoid.write.stale.datanode (true): the write-side counterpart, to avoid writing to stale DataNodes.
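A minimal hdfs-site.xml sketch of the stale-DataNode settings (the handler-count entries are the same as in the sketch under Exception 1). Both flags default to false in Hadoop 2.x; staleness is judged against dfs.namenode.stale.datanode.interval, 30 seconds by default.

<!-- hdfs-site.xml: prefer healthy DataNodes over stale ones for reads and writes -->
<property>
  <name>dfs.namenode.avoid.read.stale.datanode</name>
  <value>true</value>
</property>
<property>
  <name>dfs.namenode.avoid.write.stale.datanode</name>
  <value>true</value>
</property>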
Exception 4: java.io.EOFException: Premature EOF
java.io.EOFException: Premature EOF: no length prefix available
    at org.apache.hadoop.hdfs.protocolPB.PBHelper.vintPrefixed(PBHelper.java:2109)
    at org.apache.hadoop.hdfs.protocol.datatransfer.PipelineAck.readFields(PipelineAck.java:176)
    at org.apache.hadoop.hdfs.server.datanode.BlockReceiver$PacketResponder.run(BlockReceiver.java:1151)
    at java.lang.Thread.run(Thread.java:745)
2015-08-05 06:28:53,165 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Exception for BP-1797264656-192.168.4.128-1431244532842:blk_1093571274_19834793
java.io.IOException: Premature EOF from inputStream
    at org.apache.hadoop.io.IOUtils.readFully(IOUtils.java:194)
    at org.apache.hadoop.hdfs.protocol.datatransfer.PacketReceiver.doReadFully(PacketReceiver.java:213)
    at org.apache.hadoop.hdfs.protocol.datatransfer.Receiver.processOp(Receiver.java:72)
    at org.apache.hadoop.hdfs.server.datanode.DataXceiver.run(DataXceiver.java:226)
    at java.lang.Thread.run(Thread.java:745)
2015-08-05 06:28:53,165 WARN org.apache.hadoop.hdfs.server.datanode.DataNode: IOException in BlockReceiver.run():
The file operation outlived its lease; in effect, the file was deleted while the data stream operation was still in progress. This can be mitigated by increasing dfs.datanode.max.transfer.threads (see the sketch below).
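A minimal hdfs-site.xml sketch for the DataNode side (this property replaces the older dfs.datanode.max.xcievers; the stock default is 4096, and the value below is only an illustration):

<!-- hdfs-site.xml: maximum threads a DataNode uses for block transfers -->
<property>
  <name>dfs.datanode.max.transfer.threads</name>
  <value>8192</value>
</property>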
References:
https://issues.apache.org/jira/browse/HDFS-4723
http://www.sharpcloud.cn/thread-4927-1-1.html
Exception 5: Connection reset by peer
java.io.IOException: Connection reset by peer
    at sun.nio.ch.FileDispatcherImpl.write0(Native Method)
    at sun.nio.ch.SocketDispatcher.write(SocketDispatcher.java:47)
    at sun.nio.ch.IOUtil.writeFromNativeBuffer(IOUtil.java:93)
    at sun.nio.ch.IOUtil.write(IOUtil.java:65)
    at sun.nio.ch.SocketChannelImpl.write(SocketChannelImpl.java:487)
    at org.apache.hadoop.net.SocketOutputStream$Writer.performIO(SocketOutputStream.java:63)
    at org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:142)
    at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:159)
    at java.io.BufferedOutputStream.flush(BufferedOutputStream.java:140)
    at java.io.DataOutputStream.flush(DataOutputStream.java:123)
    at org.apache.hadoop.hdfs.server.datanode.BlockReceiver$PacketResponder.sendAckUpstreamUnprotected(BlockReceiver.java:1396)
    at org.apache.hadoop.hdfs.server.datanode.BlockReceiver$PacketResponder.sendAckUpstream(BlockReceiver.java:1335)
    at org.apache.hadoop.hdfs.server.datanode.BlockReceiver$PacketResponder.run(BlockReceiver.java:1256)
    at java.lang.Thread.run(Thread.java:745)
java.io.IOException: Connection reset by peer
    at sun.nio.ch.FileDispatcherImpl.write0(Native Method)
    at sun.nio.ch.SocketDispatcher.write(SocketDispatcher.java:47)
    at sun.nio.ch.IOUtil.writeFromNativeBuffer(IOUtil.java:93)
    at sun.nio.ch.IOUtil.write(IOUtil.java:65)
    at sun.nio.ch.SocketChannelImpl.write(SocketChannelImpl.java:487)
    at org.apache.hadoop.net.SocketOutputStream$Writer.performIO(SocketOutputStream.java:63)
    at org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:142)
    at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:159)
    at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:117)
The DataNode reset the connection: the client was stuck in an RPC to the NameNode, and RPCs can currently wait a long time when the server is busy. The following parameters can be tuned to mitigate this (see the sketch below):
dfs.namenode.handler.count (increase): number of NameNode service threads used to handle RPC requests.
dfs.namenode.replication.interval (decrease): how often, in seconds, the NameNode recomputes replication work for DataNodes.
dfs.client.failover.connection.retries (increase when the network is unstable): expert setting; number of times the IPC client retries on connection failure.
dfs.client.failover.connection.retries.on.timeouts (increase when the network is unstable): expert setting; number of times the IPC client retries when the failure is specifically a timeout.
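A minimal hdfs-site.xml sketch for the client-side retry settings. Note that these two properties apply to HA failover proxy connections, both default to 0, and the values below are illustrative only; dfs.namenode.handler.count and dfs.namenode.replication.interval are server-side settings changed on the NameNode.

<!-- hdfs-site.xml (client side): retry connections more aggressively on unstable networks -->
<property>
  <name>dfs.client.failover.connection.retries</name>
  <value>3</value>
</property>
<property>
  <name>dfs.client.failover.connection.retries.on.timeouts</name>
  <value>3</value>
</property>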
Reference: https://issues.apache.org/jira/browse/HADOOP-3657
Exception 6: SemanticException Schema of both sides of union should match
2015-08-10 17:22:35,390 ERROR [main]: ql.Driver (SessionState.java:printError(567)) - FAILED: SemanticException Schema of both sides of union should match.
org.apache.hadoop.hive.ql.parse.SemanticException: Schema of both sides of union should match.
    at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genUnionPlan(SemanticAnalyzer.java:8430)
    at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:9004)
    at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:9025)
    at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:9011)
    at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:9397)
    at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:204)
    at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:437)
    at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:335)
    at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1026)
    at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1091)
    at org.apache.hadoop.hive.ql.Driver.run(Driver.java:962)
    at org.apache.hadoop.hive.ql.Driver.run(Driver.java:952)
    at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:269)
    at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:221)
    at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:431)
    at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:800)
    at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:694)
    at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:633)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at org.apache.hadoop.util.RunJar.main(RunJar.java:212)
This is a query error: the two sides of the UNION select different schemas. It came up while testing SQL, not from a cluster job; checking and correcting the Hive QL is enough. A hypothetical illustration is sketched below.
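The following sketch uses invented table and column names, and wraps the UNION ALL in a FROM subquery because older Hive releases only accept it in that form:

-- Fails: the left branch selects two columns, the right branch only one,
-- so the schemas of the two sides of the UNION do not match.
SELECT * FROM (
  SELECT id, name FROM t_user_a
  UNION ALL
  SELECT id FROM t_user_b
) u;

-- Works: both branches select the same number of columns with compatible types.
SELECT * FROM (
  SELECT id, name FROM t_user_a
  UNION ALL
  SELECT id, CAST(nick AS STRING) AS name FROM t_user_b
) u;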
Exception 7: java.net.SocketTimeoutException
java.io.IOException: Bad response ERROR for block BP-1797264656-192.168.4.128-1431244532842:blk_1094409843_20674430 from datanode 192.168.4.118:50010
    at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer$ResponseProcessor.run(DFSOutputStream.java:840)
java.net.SocketTimeoutException: 60000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/192.168.16.70:50010 remote=/192.168.4.143:52416]
    at org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:164)
    at org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:161)
    at org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:131)
    at java.io.BufferedInputStream.fill(BufferedInputStream.java:235)
    at java.io.BufferedInputStream.read1(BufferedInputStream.java:275)
    at java.io.BufferedInputStream.read(BufferedInputStream.java:334)
    at java.io.DataInputStream.read(DataInputStream.java:149)
    at org.apache.hadoop.io.IOUtils.readFully(IOUtils.java:192)
    at org.apache.hadoop.hdfs.protocol.datatransfer.PacketReceiver.doReadFully(PacketReceiver.java:213)
    at org.apache.hadoop.hdfs.protocol.datatransfer.PacketReceiver.doRead(PacketReceiver.java:134)
    at org.apache.hadoop.hdfs.protocol.datatransfer.PacketReceiver.receiveNextPacket(PacketReceiver.java:109)
    at org.apache.hadoop.hdfs.server.datanode.BlockReceiver.receivePacket(BlockReceiver.java:468)
    at org.apache.hadoop.hdfs.server.datanode.BlockReceiver.receiveBlock(BlockReceiver.java:772)
    at org.apache.hadoop.hdfs.server.datanode.DataXceiver.writeBlock(DataXceiver.java:724)
    at org.apache.hadoop.hdfs.protocol.datatransfer.Receiver.opWriteBlock(Receiver.java:126)
    at org.apache.hadoop.hdfs.protocol.datatransfer.Receiver.processOp(Receiver.java:72)
    at org.apache.hadoop.hdfs.server.datanode.DataXceiver.run(DataXceiver.java:226)
    at java.lang.Thread.run(Thread.java:745)
This is an I/O timeout; increasing the following two parameters is usually enough (see the sketch below):
dfs.datanode.socket.write.timeout (increase): timeout for writing data to a DataNode.
dfs.client.socket-timeout (increase): timeout for network communication between DFS clients and the cluster.
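A minimal hdfs-site.xml sketch (values are in milliseconds and illustrative; in stock Hadoop 2.x the defaults are 480000 for the write timeout and 60000 for the client socket timeout):

<!-- hdfs-site.xml: loosen socket timeouts on slow or congested networks -->
<property>
  <name>dfs.datanode.socket.write.timeout</name>
  <value>960000</value>
</property>
<property>
  <name>dfs.client.socket-timeout</name>
  <value>300000</value>
</property>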
Reference: http://www.sharpcloud.cn/thread-4927-1-1.html