Complete Spark Installation Steps
Environment: Ubuntu 14.04, 32-bit
Versions: JDK 1.7.0_79, Hadoop 2.6.2, Scala 2.9.2, Spark 1.5.2
References:
http://www.csdn.net/article/2015-04-24/2824552
http://blog.csdn.net/ggz631047367/article/details/42426391
Result: Hadoop 2.6.2 ships 64-bit native libraries, which this 32-bit system cannot load, so a warning is printed on every run. Recompiling the native code from source failed with assorted errors and remains unsolved, but it does not seem to affect Spark, so it is left alone for now; suggestions welcome.
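A commonly cited workaround (an assumption here, not verified on this machine) is to silence the warning instead of rebuilding: raise the log level of the class that emits it in Hadoop's log4j configuration.
sudo gedit /usr/local/hadoop/etc/hadoop/log4j.properties
# assumed workaround: suppress the "unable to load native-hadoop library" warning
log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR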
Main steps: JDK -> Hadoop -> Scala -> Spark
# Install the JDK
1. Copy the archive and extract it into the target directory
sudo mkdir /usr/lib/java
sudo tar zxvf /mnt/shared/jdk-7u79-linux-i586.gz -C /usr/lib/java
2. Configure the JDK environment
sudo gedit /etc/profile
export JAVA_HOME=/usr/lib/java/jdk1.7.0_79
export JRE_HOME=${JAVA_HOME}/jre
export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib
export PATH=${JAVA_HOME}/bin:$PATH
source /etc/profile
sudo update-alternatives --install /usr/bin/java java /usr/lib/java/jdk1.7.0_79/bin/java 300
sudo update-alternatives --install /usr/bin/javac javac /usr/lib/java/jdk1.7.0_79/bin/javac 300
3. Commands to switch the active version
sudo update-alternatives --config java
sudo update-alternatives --config javac
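To confirm the switch took effect, both commands below should report 1.7.0_79:
java -version
javac -version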
# Install Hadoop
1. Create the hadoop group
sudo addgroup hadoop
2. Create the hadoop user
sudo adduser --ingroup hadoop hadoop
3. Grant the hadoop user sudo privileges
sudo gedit /etc/sudoers
hadoop ALL=(ALL:ALL) ALL
(A syntax error in /etc/sudoers can lock you out of sudo; visudo validates the file and is the safer editor.)
4. Copy the Hadoop archive to the install directory and unpack it
sudo rm -rf /usr/local/hadoop
sudo cp hadoop-2.6.2.tar.gz /usr/local/
cd /usr/local
sudo tar -zxvf hadoop-2.6.2.tar.gz
sudo mv hadoop-2.6.2 hadoop
5. Fix permissions and ownership for the hadoop user
su - hadoop
sudo chmod -R 775 /usr/local/hadoop
sudo chown -R hadoop:hadoop /usr/local/hadoop
6. Configure Hadoop
1) .bashrc
sudo gedit ~/.bashrc
#HADOOP VARIABLES START
export JAVA_HOME=/usr/lib/java/jdk1.7.0_79
export HADOOP_INSTALL=/usr/local/hadoop
export PATH=$PATH:$HADOOP_INSTALL/bin
export PATH=$PATH:$JAVA_HOME/bin
export PATH=$PATH:$HADOOP_INSTALL/sbin
export HADOOP_MAPRED_HOME=$HADOOP_INSTALL
export HADOOP_COMMON_HOME=$HADOOP_INSTALL
export HADOOP_HDFS_HOME=$HADOOP_INSTALL
export YARN_HOME=$HADOOP_INSTALL
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_INSTALL/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_INSTALL/lib"
#HADOOP VARIABLES END
source ~/.bashrc
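A quick sanity check that the Hadoop binaries are now on the PATH:
hadoop version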
2) hadoop-env.sh
sudo gedit /usr/local/hadoop/etc/hadoop/hadoop-env.sh
export JAVA_HOME=/usr/lib/java/jdk1.7.0_79
3) /etc/profile
sudo gedit /etc/profile
export HADOOP_HOME=/usr/local/hadoop
export YARN_HOME=/usr/local/hadoop
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export YARN_CONF_DIR=$HADOOP_HOME/etc/hadoop
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"
source /etc/profile
7. Test Hadoop
cd /usr/local/hadoop
mkdir input
cp README.txt input
The original attempt failed with a "not found" error, most likely because it pointed at the sources jar (which contains only .java files, no compiled classes). Running against the compiled examples jar should work:
bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.2.jar pi 100 1000000
bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.2.jar wordcount input output
8. Hadoop pseudo-distributed configuration
sudo gedit /usr/local/hadoop/etc/hadoop/core-site.xml
<configuration>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/usr/local/hadoop/tmp</value>
    <description>A base for other temporary directories.</description>
  </property>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:9000</value>
  </property>
</configuration>
sudo gedit /usr/local/hadoop/etc/hadoop/yarn-site.xml
<configuration>
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>master</value>
  </property>
  <property>
    <description>The address of the applications manager interface in the RM.</description>
    <name>yarn.resourcemanager.address</name>
    <value>${yarn.resourcemanager.hostname}:8032</value>
  </property>
  <property>
    <description>The address of the scheduler interface.</description>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>${yarn.resourcemanager.hostname}:8030</value>
  </property>
  <property>
    <description>The http address of the RM web application.</description>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>${yarn.resourcemanager.hostname}:8088</value>
  </property>
  <property>
    <description>The https address of the RM web application.</description>
    <name>yarn.resourcemanager.webapp.https.address</name>
    <value>${yarn.resourcemanager.hostname}:8090</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>${yarn.resourcemanager.hostname}:8031</value>
  </property>
  <property>
    <description>The address of the RM admin interface.</description>
    <name>yarn.resourcemanager.admin.address</name>
    <value>${yarn.resourcemanager.hostname}:8033</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
</configuration>
// Not required for a pseudo-distributed setup.
(mapred-site.xml does not exist by default; copy it from the template first.)
sudo cp /usr/local/hadoop/etc/hadoop/mapred-site.xml.template /usr/local/hadoop/etc/hadoop/mapred-site.xml
sudo gedit /usr/local/hadoop/etc/hadoop/mapred-site.xml
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>master:10020</value>
    <description>MapReduce JobHistory Server IPC host:port</description>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>master:19888</value>
    <description>MapReduce JobHistory Server Web UI host:port</description>
  </property>
</configuration>
sudo gedit /usr/local/hadoop/etc/hadoop/hdfs-site.xml
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:/usr/local/hadoop/dfs/name</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:/usr/local/hadoop/dfs/data</value>
  </property>
  <!-- Disabling permissions prevents read/write refusals from Eclipse later on. -->
  <property>
    <name>dfs.permissions</name>
    <value>false</value>
  </property>
</configuration>
9. Configure masters and slaves
sudo gedit /usr/local/hadoop/etc/hadoop/masters
localhost
sudo gedit /usr/local/hadoop/etc/hadoop/slaves
localhost
(The Hadoop 2.x start scripts reportedly read only slaves; the masters file is harmless but unused.)
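One step the original skips: start-dfs.sh connects to localhost over SSH, so passwordless SSH must already work. If it does not, a minimal setup along these lines (assuming openssh-server is not yet installed) should do:
sudo apt-get install openssh-server
ssh-keygen -t rsa -P "" -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
ssh localhost   # should now log in without a password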
10. Create the required working directories
cd /usr/local/hadoop
mkdir tmp dfs dfs/name dfs/data
11. Format the HDFS filesystem
bin/hdfs namenode -format
12. Start HDFS and YARN
sbin/start-dfs.sh
sbin/start-yarn.sh
13. Start the JobHistory server
sbin/mr-jobhistory-daemon.sh start historyserver
Use jps to check the running processes; after the steps above it should list NameNode, DataNode, SecondaryNameNode, ResourceManager, NodeManager, and JobHistoryServer.
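With the stock Hadoop 2.x ports (an assumption; adjust if the addresses above were changed), the daemons can also be checked in a browser:
http://localhost:50070   # NameNode web UI
http://localhost:8088    # ResourceManager web UI
http://localhost:19888   # JobHistory web UI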
# Install Scala
sudo apt-get install scala
scala -version
(On Ubuntu 14.04 the packaged Scala is 2.9.2, which is the version listed at the top.)
# Install Spark
1. Copy Spark to the install directory
sudo cp spark-1.5.2-bin-hadoop2.6.tgz /usr/local
cd /usr/local
sudo tar -xvzf spark-1.5.2-bin-hadoop2.6.tgz
sudo mv spark-1.5.2-bin-hadoop2.6 /usr/local/spark-hadoop
2. Configure Spark
sudo gedit /etc/profile
export SPARK_HOME=/usr/local/spark-hadoop
export PATH=$SPARK_HOME/bin:$PATH
source /etc/profile
cd /usr/local/spark-hadoop/conf
sudo cp spark-env.sh.template spark-env.sh
sudo gedit spark-env.sh
export JAVA_HOME=/usr/lib/java/jdk1.7.0_79
export SPARK_MASTER_IP=localhost
export SPARK_WORKER_MEMORY=1000m
3. Fix permissions and ownership
sudo chmod -R 775 /usr/local/spark-hadoop
sudo chown -R hadoop:hadoop /usr/local/spark-hadoop
4. Start Spark
cd /usr/local/spark-hadoop
sbin/start-all.sh
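If the standalone cluster came up, jps should now additionally show Master and Worker processes, and the master web UI should be reachable at http://localhost:8080 (the Spark standalone default port).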
5. Test Spark
cd /usr/local/spark-hadoop
bin/run-example SparkPi
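run-example runs SparkPi locally by default. To exercise the standalone master started above instead, a spark-submit invocation along these lines should work (the examples jar name under lib/ varies by build, hence the glob; 7077 is the default master port):
bin/spark-submit --class org.apache.spark.examples.SparkPi --master spark://localhost:7077 lib/spark-examples-*.jar 10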