Apache Sqoop 是一种工具,用于在Apache Hadoop和结构化数据存储(如关系数据库)之间高效传输批量数据。

0x00 解压文件

1
2
3
tar -xf sqoop-1.4.7.bin__hadoop-2.6.0.tar.gz
mv sqoop-1.4.7.bin__hadoop-2.6.0/ sqoop-1.4.7
ln -s /data/sqoop-1.4.7/ /data/sqoop

0x01 添加环境变量

1
2
3
4
5
vim ~/.bashrc
#export SQOOP_HOME=/data/sqoop/
#export PATH=${SQOOP_HOME}/bin:$PATH
source ~/.bashrc
sqoop version

0x02 修改sqoop-env.sh配置文件

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
cd conf/
mv sqoop-env-template.sh sqoop-env.sh
vim sqoop-env.sh

#Set path to where bin/hadoop is available
export HADOOP_COMMON_HOME=/data/hadoop/

#Set path to where hadoop-*-core.jar is available
export HADOOP_MAPRED_HOME=/data/hadoop/

#set the path to where bin/hbase is available
#export HBASE_HOME=

#Set the path to where bin/hive is available
export HIVE_HOME=/data/hive/

#Set the path for where zookeper config dir is
#export ZOOCFGDIR=

0x03 修改configure-sqoop文件

1
2
cd bin/
vim configure-sqoop

注释掉以下代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#if [ -z "${HBASE_HOME}" ]; then
# if [ -d "/usr/lib/hbase" ]; then
# HBASE_HOME=/usr/lib/hbase
# else
# HBASE_HOME=${SQOOP_HOME}/../hbase
# fi
#fi
#if [ -z "${HCAT_HOME}" ]; then
# if [ -d "/usr/lib/hive-hcatalog" ]; then
# HCAT_HOME=/usr/lib/hive-hcatalog
# elif [ -d "/usr/lib/hcatalog" ]; then
# HCAT_HOME=/usr/lib/hcatalog
# else
# HCAT_HOME=${SQOOP_HOME}/../hive-hcatalog
# if [ ! -d ${HCAT_HOME} ]; then
# HCAT_HOME=${SQOOP_HOME}/../hcatalog
# fi
# fi
#fi
#if [ -z "${ACCUMULO_HOME}" ]; then
# if [ -d "/usr/lib/accumulo" ]; then
# ACCUMULO_HOME=/usr/lib/accumulo
# else
# ACCUMULO_HOME=${SQOOP_HOME}/../accumulo
# fi
#fi
#if [ -z "${ZOOKEEPER_HOME}" ]; then
# if [ -d "/usr/lib/zookeeper" ]; then
# ZOOKEEPER_HOME=/usr/lib/zookeeper
# else
# ZOOKEEPER_HOME=${SQOOP_HOME}/../zookeeper
# fi
#fi

0x04 拷贝数据库连接jar包

1
cp /data/hive/lib/mysql-connector-java-5.1.44-bin.jar ./lib/

0x05 启动测试

1
2
3
4
5
6
7
8
9
10
11
sqoop list-databases --connect jdbc:mysql://localhost:3306/ --username root --password
sqoop list-tables --connect jdbc:mysql://localhost:3306/hive --username root --password

sqoop import \
--connect jdbc:mysql://mdw01:3306/hive \
--username hive --password mysql \
--table TBLS \
-m 1 \
--hive-import \
--create-hive-table \
--hive-table hive_tables

参考文献

数据仓库的初级手册