# Install Embulk on a Debian/Ubuntu host.
# Refresh the apt package index, then install the distribution's default Java runtime.
sudo apt-get update
sudo apt-get install default-jre
# NOTE(review): Embulk documents specific supported Java versions; confirm that the
# JRE installed by default-jre works with the Embulk release downloaded below.
# Download the latest Embulk jar to ~/.embulk/bin/embulk
# (-L follows redirects, --create-dirs creates the target directory if missing).
curl -L --create-dirs -o "$HOME/.embulk/bin/embulk" "https://dl.embulk.org/embulk-latest.jar"
# Make the downloaded file executable so it can be invoked as `embulk`.
chmod +x "$HOME/.embulk/bin/embulk"
# Persist the Embulk bin directory on PATH for future shells, then reload it now.
echo 'export PATH="$HOME/.embulk/bin:$PATH"' >> "$HOME/.bashrc"
. "$HOME/.bashrc"
embulk gem install [plugin name]
Plugin 목록은 Embulk 플러그인 페이지(https://plugins.embulk.org/)에서 확인할 수 있습니다! input으로 mysql, output으로 bigquery를 사용하기 위해 플러그인을 설치해보겠습니다
# Install the MySQL input plugin and the BigQuery output plugin as Embulk gems.
embulk gem install embulk-input-mysql
embulk gem install embulk-output-bigquery
embulk gem list로 설치된 플러그인을 확인할 수 있습니다
embulk example
- 예제 데이터를 생성합니다
embulk guess embulk-example/seed.yml
- seed.yml 파일을 기반으로 설정을 추론합니다
embulk guess embulk-example/seed.yml -o config.yml
- seed.yml 파일을 기반으로 설정 파일인 config.yml을 생성합니다
embulk preview config.yml
- config.yml을 읽어 데이터 파싱이 정상적으로 진행되는지 테스트합니다
embulk run config.yml
- config.yml을 읽어 실행합니다
# Embulk config: read selected columns from a MySQL table and load them into BigQuery.
# Plugin options must be nested under `in:` / `out:`; at column 0 they would become
# duplicate top-level keys (`type`, `table`) and leave `in`/`out` empty.
in:
  type: mysql
  host: localhost
  port: 3306
  user: root
  # Placeholder credentials for the tutorial. NOTE(review): do not commit real passwords.
  password: root
  database: database_name
  table: table_name
  # Column list, row filter, and ordering for the source table.
  select: "col1, col2, col3"
  where: "col4 != 'a'"
  order_by: "col1 DESC"
out:
  type: bigquery
  mode: replace
  # Authenticate with a service-account JSON key file at the given path.
  auth_method: json_key
  json_keyfile: /path/to/json_keyfile.json
  project: my-project
  dataset: reservation
  table: reservation
  gcs_bucket: seongyun
  # Let the plugin create the bucket, table, and dataset when they are missing.
  auto_create_gcs_bucket: true
  auto_create_table: true
  auto_create_dataset: true
embulk run config.yml
# Embulk input config: read from MySQL using a free-form SQL query instead of
# table/select/where options.
in:
  type: mysql
  host: localhost
  user: root
  # Placeholder credentials for the tutorial. NOTE(review): do not commit real passwords.
  password: root
  database: database_name
  # Literal block scalar: the SQL lines must be indented deeper than `query`
  # to be part of its value.
  query: |
    SELECT t1.id, t1.name, t2.id AS t2_id, t2.name AS t2_name
    FROM table1 AS t1
    LEFT JOIN table2 AS t2
    ON t1.id = t2.t1_id
# Embulk config: MySQL input with per-column type overrides, a follow-up SQL
# statement, and executor tuning.
in:
  type: mysql
  host: localhost
  user: root
  # Placeholder credentials for the tutorial. NOTE(review): do not commit real passwords.
  password: root
  database: database_name
  table: table_name
  select: "col1, col2, col3"
  where: "col4 != 'a'"
  # Per-column conversion settings, keyed by column name.
  column_options:
    col1:
      type: long
    col3:
      type: string
      timestamp_format: "%Y/%m/%d"
      # Quoted so the offset stays a string rather than being parsed as a number.
      timezone: "+0900"
  # SQL statement run after the SELECT; see the embulk-input-mysql documentation
  # for its exact transaction semantics.
  after_select: "update table_name set col5 = '1' where col4 != 'a'"
# Executor settings are a top-level section, a sibling of `in`.
exec:
  max_threads: 16
  min_output_tasks: 4