-- Session-level Hive tuning: vectorized execution, cost-based optimization,
-- statistics-driven planning and parallel stage execution.

-- Process rows in batches on both the map side and the reduce side.
SET hive.vectorized.execution.enabled=true;
SET hive.vectorized.execution.reduce.enabled=true;

-- Enable the cost-based optimizer and let the planner read column-level and
-- partition-level statistics from the metastore.
SET hive.cbo.enable=true;
SET hive.stats.fetch.column.stats=true;
SET hive.stats.fetch.partition.stats=true;

-- Let simple aggregates (for example count, min, max) be answered from
-- stored statistics instead of scanning the data.
SET hive.compute.query.using.stats=true;

-- Task counts. The Hadoop property names are mapreduce.job.maps and
-- mapreduce.job.reduces. The "mappers" / "reducers" spellings are not
-- recognized properties and are silently ignored.
-- NOTE(review): the map count is only a hint (input splits decide the real
-- number), whereas a fixed reduce count overrides Hive's automatic reducer
-- estimation - confirm that 1000 suits the workload.
SET mapreduce.job.maps=1000;
SET mapreduce.job.reduces=1000;

-- Run independent stages of a query concurrently.
SET hive.exec.parallel=true;
-----------------------------------------------------------
-- Vectorized GROUP BY: cap the aggregation hash table at 10240 entries and
-- flush 10 percent of the entries when the memory threshold is exceeded.
SET hive.vectorized.groupby.maxentries=10240;
SET hive.vectorized.groupby.flush.percent=0.1;

-- Cost-based optimizer plus statistics-driven planning.
SET hive.cbo.enable=true;
SET hive.compute.query.using.stats=true;
SET hive.stats.fetch.column.stats=true;
SET hive.stats.fetch.partition.stats=true;

-- Print column names as the first line of CLI query output.
SET hive.cli.print.header=true;
If you are running on Tez and a query fails with an error such as tez.TezProcessor|: java.lang.OutOfMemoryError: Java heap space,
set the properties below at the query level so that the container size is increased for that particular query only.
-- Per-query Tez memory settings for tasks that fail with "Java heap space".
-- hive.tez.container.size is expressed in MB. The JVM heap (-Xmx) has to fit
-- inside the container, so it is kept at roughly 80 percent of the container
-- size to leave room for non-heap memory. An 8192 MB heap cannot fit in a
-- 4026 MB container.
-- Each statement is terminated explicitly so the two settings are not read
-- as a single statement.
-- NOTE(review): confirm 8192 MB is within the cluster's YARN allocation limit.
SET hive.tez.container.size=8192;
SET hive.tez.java.opts=-Xmx6553m -Xms4096m;
For more details, see:
https://discuss.pivotal.io/hc/en-us/articles/226239948-Hive-query-with-TEZ-engine-failed-with-exception-java-lang-OutOfMemoryError-Java-heap-space
If a Hive query's output contains empty files alongside the data files, set the properties below.
-- Merge small output files at the end of the job, for each execution path:
-- map-only jobs, map-reduce jobs and Tez DAGs.
SET hive.merge.mapfiles=true;
SET hive.merge.mapredfiles=true;
SET hive.merge.tezfiles=true;

-- For ORC output, merge at stripe level.
SET hive.merge.orcfile.stripe.level=true;
=================================================
Optimization for ORC files
-- Settings for queries over ORC tables.

-- Vectorized execution on the map side, the reduce side and for
-- reduce-side GROUP BY.
SET hive.vectorized.execution.enabled=true;
SET hive.vectorized.execution.reduce.enabled=true;
SET hive.vectorized.execution.reduce.groupby.enabled=true;

-- Run independent stages of a query concurrently (set once - the repeated
-- assignment was redundant).
SET hive.exec.parallel=true;

-- Tez task memory. The container size is in MB. The heap is kept at roughly
-- 80 percent of the container so that non-heap memory still fits. A heap
-- equal to the container size leaves no headroom and risks the container
-- being killed for exceeding its memory limit.
SET hive.tez.container.size=8192;
SET hive.tez.java.opts=-Xmx6553m -Xms4096m;
-- Session-level Hive tuning: reduce-side vectorization, cost-based
-- optimization, statistics-driven planning and parallel stage execution.

-- Process rows in batches on the reduce side.
SET hive.vectorized.execution.reduce.enabled=true;

-- Enable the cost-based optimizer and let the planner read column-level and
-- partition-level statistics from the metastore.
SET hive.cbo.enable=true;
SET hive.stats.fetch.column.stats=true;
SET hive.stats.fetch.partition.stats=true;

-- Let simple aggregates (for example count, min, max) be answered from
-- stored statistics instead of scanning the data.
SET hive.compute.query.using.stats=true;

-- Task counts. The Hadoop property names are mapreduce.job.maps and
-- mapreduce.job.reduces. The "mappers" / "reducers" spellings are not
-- recognized properties and are silently ignored.
-- NOTE(review): the map count is only a hint (input splits decide the real
-- number), whereas a fixed reduce count overrides Hive's automatic reducer
-- estimation - confirm that 1000 suits the workload.
SET mapreduce.job.maps=1000;
SET mapreduce.job.reduces=1000;

-- Run independent stages of a query concurrently.
SET hive.exec.parallel=true;
-----------------------------------------------------------
-- Vectorized GROUP BY: cap the aggregation hash table at 10240 entries and
-- flush 10 percent of the entries when the memory threshold is exceeded.
SET hive.vectorized.groupby.maxentries=10240;
SET hive.vectorized.groupby.flush.percent=0.1;

-- Cost-based optimizer plus statistics-driven planning.
SET hive.cbo.enable=true;
SET hive.compute.query.using.stats=true;
SET hive.stats.fetch.column.stats=true;
SET hive.stats.fetch.partition.stats=true;

-- Print column names as the first line of CLI query output.
SET hive.cli.print.header=true;
If you are running on Tez and a query fails with an error such as tez.TezProcessor|: java.lang.OutOfMemoryError: Java heap space,
set the properties below at the query level so that the container size is increased for that particular query only.
-- Per-query Tez memory settings for tasks that fail with "Java heap space".
-- hive.tez.container.size is expressed in MB. The JVM heap (-Xmx) has to fit
-- inside the container, so it is kept at roughly 80 percent of the container
-- size to leave room for non-heap memory. An 8192 MB heap cannot fit in a
-- 4026 MB container.
-- Each statement is terminated explicitly so the two settings are not read
-- as a single statement.
-- NOTE(review): confirm 8192 MB is within the cluster's YARN allocation limit.
SET hive.tez.container.size=8192;
SET hive.tez.java.opts=-Xmx6553m -Xms4096m;
For more details, see:
https://discuss.pivotal.io/hc/en-us/articles/226239948-Hive-query-with-TEZ-engine-failed-with-exception-java-lang-OutOfMemoryError-Java-heap-space
If a Hive query's output contains empty files alongside the data files, set the properties below.
-- Merge small output files at the end of the job, for each execution path:
-- map-only jobs, map-reduce jobs and Tez DAGs.
SET hive.merge.mapfiles=true;
SET hive.merge.mapredfiles=true;
SET hive.merge.tezfiles=true;

-- For ORC output, merge at stripe level.
SET hive.merge.orcfile.stripe.level=true;
=================================================
Optimization for ORC files
-- Settings for queries over ORC tables.

-- Vectorized execution on the map side, the reduce side and for
-- reduce-side GROUP BY.
SET hive.vectorized.execution.enabled=true;
SET hive.vectorized.execution.reduce.enabled=true;
SET hive.vectorized.execution.reduce.groupby.enabled=true;

-- Run independent stages of a query concurrently (set once - the repeated
-- assignment was redundant).
SET hive.exec.parallel=true;

-- Tez task memory. The container size is in MB. The heap is kept at roughly
-- 80 percent of the container so that non-heap memory still fits. A heap
-- equal to the container size leaves no headroom and risks the container
-- being killed for exceeding its memory limit.
SET hive.tez.container.size=8192;
SET hive.tez.java.opts=-Xmx6553m -Xms4096m;
No comments:
Post a Comment