Sunday, June 29, 2014

Impala SQL samples

1. Create external table on hdfs
CREATE EXTERNAL TABLE tab1
(
   id INT,
   col_1 BOOLEAN,
   col_2 DOUBLE,
   col_3 TIMESTAMP
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LOCATION '/user/cloudera/sample_data/tab1';
2. Create partition table in Central Impala data directory(hive.metastore.warehouse.dir).
use external_partitions;
create table logs (field1 string, field2 string, field3 string)
  partitioned by (year string, month string , day string, host string)
  row format delimited fields terminated by ',';

insert into logs partition (year="2013", month="07", day="28", host="host1") 
values ("foo","foo","foo");
Data directory sample:
/user/hive/warehouse/external_partitions.db/logs/year=2013/month=07/day=28/host=host1/db4dbc69990cac44-4b80d4c7ec455cae_391531365_data.0
3.  Create external partition table whose data exist in specific directory.
create external table logs (field1 string, field2 string, field3 string)
  partitioned by (year string, month string, day string, host string)
  row format delimited fields terminated by ','
  location '/user/impala/data/logs';

alter table logs add partition (year="2013",month="07",day="28",host="host1");

refresh logs;
4. Refresh Metadata
  • After create, drop, or alter a table or other kind of object through Hive:
INVALIDATE METADATA [TABLE_NAME];
  • After load, insert, or change data in an existing table through Hive (or even through manual HDFS operations such as the hdfs command):
REFRESH [TABLE_NAME];
5. Hash Join VS Cross Join(Cartesian product)
  • Hash Join
explain select concat(heroes.name,' vs. ',villains.name) as battle
 from heroes join villains
 where heroes.era = villains.era and heroes.planet = villains.planet;

+------------------------------------------------------------------------------------+
| Explain String                                                                     |
+------------------------------------------------------------------------------------+
| Estimated Per-Host Requirements: Memory=2.03GB VCores=2                            |
| WARNING: The following tables are missing relevant table and/or column statistics. |
| external_partitions.heroes, external_partitions.villains                           |
|                                                                                    |
| 04:EXCHANGE [PARTITION=UNPARTITIONED]                                              |
| |                                                                                  |
| 02:HASH JOIN [INNER JOIN, BROADCAST]                                               |
| |  hash predicates: heroes.era = villains.era, heroes.planet = villains.planet     |
| |                                                                                  |
| |--03:EXCHANGE [BROADCAST]                                                         |
| |  |                                                                               |
| |  01:SCAN HDFS [external_partitions.villains]                                     |
| |     partitions=1/1 size=84B                                                      |
| |                                                                                  |
| 00:SCAN HDFS [external_partitions.heroes]                                          |
|    partitions=1/1 size=75B                                                         |
+------------------------------------------------------------------------------------+
  • Cross Join(Cartesian product)
explain select concat(heroes.name,' vs. ',villains.name) as battle 
from heroes cross join villains;

+------------------------------------------------------------------------------------+
| Explain String                                                                     |
+------------------------------------------------------------------------------------+
| Estimated Per-Host Requirements: Memory=2.06GB VCores=2                            |
| WARNING: The following tables are missing relevant table and/or column statistics. |
| external_partitions.heroes, external_partitions.villains                           |
|                                                                                    |
| 04:EXCHANGE [PARTITION=UNPARTITIONED]                                              |
| |                                                                                  |
| 02:CROSS JOIN [BROADCAST]                                                          |
| |                                                                                  |
| |--03:EXCHANGE [BROADCAST]                                                         |
| |  |                                                                               |
| |  01:SCAN HDFS [external_partitions.villains]                                     |
| |     partitions=1/1 size=84B                                                      |
| |                                                                                  |
| 00:SCAN HDFS [external_partitions.heroes]                                          |
|    partitions=1/1 size=75B                                                         |
+------------------------------------------------------------------------------------+
6. Analyze Table
COMPUTE STATS [TABLE_NAME];
7. Set request pool in impala-shell
SET REQUEST_POOL=my_pool;
8. Explain level
set EXPLAIN_LEVEL=3;
9. Plan Profile
profile;
==

No comments:

Post a Comment

Popular Posts