hive
-- List the existing databases, then create two new ones.
-- The second form does not fail when db1 already exists.
SHOW DATABASES;
CREATE DATABASE db;
CREATE DATABASE IF NOT EXISTS db1;
TO ADD COMMENT:
-- Create db3 with a descriptive comment attached to it.
CREATE DATABASE IF NOT EXISTS db3
    COMMENT " this is sample database";
-- List every database, then only those matching the pattern 'd*'.
SHOW DATABASES;
SHOW DATABASES LIKE 'd*';
-- Inspect db1, then remove it.
DESCRIBE DATABASE db1;
DROP DATABASE db1;
CREATE TABLE table-name -- fails if the table already exists, unless IF NOT EXISTS is used
column names
COMMENT table-comment
ROW FORMAT
DELIMITED -- how columns are separated: a comma, a space, or any other symbol
FIELDS TERMINATED BY -- any symbol; the default is '\001'
LOCATION
STORED AS -- the type of file in which the data is stored, i.e. text, binary file, etc. (optional)
------------TABLE CREATION---------------------
-- Customer table stored as a pipe-delimited text file.
-- The Hive CLI continuation prompt ('>') is not part of HiveQL, so it must not
-- appear in front of the continuation lines.
CREATE TABLE customers (
    id    INT,
    fname STRING,
    lname STRING,
    city  STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '|'
STORED AS TEXTFILE;
*********insert data*********
-- Load the local customers file into the table, show its rows, then drop the table.
LOAD DATA LOCAL INPATH 'C:\Users\Dell\Desktop\customers.txt'
    INTO TABLE customers;
SELECT * FROM customers;
DROP TABLE customers;
-----------------DDL COMMANDS--------------------
******CREATE******
-- Single-column table used to walk through the DDL commands below.
CREATE TABLE employee (
    id INT
);
LOAD DATA LOCAL INPATH 'C:\Users\Dell\Desktop\emp.txt'
    INTO TABLE employee;
SELECT * FROM employee;
*****ALTER****
ALTER TABLE employee RENAME TO emp;          -- rename the table
ALTER TABLE emp ADD COLUMNS (name STRING);   -- change the schema by adding a column
***** DESCRIBE*******
DESCRIBE emp;
****SHOW****
SHOW TABLES;                                 -- list the tables in the current database
********TRUNCATE*******
TRUNCATE TABLE emp;                          -- deletes the data from the table
SELECT * FROM emp;
*******DROP******
DROP TABLE emp;                              -- deletes the schema as well
------------------Hive DML Commands -----------------------------
*********SELECT***********
-- NOTE(review): earlier in this file emp is dropped (DROP section) and customers is
-- dropped right after its first load; both must exist again before these run.
SELECT * FROM emp;
********* WHERE *************
SELECT fname
FROM customers
WHERE id = 1;
********* GROUP BY *************
******** ORDER BY *************
********** LOAD **************
LOAD DATA LOCAL INPATH 'C:\Users\Dell\Desktop\customers.txt'
    INTO TABLE customers;
--------------------- COMPLEX TYPE IN HIVE----------------
************ARRAY************
-- ARRAY example: mytemp holds several DOUBLE readings per row.
-- Columns are tab-separated; array elements are comma-separated inside their column.
-- The Hive CLI continuation prompt ('>') is not part of HiveQL and is omitted.
CREATE TABLE temperature (
    sno    INT,
    place  STRING,
    mytemp ARRAY<DOUBLE>
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    COLLECTION ITEMS TERMINATED BY ',';
-- INPATH takes a quoted string literal; this path also contains spaces.
LOAD DATA LOCAL INPATH 'C:\Users\Dell\Desktop\LPU 23241\INT 312\PPTS\hive\array.txt'
    INTO TABLE temperature;
-- Read from the temperature table itself ("table" is a keyword, not a table name).
SELECT * FROM temperature;
-- Individual array elements are addressed by index.
SELECT place, mytemp[0] FROM temperature;
SELECT place, mytemp[3] FROM temperature;
***************MAP*********************
-- MAP example: collection maps an INT key to an INT value.
-- Columns are tab-separated, map entries comma-separated, key and value split by ':'.
-- The Hive CLI continuation prompt ('>') is not part of HiveQL and is omitted.
CREATE TABLE tab (
    city       STRING,
    gender     STRING,
    collection MAP<INT, INT>
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    COLLECTION ITEMS TERMINATED BY ','
    MAP KEYS TERMINATED BY ':';
-- INPATH takes a quoted string literal; this path also contains spaces.
LOAD DATA LOCAL INPATH 'C:\Users\Dell\Desktop\LPU 23241\INT 312\PPTS\hive\mapset.txt'
    INTO TABLE tab;
DESCRIBE tab;
SELECT * FROM tab;
-- Map values are looked up by key. The table is tab; "map" is only the column's type.
SELECT collection[2022]
FROM tab
WHERE city = 'xy';
SELECT collection[2022]
FROM tab
WHERE city = 'xy'
  AND gender = 'male';
*************************STRUCT********************
-- STRUCT example: info groups a place name with a temperature.
-- Columns are tab-separated; the struct's fields are comma-separated inside their column.
-- The Hive CLI continuation prompt ('>') is not part of HiveQL and is omitted.
CREATE TABLE struct (
    name STRING,
    city STRING,
    info STRUCT<place:STRING, temp:FLOAT>
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    COLLECTION ITEMS TERMINATED BY ',';
-- INPATH takes a quoted string literal; this path also contains spaces.
LOAD DATA LOCAL INPATH 'C:\Users\Dell\Desktop\LPU 23241\INT 312\PPTS\hive\struct.txt'
    INTO TABLE struct;
SELECT * FROM struct;
****elements of struct will be accessed using dot notation****
-- Read each field of the info struct with dot notation.
SELECT info.temp
FROM struct;
SELECT info.place
FROM struct;
----------------------------------OPERATORS-----------------------------
***********ARITHMETIC OPERATORS************
Operators Description
A + B This is used to add A and B.
A - B This is used to subtract B from A.
A * B This is used to multiply A and B.
A / B This is used to divide A and B and returns the quotient of the operands.
A % B This returns the remainder of A / B.
A | B This is used to determine the bitwise OR of A and B.
A & B This is used to determine the bitwise AND of A and B.
A ^ B This is used to determine the bitwise XOR of A and B.
~A This is used to determine the bitwise NOT of A.
-- Arithmetic operator example: add 1.0 to every cgpa value.
SELECT cgpa + 1.0
FROM students;
******************Relational Operators******************
Operator Description
A=B It returns true if A equals B, otherwise false.
A <> B, A !=B It returns null if A or B is null; true if A is not equal to B,
otherwise false.
A<B It returns null if A or B is null; true if A is less than B, otherwise false.
A>B It returns null if A or B is null; true if A is greater than B, otherwise
false.
A<=B It returns null if A or B is null; true if A is less than or equal to B,
otherwise false.
A>=B It returns null if A or B is null; true if A is greater than or equal to B,
otherwise false.
A IS NULL It returns true if A evaluates to null, otherwise false.
A IS NOT NULL It returns false if A evaluates to null, otherwise true.
-- Relational operator example: students whose cgpa is above 5.0.
SELECT regno
FROM students
WHERE cgpa > 5.0;
----------------------------FUNCTIONS----------------------
Return Type Operator Description
BIGINT count(*) It returns the count of the number of rows present in the
file.
DOUBLE sum(col) It returns the sum of values.
DOUBLE sum(DISTINCT col) It returns the sum of distinct values.
DOUBLE avg(col) It returns the average of values.
DOUBLE avg(DISTINCT col) It returns the average of distinct values.
DOUBLE min(col) It compares the values and returns the minimum one from them.
DOUBLE max(col) It compares the values and returns the maximum one from them.
-- Aggregate function examples: row count and highest cgpa.
SELECT COUNT(*)
FROM students;
SELECT MAX(cgpa)
FROM students;
Return Type Operator Description
INT length(str) It returns the length of the string.
STRING reverse(str) It returns the string in reverse order.
STRING concat(str1, str2, ...) It returns the concatenation of two or more
strings.
STRING substr(str, start_index) It returns the substring from the string
based on the provided starting index.
STRING substr(str, int start, int length) It returns the substring from the
string based on the provided starting index and length.
STRING upper(str) It returns the string in uppercase.
STRING lower(str) It returns the string in lowercase.
STRING trim(str) It returns the string by removing whitespaces from both the
ends.
STRING ltrim(str) It returns the string by removing whitespaces from left-
hand side.
STRING rtrim(str) It returns the string by removing whitespaces from right-hand
side.
-- String function example: first names in upper case.
SELECT
    regno,
    UPPER(fname)
FROM students;
--------------------------GROUP BY---------------
-- Average cgpa per section.
SELECT
    section,
    AVG(cgpa)
FROM student
GROUP BY section;
---------------------- Having--------
-- Sections whose average cgpa exceeds 5.00.
-- HAVING filters whole groups, so it has to test the aggregate; a bare cgpa is
-- neither a grouping column nor aggregated in this query.
SELECT
    section,
    AVG(cgpa)
FROM student
GROUP BY section
HAVING AVG(cgpa) > 5.00;
------------------ORDER By---------------
-- Students ordered by cgpa: first with the default direction, then descending.
SELECT
    regno,
    name,
    cgpa
FROM student
ORDER BY cgpa;
SELECT
    regno,
    name,
    cgpa
FROM student
ORDER BY cgpa DESC;
----------------SORT BY-------------------
-- SORT BY example: rows sorted on cgpa, highest first.
SELECT *
FROM student
SORT BY cgpa DESC;
----------------------------------------HIVE
VIEWS------------------------------------------------
Views simplify the complexity of a larger table into a flatter structure (work with
a subset of the data instead of the entire data).
They are similar to tables, just like SQL views.
They provide extra security, as we can hide unnecessary details or columns with the
help of a view.
-- Source table for the view example; columns are tab-separated.
CREATE TABLE vexample (
    id   INT,
    name STRING,
    sal  INT,
    dept STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t';
LOAD DATA LOCAL INPATH 'C:\Users\Dell\Desktop\LPU 23241\INT 312\PPTS\hive\view.txt'
    INTO TABLE vexample;
DESCRIBE vexample;
SELECT * FROM vexample;
CREATE A VIEW OF THIS TABLE BY HIDING ONE COLUMN SALARY
-- View over vexample that exposes every column except sal.
CREATE VIEW viewone AS
SELECT
    id,
    name,
    dept
FROM vexample;
SELECT * FROM viewone;
DROP VIEW viewone;
-----------------------------------------HIVE
PARTITIONING---------------------------------------
Partitioning divides the data in such a way that related data is stored together.
For example, divide particular data country-wise or state-wise out of the entire table.
Let's create a table containing states, cities and pin codes. With partitioning we
will divide this table based on the different states.
-- Source table for the partitioning example; columns are comma-separated.
CREATE TABLE states (
    state STRING,
    city  STRING,
    pin   BIGINT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';
-- The path is a single string literal: it must not be broken across lines,
-- otherwise the line break becomes part of the file name.
LOAD DATA LOCAL INPATH 'C:\Users\Dell\Desktop\LPU 23241\INT 312\PPTS\hive\partitioning.txt'
    INTO TABLE states;
DESCRIBE states;
SELECT * FROM states;
Now states table is my source table for partitioning
-- Destination table, partitioned by state. The partition column is declared in
-- PARTITIONED BY rather than in the regular column list.
CREATE TABLE part (
    city STRING,
    pin  BIGINT
)
PARTITIONED BY (state STRING);
-- Property we need to define to enable partitioning.
set hive.exec.dynamic.partition.mode=nonstrict;
-- part is the destination, i.e. where the data is overwritten. The partition
-- column (state) is selected last so each row is routed to its state's partition.
INSERT OVERWRITE TABLE part PARTITION (state)
SELECT
    city,
    pin,
    state
FROM states;
now in user/hive/warehouse these different partitions will be stored as per the
different states.
-----------------------------------------HIVE
BUCKETING----------------------------------------------------
Similar to partitioning in Hive, but with added functionality that divides a
large dataset into more manageable parts called buckets.
It is based on a hashing technique, i.e. a way to organize the data.
-- Source table for the bucketing example; columns are comma-separated.
CREATE TABLE buctab (
    id     INT,
    name   STRING,
    salary FLOAT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';
-- The path is a single string literal: it must not be broken across lines.
-- NOTE(review): Hive may treat backslash sequences such as \b inside string
-- literals as escapes; confirm this path resolves as intended.
LOAD DATA LOCAL INPATH 'C:\Users\Dell\Desktop\LPU 23241\INT 312\PPTS\hive\bucket.txt'
    INTO TABLE buctab;
SELECT * FROM buctab;
-- Enable bucketing.
set hive.enforce.bucketing=true;
-- Create the bucketed table: rows are clustered by id into 3 buckets.
-- The Hive CLI continuation prompt ('>') is not part of HiveQL and is omitted.
CREATE TABLE buc1 (
    id   INT,
    name STRING,
    sal  FLOAT
)
CLUSTERED BY (id) INTO 3 BUCKETS
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';
-- Insert data into the buckets from the source table.
INSERT OVERWRITE TABLE buc1
SELECT * FROM buctab;
check the buckets in the user/hive/warehouse/buctab_bucket