-- Hive streaming job: loads tab-separated key/value pairs into kv_input,
-- pipes them through a map script and a reduce script, and writes the
-- reducer output into kv_condensed.
--
-- NOTE(review): the local paths under /Users/larry are hard-coded, so the
-- script only runs on a machine that has those files. Confirm before reuse.

-- Staging table for the raw input. IF EXISTS keeps the script re-runnable
-- even when hive.exec.drop.ignorenonexistent is disabled.
DROP TABLE IF EXISTS kv_input;
CREATE TABLE kv_input (
    k STRING,
    v STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE;

-- Destination table for the reducer output.
DROP TABLE IF EXISTS kv_condensed;
CREATE TABLE kv_condensed (
    k STRING,
    v STRING
)
STORED AS SEQUENCEFILE;

-- OVERWRITE replaces any rows already present in kv_input.
LOAD DATA LOCAL INPATH '/Users/larry/kv_input.txt'
OVERWRITE INTO TABLE kv_input;

-- Ship both streaming scripts with the job so that the relative ./ paths
-- used in the USING clauses below resolve on the task nodes.
ADD FILE /Users/larry/identity.pl;
ADD FILE /Users/larry/condense.pl;

-- Map stage: every (k, v) row is streamed through identity.pl, and
-- CLUSTER BY k routes rows that share a key to the same reducer, sorted by
-- that key. Presumably identity.pl passes rows through unchanged, going by
-- its name (verify against the script).
-- Reduce stage: the clustered rows are streamed through condense.pl and the
-- script output overwrites kv_condensed. The REDUCE clause intentionally has
-- no AS clause: Hive then splits each output line at the first tab into the
-- default (key, value) pair, which differs from an explicit two-column AS
-- list when a line contains more than one tab.
FROM (
    FROM kv_input
    MAP k, v
    USING './identity.pl'
    AS k, v
    CLUSTER BY k
) map_output
INSERT OVERWRITE TABLE kv_condensed
REDUCE k, v
USING './condense.pl';