Friday, October 15, 2004

"Pivot" Queries

One of the frequently asked questions on SQL forums is how to present data “horizontally” rather than “vertically”, known as a “pivot query”. For example, instead of this:

DEPTNO      JOB         HEADCOUNT
----------  ---------  ----------
10          CLERK               1
10          MANAGER             1
20          ANALYST             2
20          CLERK               2
20          MANAGER             1
30          CLERK               1
30          MANAGER             1
30          SALESMAN            4

… people would like to know how to produce this:

DEPTNO         ANALYST       CLERK     MANAGER    SALESMAN
----------  ----------  ----------  ----------  ----------
10                               1           1
20                   2           2           1
30                               1           1           4


In Oracle, the normal way is to write a query using DECODE like this:

select deptno
, count(DECODE(job,'ANALYST', 1)) as "ANALYST"
, count(DECODE(job,'CLERK', 1)) as "CLERK"
, count(DECODE(job,'MANAGER', 1)) as "MANAGER"
, count(DECODE(job,'SALESMAN', 1)) as "SALESMAN"
from emp
group by deptno
order by deptno;

The DECODE ensures that the jobs go in the right columns.
Now this type of query is quite easy to write (when you know how), but is also quite laborious; also, if the list of jobs changes then the query needs to be changed too.
To make producing such queries a “piece of cake” I have built a package called pivot that can be used to generate the SQL like this:
begin
pivot.print_pivot_query
( 'deptno'
, 'job'
, 'emp'
, '1'
, agg_types => 'count'
);
end;
/
select deptno
, count(DECODE(job,'ANALYST', 1)) as "ANALYST"
, count(DECODE(job,'CLERK', 1)) as "CLERK"
, count(DECODE(job,'MANAGER', 1)) as "MANAGER"
, count(DECODE(job,'SALESMAN', 1)) as "SALESMAN"
from emp
group by deptno
order by deptno
PL/SQL procedure successfully completed.


The package code follows. Note that it requires the “parse” package that I posted earlier.

CREATE OR REPLACE PACKAGE pivot IS
  TYPE ref_cursor IS REF CURSOR;
  PROCEDURE print_pivot_query
  ( group_cols   IN VARCHAR2              -- Comma-separated list of column(s), including table alias if applicable,
                                          -- to be used to group the records
  , pivot_col    IN VARCHAR2              -- The name of the column to be pivoted, including table alias if applicable
  , tables       IN VARCHAR2              -- Comma-separated list of table(s), with table alias if applicable
  , value_cols   IN VARCHAR2              -- Comma-separated list of column(s), including table alias if applicable
                                          -- for which aggregates are to be shown at each intersection
  , pivot_values IN VARCHAR2 DEFAULT NULL -- Comma-separated list of values of pivot column to be used;
                                          -- if omitted, all values are used (determined dynamically)
  , agg_types    IN VARCHAR2 DEFAULT NULL -- Comma-separated list of aggregate types, corresponding to value_cols;
                                          -- if omitted, SUM is the default
  , where_clause IN VARCHAR2 DEFAULT NULL -- where clause of query
  , order_by     IN VARCHAR2 DEFAULT NULL -- order by clause; if omitted, output is ordered by group_cols
  );
  FUNCTION pivot_query
  ( group_cols IN VARCHAR2
  , pivot_col IN VARCHAR2
  , tables IN VARCHAR2
  , value_cols IN VARCHAR2
  , pivot_values IN VARCHAR2 DEFAULT NULL
  , agg_types IN VARCHAR2 DEFAULT NULL
  , where_clause IN VARCHAR2 DEFAULT NULL
  , order_by IN VARCHAR2 DEFAULT NULL
  ) RETURN VARCHAR2;
  FUNCTION pivot_cursor
  ( group_cols IN VARCHAR2
  , pivot_col IN VARCHAR2
  , tables IN VARCHAR2
  , value_cols IN VARCHAR2
  , pivot_values IN VARCHAR2 DEFAULT NULL
  , agg_types IN VARCHAR2 DEFAULT NULL
  , where_clause IN VARCHAR2 DEFAULT NULL
  , order_by IN VARCHAR2 DEFAULT NULL
  ) RETURN ref_cursor;
END;
/
CREATE OR REPLACE PACKAGE BODY pivot IS
  g_mode varchar2(10);
  g_sql varchar2(32767);
  PROCEDURE pr
  ( p_text in varchar2
  )
  IS
    v_text VARCHAR2(32767) := p_text;
  BEGIN
    if g_mode = 'PRINT' then
      WHILE LENGTH(v_text) > 255 LOOP
        DBMS_OUTPUT.PUT_LINE( SUBSTR(v_text,1,255) );
        v_text := SUBSTR(v_text,256);
      END LOOP;
      DBMS_OUTPUT.PUT_LINE( v_text );
    else
      g_sql := g_sql || ' ' || p_text;
    end if;
  END pr;
  /*
  || Generates the SQL statement for a pivot query based on parameters
  || Example:
  || create_pivot_query
  || ( group_cols => 'd.dname'
  || , pivot_col => 'e.job'
  || , tables => 'emp e, dept d'
  || , value_cols => 'e.sal,e.age'
  || , agg_types => 'min,max'
  || , where_clause => 'e.deptno = d.deptno'
  || );
  || Generates a query like:
  || select d.dname
  || , min(DECODE(e.job,'ANALYST', e.sal, 0 )) as "min_ANALYST_esal"
  || , max(DECODE(e.job,'ANALYST', e.age, 0 )) as "max_ANALYST_eage"
  || , min(DECODE(e.job,'CLERK', e.sal, 0 )) as "min_CLERK_esal"
  || , max(DECODE(e.job,'CLERK', e.age, 0 )) as "max_CLERK_eage"
  || , min(DECODE(e.job,'MANAGER', e.sal, 0 )) as "min_MANAGER_esal"
  || , max(DECODE(e.job,'MANAGER', e.age, 0 )) as "max_MANAGER_eage"
  || , min(DECODE(e.job,'PRESIDENT', e.sal, 0 )) as "min_PRESIDENT_esal"
  || , max(DECODE(e.job,'PRESIDENT', e.age, 0 )) as "max_PRESIDENT_eage"
  || , min(DECODE(e.job,'SALESMAN', e.sal, 0 )) as "min_SALESMAN_esal"
  || , max(DECODE(e.job,'SALESMAN', e.age, 0 )) as "max_SALESMAN_eage"
  || from emp e, dept d
  || where e.deptno = d.deptno
  || group by d.dname
  || order by d.dname
  ||
  || i.e. the parameters are used like this:
  || select
  || , (DECODE(,, , 0 ))
  || from
  || where
  || group by
  || order by
  ||
  */
  PROCEDURE define_pivot_query
  ( group_cols IN VARCHAR2
  , pivot_col IN VARCHAR2
  , tables IN VARCHAR2
  , value_cols IN VARCHAR2
  , pivot_values IN VARCHAR2 DEFAULT NULL
  , agg_types IN VARCHAR2 DEFAULT NULL
  , where_clause IN VARCHAR2 DEFAULT NULL
  , order_by IN VARCHAR2 DEFAULT NULL
  )
  IS
    type ref_cursor is ref cursor;
    rc ref_cursor;
    pv_tab parse.varchar2_table;
    val_tab parse.varchar2_table;
    agg_tab parse.varchar2_table;
    num_pvs integer := 0;
    num_vals integer := 0;
    num_aggs integer := 0;
    alias varchar2(100);
  BEGIN
    g_sql := NULL;
    -- Determine pivot values: use list if given, otherwise construct query to find them
    if pivot_values is not null then
      parse.delimstring_to_table( pivot_values, pv_tab, num_pvs );
    else
      open rc for 'select distinct ' || pivot_col || ' from ' || tables || ' where ' || nvl(where_clause,'1=1');
      loop
        num_pvs := num_pvs+1;
        fetch rc into pv_tab(num_pvs);
        exit when rc%notfound;
      end loop;
      close rc;
      num_pvs := num_pvs-1;
    end if;
    parse.delimstring_to_table( value_cols, val_tab, num_vals );
    -- Determine aggregate functions (default is SUM)
    if agg_types is not null then
      parse.delimstring_to_table( agg_types, agg_tab, num_aggs );
    end if;
    if num_aggs <> num_vals then
      for i in num_aggs+1..num_vals loop
        agg_tab(i) := 'sum';
      end loop;
    end if;
    pr('select '||group_cols);
    for pv in 1..num_pvs loop
      pv_tab(pv) := trim(pv_tab(pv));
      for val in 1..num_vals loop
        val_tab(val) := trim(val_tab(val));
        if num_vals = 1 then
          alias := substr(pv_tab(pv),1,30);
        else
          alias := substr(agg_tab(val) || '_' || TRANSLATE(pv_tab(pv),'x. -()<>','x') || '_' || TRANSLATE(val_tab(val),'x. -()<>','x'),1,30);
        end if;
        pr( ', '||agg_tab(val)||'(DECODE(' || pivot_col || ',''' || pv_tab(pv) || ''', ' || val_tab(val) || '))'
            || ' as "' || alias || '"' );
      end loop;
    end loop;
    pr('from ' || tables );
    if where_clause is not null then
      pr('where ' || where_clause);
    end if;
    pr('group by ' || group_cols);
    pr('order by ' || NVL(order_by,group_cols));
  END define_pivot_query;
  PROCEDURE print_pivot_query
  ( group_cols IN VARCHAR2
  , pivot_col IN VARCHAR2
  , tables IN VARCHAR2
  , value_cols IN VARCHAR2
  , pivot_values IN VARCHAR2 DEFAULT NULL
  , agg_types IN VARCHAR2 DEFAULT NULL
  , where_clause IN VARCHAR2 DEFAULT NULL
  , order_by IN VARCHAR2 DEFAULT NULL
  )
  IS
  BEGIN
    g_mode := 'PRINT';
    define_pivot_query
    ( group_cols
    , pivot_col
    , tables
    , value_cols
    , pivot_values
    , agg_types
    , where_clause
    , order_by
    );
  END;
  FUNCTION pivot_query
  ( group_cols IN VARCHAR2
  , pivot_col IN VARCHAR2
  , tables IN VARCHAR2
  , value_cols IN VARCHAR2
  , pivot_values IN VARCHAR2 DEFAULT NULL
  , agg_types IN VARCHAR2 DEFAULT NULL
  , where_clause IN VARCHAR2 DEFAULT NULL
  , order_by IN VARCHAR2 DEFAULT NULL
  ) RETURN VARCHAR2
  IS
  BEGIN
    g_mode := 'TEXT';
    define_pivot_query
    ( group_cols
    , pivot_col
    , tables
    , value_cols
    , pivot_values
    , agg_types
    , where_clause
    , order_by
    );
    RETURN g_sql;
  END;
  FUNCTION pivot_cursor
  ( group_cols IN VARCHAR2
  , pivot_col IN VARCHAR2
  , tables IN VARCHAR2
  , value_cols IN VARCHAR2
  , pivot_values IN VARCHAR2 DEFAULT NULL
  , agg_types IN VARCHAR2 DEFAULT NULL
  , where_clause IN VARCHAR2 DEFAULT NULL
  , order_by IN VARCHAR2 DEFAULT NULL
  ) RETURN ref_cursor
  IS
    rc ref_cursor;
  BEGIN
    g_mode := 'CURSOR';
    define_pivot_query
    ( group_cols
    , pivot_col
    , tables
    , value_cols
    , pivot_values
    , agg_types
    , where_clause
    , order_by
    );
    OPEN rc FOR g_sql;
    RETURN rc;
  END;
END;

/

18 comments:

Anonymous said...

This is fantastic .. keep the creative juices going..

Anonymous said...

Brilliant !!!. The best I have seen. The packages are very powerful, easy to use. Should be part of Oracle standard.

Ed said...

This is by far the best pivot function I have found. Just 1 question though, can you maybe show an example of how one would call this/select the output? Not used to ORACLE :\

Ed said...

Never mind figured it out, SQLDeveloper was confusing me ;)

Was passing named parameters.

select PIVOT.PIVOT_QUERY(
'DIM_GROUPING_FIELD',
'FR_MONTH',
'VW_TEST_PIVOT',
'MEASURE',
NULL,
NULL,
NULL,
NULL
) as resQuery from dual;

Unknown said...

Can you show an example of how the pivot functions can be called and return a recordset from a SELECT statement?

For example (something like this):
select * from (select PIVOT.PIVOT_query('deptno', 'job', 'emp', '1', NULL, 'count') from dual) a

Tony Andrews said...

Mitchell,

I'm not sure that that's feasible, since the return type of the function would have to change according to the number of columns returned.

Unknown said...

Hi Tony

I didn`t find an example of pivot_cursor.

I'd like to execute :

pivot.pivot_cursor
( 'x.ID_CENTRO, v.DESC_CENTRO, x.PERIODO, V.TOTAL '
, 'ID_PERFIL'
, 'v_tot_tx_user_perf_mens_ofic x, V_TOTAL_TX_MENS_OFICINA V'
, '1'
, agg_types => 'count'
, where_clause => 'x.id_centro = V.ID_CENTRO and x.periodo = v.periodo')

with a pivot_cursor. Can you help me to do the procedure to execute this script?

Thank you in advance

Alberto Rico
albrico@yahoo.com

Tony Andrews said...

ARA,

Something like this:

declare
l_cur sys_refcursor;
begin
l_cur := pivot.pivot_cursor
( 'x.ID_CENTRO, v.DESC_CENTRO, x.PERIODO, V.TOTAL '
, 'ID_PERFIL'
, 'v_tot_tx_user_perf_mens_ofic x, V_TOTAL_TX_MENS_OFICINA V'
, '1'
, agg_types => 'count'
, where_clause => 'x.id_centro = V.ID_CENTRO and x.periodo = v.periodo');

loop
fetch_l_cur into [variables];
exit when l_cur%notfound;
-- do something with this row
end loop;
close l_cur;
end;

Unknown said...

Hi Tony

I'd like to know if I can put an alias to a pivot column name. Is it possible?. For example change min_ANALYST_esal with ANALYST_salary or ANALYST_minimal_SALARY

Thank you again

Alberto Rico

Tony Andrews said...

@ARA, you would need to modify the package if you want it to generate different aliases. But are the precise alias names really so important?

BrianSherwood said...

Hello Tony.
Brilliant piece of code.
I'm having some difficulty using this for my purpose. I have two tables with the following data I'm trying to report on:
Tables
Equip e
Details d

d.Site e.Equip d.Material d.Value
======== ======= ========= =======
Plant 1 E-1 CO2 74000
Plant 1 E-1 CH4 133
Plant 1 E-1 N2O 12
Plant 1 E-2 CO2 140000
Plant 1 E-2 CH4 13
Plant 1 E-2 N2O 1.2
Plant 1 E-3 CO2 84000
Plant 1 E-3 CH4 143
Plant 1 E-3 N2O 22
etc.

The table I need to create is:
Site Equip CO2 CH4 N2O
-------- -------- ------ ------ -----
Plant 1 E-1 74000 133 13
Plant 1 E-2 140000 13 1.2
Plant 1 E-3 84000 143 22

My call:
SELECT pivot.print_pivot_query(
'e.Equip_Name, e.Equip_sub_category',
'd.Calc_Material',
'wh_equip e, wh_emis_detail d',
'd.segment_mass_kg/1000',
'sum',
'e.site_name=''Chaparral Plant'' AND d.Site_Name=e.Site_Name AND d.Scenario=''GHG''',
'e.EQUIP_SUB_CATEGORY, e.EQUIP_NAME')


Error I'm getting:
ORA-00923: FROM keyword not found where expected

Can you set me straight?




Error I'm getting:

Tony Andrews said...

Brian,

There were a few mistakes in yours:
1) The function version is called pivot_query not print_pivot_query
2) You missed the FROM clause of your select statement
3) You omitted the pivot_values parameter (and didn't use named notation to compensate)

Try either:

SELECT pivot.pivot_query(
'e.Equip_Name, e.Equip_sub_category',
'd.Calc_Material',
'wh_equip e, wh_emis_detail d',
'd.segment_mass_kg/1000',
NULL,
'sum',
'e.site_name=''Chaparral Plant'' AND d.Site_Name=e.Site_Name AND d.Scenario=''GHG''',
'e.EQUIP_SUB_CATEGORY, e.EQUIP_NAME')
FROM DUAL;

or

SELECT pivot.pivot_query(
'e.Equip_Name, e.Equip_sub_category',
'd.Calc_Material',
'wh_equip e, wh_emis_detail d',
'd.segment_mass_kg/1000',
'CO2,CH4,N2O',
'sum',
'e.site_name=''Chaparral Plant'' AND d.Site_Name=e.Site_Name AND d.Scenario=''GHG''',
'e.EQUIP_SUB_CATEGORY, e.EQUIP_NAME')
FROM DUAL;

In the first I passed NULL so it works out the possible values itself using some dynamic SQL. In the second I specified the 3 values you had in your example.

BrianSherwood said...

Tony,
First of all, thanks for your quick response!
Secondly, DOH! Sorry to waste your time with obvious errors (once you pointed them out to me...)
Thirdly, this gives me the same problem as when I created my own 'Decode' select statements: the Grouping on Equip_Name is not being 'honored' by the 'sum'; I'm getting a sum for all records in the Site, not separate totals by Equip_Name.
Rendition:
Equip CO2 N2O CH4
------ --------- ---------- ----------
E-1 30325.708 .057 .572
E-2 30325.708 .057 .572
E-3 30325.708 .057 .572


It should have different values for each equipment number.

Tony Andrews said...

Brian,

Can you post the SQL it generated?

Tony

BrianSherwood said...

Here is the SQL:
select e.EQUIP_NAME , sum(DECODE(d.Calc_Material,'Carbon Dioxide', d.segment_mass_kg/1000)) as "Carbon Dioxide" , sum(DECODE(d.Calc_Material,'Nitrogen Oxide (N2O)', d.segment_mass_kg/1000)) as "Nitrogen Oxide (N2O)" , sum(DECODE(d.Calc_Material,'Methane', d.segment_mass_kg/1000)) as "Methane" from wh_equip e, wh_emis_detail d where e.site_name='Chaparral Plant' AND d.Site_Name=e.Site_Name AND d.Scenario='GHG' AND d.Start_Date >= TO_DATE('1/1/2010','mm/dd/yyyy') AND d.End_Date <= TO_DATE('12/31/2010','mm/dd/yyyy') group by e.EQUIP_NAME order by e.EQUIP_NAME

Thanks!

Tony Andrews said...

I can't see anything obvious. Try a simplified version, does that work as expected?

select e.EQUIP_NAME
, sum(d.segment_mass_kg/1000))
from wh_equip e, wh_emis_detail d
where e.site_name='Chaparral Plant'
AND d.Site_Name=e.Site_Name
AND d.Scenario='GHG'
AND d.Start_Date >= TO_DATE('1/1/2010','mm/dd/yyyy')
AND d.End_Date <= TO_DATE('12/31/2010','mm/dd/yyyy')
group by e.EQUIP_NAME
order by e.EQUIP_NAME

BrianSherwood said...

I'm still getting the same (total) value for all pieces of equipment.

Tony, again, thanks for your reply; I don't want to take up more of your time unless it is a problem with your code, which I don't think it is. I will work a bit more on the grouping problem and let you know when I find the solution.

BrianSherwood said...

Tony,
I simplified it even more (exclusing the join) and it works.
So, the problem has to do with the join. I left out one join condition (equip) which caused it to aggregate across the site instead of just for the equipment.

Thanks for all of your help!