Effective SQL time cuts

Question

Effective SQL time cuts

I would like to stretch time series to different lengths using SQL efficiently. Suppose I have the following data:

-- drop table if exists time_series;

-- Sample time series: one row per observation, positioned by id.
-- id is declared as the primary key so that positions are unique and the
-- lookups/joins on id used by the queries below are backed by an index.
create table time_series (
  id serial primary key,
  val numeric)
;

-- Sample series of 11 values: rises from 1 to 6, then falls back to 1.
-- Only val is supplied; id is filled in by the serial column.
insert into time_series (val)
values
  (1),
  (2),
  (3),
  (4),
  (5),
  (6),
  (5),
  (4),
  (3),
  (2),
  (1);

This time series has a length of 11, and I would like to stretch it to a length of 15 so that the sum of the values in the stretched time series is the same as the sum of the values in the original time series. I have a solution that is inefficient:

-- Stretch the 11-element series to 15 elements while preserving the total.
-- Every original value is split into 15 equal pieces (11 * 15 rows in all);
-- the pieces are numbered in series order and each run of 11 consecutive
-- pieces is summed into one new element.
with pieces as (
  select
    ts.val / 15.0 as piece_val,
    -- pieces 1..11 -> new_id 1, pieces 12..22 -> new_id 2, ...
    ceil(row_number() over (order by ts.id, steps.gs) / 11.0) as new_id
  from
    time_series ts
    cross join generate_series(1, 15) as steps (gs)
)
select
  new_id,
  sum(piece_val) as new_val
from
  pieces
group by
  new_id
order by
  new_id
;

This first creates an intermediate result with 15 * 11 rows and then collapses it back to 15 rows.

While this works well for small time series, performance degrades significantly with longer ones. If I want to stretch 2,000 rows to 3,000, then the query first has to generate 6M rows (which takes 30 seconds on my laptop).

Test data:

-- Larger test series: 1..1000 ascending, then 1000..1 descending (2,000 rows).
insert into time_series (val)
select n
from generate_series(1, 1000) as n;

insert into time_series (val)
select n
from generate_series(1000, 1, -1) as n;

Is there a more efficient way to do this in SQL?

+3

sql postgresql

Tomas Greif 25 . '14 7:52

2

.

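The subquery ts1 pairs each value with the next one (the start and end of an interval), and linear interpolation between the two gives new_val.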

The +1 and -1 terms convert between 1-based numbering (1,2,3,...) and 0-based numbering (0,1,2,...).

-- Resample the series to 100 points by linear interpolation.
-- 100 is the target length; 11 is the length of the sample series, which
-- gives 11 - 1 intervals between consecutive values.

-- One row per interval: the value at position id and the value at id + 1.
with segments as (
  select
    cur.id,
    cur.val as start_val,
    nxt.val as end_val
  from
    time_series cur
    join time_series nxt on cur.id = nxt.id - 1
)
select
  pts.gs as new_id,
  -- (gs - 1) / (100.0 / (11.0 - 1)) is the 0-based position on the old axis;
  -- adding 1 and subtracting the interval id leaves the fraction inside it.
  seg.start_val + (seg.end_val - seg.start_val) * ((pts.gs - 1) / (100.0 / (11.0 - 1)) + 1 - seg.id) as new_val,
  seg.start_val as startinterval,
  seg.end_val as endinterval
from
  generate_series(1, 100) as pts (gs)
  -- left join keeps every new point even if no interval matches it
  left join segments seg
    on floor((pts.gs - 1) / (100.0 / (11.0 - 1))) + 1 = seg.id
order by
  pts.gs

+1

valex 25 . '14 10:37

Tomas Greif · Accepted Answer · 2014-02-27T10:38:05+0000

Here is a general solution. In this example a series of 5 elements is stretched to 30:

-- Stretch the series to ntsl elements (30 here) without materialising the
-- otsl * ntsl intermediate rows. Conceptually every old element is cut into
-- ntsl pieces and every new element is made of otsl consecutive pieces; only
-- the first and last piece of each new element are located arithmetically.
with time_series (id, val) as (
  values (1, 1), (2, 2), (3, 3), (4, 2), (5, 1)
)

-- First and last piece number (1-based, counted over the whole series)
-- belonging to each new element. otsl is the old length, ntsl the new one.
, piece_start_end as (
  select
    gs as new_id,
    (gs - 1) * otsl + 1 as piece_start,
    gs * otsl as piece_end,
    ntsl
  from
    (select generate_series(1, ntsl) as gs, ntsl from (select 30 as ntsl) target) new_ids
    cross join (select count(*) as otsl from time_series) old_len
)

-- Translate piece numbers into (old element id, piece position within it).
, mapping_to_old_ts_ids as (
  select
    new_id,
    -- integer ceiling division: the old element that contains the piece
    (piece_start + ntsl - 1) / ntsl as old_id_start,
    -- 1-based position of the piece inside that element (1 .. ntsl)
    mod(piece_start - 1, ntsl) + 1 as old_id_start_piece,
    (piece_end + ntsl - 1) / ntsl as old_id_end,
    mod(piece_end - 1, ntsl) + 1 as old_id_end_piece,
    ntsl
  from
    piece_start_end
)

select
  m.new_id,
  case
    -- all pieces come from a single old element
    when m.old_id_start = m.old_id_end
      then (m.old_id_end_piece - m.old_id_start_piece + 1) / m.ntsl::numeric * head.val
    -- pieces straddle two old elements: tail of the first plus head of the second
    else
      (m.ntsl::numeric - m.old_id_start_piece + 1) / m.ntsl::numeric * head.val
      + coalesce((m.old_id_end_piece / m.ntsl::numeric * tail.val), 0)
  end
from
  mapping_to_old_ts_ids m
  join time_series head on (m.old_id_start = head.id)
  left join time_series tail on (m.old_id_end = tail.id)
order by
  m.new_id

Below is the same approach written step by step, with comments (stretching 5 elements to 8):

/* Step-by-step version of the stretch, with 5 old elements and 8 new ones
   hard-coded. It returns one row per new element together with the
   intermediate mapping columns, so the calculation can be checked by eye. */
with time_series (id, val) as (values
  (1, 1),
  (2, 2),
  (3, 3),
  (4, 2),
  (5, 1)
)

/* The basic idea is to divide every element into 8 pieces and then aggregate
   them back in groups of 5. When stretching 5 into 8, we have 5 * 8 = 40
   pieces. For every element in the new time series we can calculate the
   number of its first and last piece. */
, piece_start_end as (
  select 
    gs as new_id,
    (gs - 1) * 5 + 1 as piece_start,
    gs * 5 as piece_end
  from 
    generate_series(1, 8) gs
)


/* Now we need to calculate where exactly in the old time series the first and
   last piece fall. E.g. the 1st element of the new time series starts in
   element 1 at position 1 and ends in element 1 at position 5. The 2nd element
   of the new time series starts in element 1 at position 6 and ends in
   element 2 at position 2. */
, mapping_to_old_ts_ids as (
  select 
    *, 
    -- ceiling of piece / 8: the old element that contains the piece
    case when mod(piece_start, 8) <> 0 then piece_start / 8 + 1 else piece_start / 8 end as old_id_start,
    -- position inside that element, 1..8 (a remainder of 0 means the 8th piece)
    case mod(piece_start, 8) when 0 then 8 else mod(piece_start, 8) end as old_id_start_piece,

    case when mod(piece_end, 8) <> 0 then piece_end / 8 + 1 else piece_end / 8 end as old_id_end,
    case mod(piece_end, 8) when 0 then 8 else mod(piece_end, 8) end as old_id_end_piece
  from 
    piece_start_end
)

/* In the final step we assign the value of each new element by taking the
   appropriate number of pieces from the old elements. The joins look rows up
   by id = computed position, so ids are expected to run 1..5 without gaps.
   Only the first and the last old element are read, which is sufficient here
   because a new element (5 pieces) never spans more than two old elements
   (8 pieces each). */


select
    new_id,

    old_id_start,
    old_id_start_piece,
    ts1.val as old_id_start_val,

    old_id_end,
    old_id_end_piece,
    ts2.val as old_id_end_val,

    case 
      -- all pieces come from one old element
      when old_id_start = old_id_end then (old_id_end_piece - old_id_start_piece + 1) / 8.0 * ts1.val 
      -- remaining pieces of the first element plus leading pieces of the second
      when old_id_start <> old_id_end then (8 - old_id_start_piece +1 ) / 8.0 * ts1.val + coalesce((old_id_end_piece / 8.0 * ts2.val), 0) end

from
  mapping_to_old_ts_ids oid
  join time_series ts1 on (oid.old_id_start = ts1.id)
  left join time_series ts2 on (oid.old_id_end = ts2.id)
-- without an explicit ordering the row order of the result is not defined
order by
  new_id

Effective SQL time cuts

More articles: