IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[dbo].[vGetVisits]') AND type in (N'U'))
DROP TABLE [dbo].[vGetVisits]
GO
CREATE TABLE [dbo].[vGetVisits](
[id] [int] NOT NULL,
[mydate] [datetime] NOT NULL,
CONSTRAINT [PK_vGetVisits] PRIMARY KEY CLUSTERED
(
[id] ASC
)
)
GO
INSERT INTO [dbo].[vGetVisits]([id], [mydate])
VALUES
(1, '2014-01-01 11:00'),
(2, '2014-01-03 10:00'),
(3, '2014-01-04 09:30'),
(4, '2014-04-01 10:00'),
(5, '2014-05-01 11:00'),
(6, '2014-07-01 09:00'),
(7, '2014-07-31 08:00');
GO
-- Clean up
IF OBJECT_ID (N'dbo.udfLastHitRecursive', N'FN') IS NOT NULL
DROP FUNCTION udfLastHitRecursive;
GO
-- Actual Function
CREATE FUNCTION dbo.udfLastHitRecursive
( @MyDate datetime)
RETURNS TINYINT
AS
BEGIN
-- Your returned value 1 or 0
DECLARE @Returned_Value TINYINT;
SET @Returned_Value=0;
-- Prepare gaps table to be used.
WITH gaps AS
(
-- Select Date and MaxDiff from the original table
SELECT
CONVERT(Date,mydate) AS [date]
, DATEDIFF(day,ISNULL(LAG(mydate, 1) OVER (ORDER BY mydate), mydate) , mydate) AS [MaxDiff]
FROM dbo.vGetVisits
)
SELECT @Returned_Value=
(SELECT DISTINCT -- DISTINCT in case we have same date but different time
CASE WHEN
(
-- It is a first entry
[date]=(SELECT MIN(CONVERT(Date,mydate)) FROM dbo.vGetVisits))
OR
/*
--Gap between last qualifying date and entered is greater than 90
Calculate Running sum upto and including required date
and find a remainder of division by 91.
*/
((SELECT SUM(t1.MaxDiff)
FROM (SELECT [MaxDiff] FROM gaps WHERE [date]<=t2.[date]
) t1
)%91 -
/*
ISNULL added to include first value that always returns NULL
Calculate Running sum upto and NOT including required date
and find a remainder of division by 91
*/
ISNULL((SELECT SUM(t1.MaxDiff)
FROM (SELECT [MaxDiff] FROM gaps WHERE [date]<t2.[date]
) t1
)%91, 0) -- End ISNULL
<0 )
/* End Running sum upto and including required date */
OR
-- Gap between two nearest dates is greater than 90
((SELECT SUM(t1.MaxDiff)
FROM (SELECT [MaxDiff] FROM gaps WHERE [date]<=t2.[date]
) t1
) - ISNULL((SELECT SUM(t1.MaxDiff)
FROM (SELECT [MaxDiff] FROM gaps WHERE [date]<t2.[date]
) t1
), 0) > 90)
THEN 1
ELSE 0
END
AS [Qualifying]
FROM gaps t2
WHERE [date]=CONVERT(Date,@MyDate))
-- What is neccesary to return when entered date is not in dbo.vGetVisits?
RETURN @Returned_Value
END
GO
SELECT
dbo.udfLastHitRecursive(mydate) AS [Qualifying]
, [id]
, mydate
FROM dbo.vGetVisits
ORDER BY mydate
Résultat
Consultez également Comment calculer le total cumulé dans SQL Server
mise à jour: veuillez voir ci-dessous les résultats des tests de performances.
En raison de la logique différente utilisée pour trouver un "écart de 90 jours", les ypercubes et mes solutions s'ils sont laissés intacts peuvent renvoyer des résultats différents à la solution de Paul White. Cela est dû à l'utilisation des fonctions DATEDIFF et DATEADD respectivement.
Par exemple:
SELECT DATEADD(DAY, 90, '2014-01-01 00:00:00.000')
renvoie «2014-04-01 00: 00: 00.000», ce qui signifie que «2014-04-01 01: 00: 00.000» est au-delà d'un intervalle de 90 jours
mais
SELECT DATEDIFF(DAY, '2014-01-01 00:00:00.000', '2014-04-01 01:00:00.000')
Renvoie «90», ce qui signifie qu'il est toujours dans l'écart.
Prenons un exemple de détaillant. Dans ce cas, la vente d'un produit périssable qui s'est vendu à la date «2014-01-01» à «2014-01-01 23: 59: 59: 999» est acceptable. La valeur DATEDIFF (DAY, ...) dans ce cas est donc OK.
Un autre exemple est un patient qui attend d'être vu. Pour quelqu'un qui vient le '2014-01-01 00: 00: 00: 000' et qui part le '2014-01-01 23: 59: 59: 999', il est 0 (zéro) jour si DATEDIFF est utilisé même si le l'attente réelle était de près de 24 heures. Encore une fois, le patient qui vient au '2014-01-01 23:59:59' et repart au '2014-01-02 00:00:01' a attendu un jour si DATEDIFF est utilisé.
Mais je m'égare.
J'ai laissé les solutions DATEDIFF et même les performances testées, mais elles devraient vraiment être dans leur propre ligue.
Il a également été noté que pour les grands ensembles de données, il est impossible d'éviter les valeurs du même jour. Donc, si nous disons 13 millions d'enregistrements couvrant 2 ans de données, nous finirons par avoir plus d'un enregistrement pendant quelques jours. Ces enregistrements sont filtrés dès que possible dans mes solutions DATEDIFF et ypercube. J'espère que ypercube ne s'en soucie pas.
Les solutions ont été testées sur le tableau suivant
CREATE TABLE [dbo].[vGetVisits](
[id] [int] NOT NULL,
[mydate] [datetime] NOT NULL,
)
avec deux index clusterisés différents (mydate dans ce cas):
CREATE CLUSTERED INDEX CI_mydate on vGetVisits(mydate)
GO
Le tableau a été rempli de la manière suivante
SET NOCOUNT ON
GO
INSERT INTO dbo.vGetVisits(id, mydate)
VALUES (1, '01/01/1800')
GO
DECLARE @i bigint
SET @i=2
DECLARE @MaxRows bigint
SET @MaxRows=13001
WHILE @i<@MaxRows
BEGIN
INSERT INTO dbo.vGetVisits(id, mydate)
VALUES (@i, DATEADD(day,FLOOR(RAND()*(3)),(SELECT MAX(mydate) FROM dbo.vGetVisits)))
SET @i=@i+1
END
Pour un cas de plusieurs millions de lignes, INSERT a été modifié de telle manière que des entrées de 0 à 20 minutes ont été ajoutées au hasard.
Toutes les solutions ont été soigneusement emballées dans le code suivant
SET NOCOUNT ON
GO
DECLARE @StartDate DATETIME
SET @StartDate = GETDATE()
--- Code goes here
PRINT 'Total milliseconds: ' + CONVERT(varchar, DATEDIFF(ms, @StartDate, GETDATE()))
Codes réels testés (sans ordre particulier):
La solution DATEDIFF d' Ypercube ( YPC, DATEDIFF )
DECLARE @cd TABLE
( TheDate datetime PRIMARY KEY,
Qualify INT NOT NULL
);
DECLARE
@TheDate DATETIME,
@Qualify INT = 0,
@PreviousCheckDate DATETIME = '1799-01-01 00:00:00'
DECLARE c CURSOR
LOCAL STATIC FORWARD_ONLY READ_ONLY
FOR
SELECT
mydate
FROM
(SELECT
RowNum = ROW_NUMBER() OVER(PARTITION BY cast(mydate as date) ORDER BY mydate)
, mydate
FROM
dbo.vGetVisits) Actions
WHERE
RowNum = 1
ORDER BY
mydate;
OPEN c ;
FETCH NEXT FROM c INTO @TheDate ;
WHILE @@FETCH_STATUS = 0
BEGIN
SET @Qualify = CASE WHEN DATEDIFF(day, @PreviousCheckDate, @Thedate) > 90 THEN 1 ELSE 0 END ;
IF @Qualify=1
BEGIN
INSERT @cd (TheDate, Qualify)
SELECT @TheDate, @Qualify ;
SET @PreviousCheckDate=@TheDate
END
FETCH NEXT FROM c INTO @TheDate ;
END
CLOSE c;
DEALLOCATE c;
SELECT TheDate
FROM @cd
ORDER BY TheDate ;
La solution DATEADD d'Ypercube ( YPC, DATEADD )
DECLARE @cd TABLE
( TheDate datetime PRIMARY KEY,
Qualify INT NOT NULL
);
DECLARE
@TheDate DATETIME,
@Next_Date DATETIME,
@Interesting_Date DATETIME,
@Qualify INT = 0
DECLARE c CURSOR
LOCAL STATIC FORWARD_ONLY READ_ONLY
FOR
SELECT
[mydate]
FROM [test].[dbo].[vGetVisits]
ORDER BY mydate
;
OPEN c ;
FETCH NEXT FROM c INTO @TheDate ;
SET @Interesting_Date=@TheDate
INSERT @cd (TheDate, Qualify)
SELECT @TheDate, @Qualify ;
WHILE @@FETCH_STATUS = 0
BEGIN
IF @TheDate>DATEADD(DAY, 90, @Interesting_Date)
BEGIN
INSERT @cd (TheDate, Qualify)
SELECT @TheDate, @Qualify ;
SET @Interesting_Date=@TheDate;
END
FETCH NEXT FROM c INTO @TheDate;
END
CLOSE c;
DEALLOCATE c;
SELECT TheDate
FROM @cd
ORDER BY TheDate ;
La solution de Paul White ( PW )
;WITH CTE AS
(
SELECT TOP (1)
T.[mydate]
FROM dbo.vGetVisits AS T
ORDER BY
T.[mydate]
UNION ALL
SELECT
SQ1.[mydate]
FROM
(
SELECT
T.[mydate],
rn = ROW_NUMBER() OVER (
ORDER BY T.[mydate])
FROM CTE
JOIN dbo.vGetVisits AS T
ON T.[mydate] > DATEADD(DAY, 90, CTE.[mydate])
) AS SQ1
WHERE
SQ1.rn = 1
)
SELECT
CTE.[mydate]
FROM CTE
OPTION (MAXRECURSION 0);
Ma solution DATEADD ( PN, DATEADD )
DECLARE @cd TABLE
( TheDate datetime PRIMARY KEY
);
DECLARE @TheDate DATETIME
SET @TheDate=(SELECT MIN(mydate) as mydate FROM [dbo].[vGetVisits])
WHILE (@TheDate IS NOT NULL)
BEGIN
INSERT @cd (TheDate) SELECT @TheDate;
SET @TheDate=(
SELECT MIN(mydate) as mydate
FROM [dbo].[vGetVisits]
WHERE mydate>DATEADD(DAY, 90, @TheDate)
)
END
SELECT TheDate
FROM @cd
ORDER BY TheDate ;
Ma solution DATEDIFF ( PN, DATEDIFF )
DECLARE @MinDate DATETIME;
SET @MinDate=(SELECT MIN(mydate) FROM dbo.vGetVisits);
;WITH gaps AS
(
SELECT
t1.[date]
, t1.[MaxDiff]
, SUM(t1.[MaxDiff]) OVER (ORDER BY t1.[date]) AS [Running Total]
FROM
(
SELECT
mydate AS [date]
, DATEDIFF(day,LAG(mydate, 1, mydate) OVER (ORDER BY mydate) , mydate) AS [MaxDiff]
FROM
(SELECT
RowNum = ROW_NUMBER() OVER(PARTITION BY cast(mydate as date) ORDER BY mydate)
, mydate
FROM dbo.vGetVisits
) Actions
WHERE RowNum = 1
) t1
)
SELECT [date]
FROM gaps t2
WHERE
( ([Running Total])%91 - ([Running Total]- [MaxDiff])%91 <0 )
OR
( [MaxDiff] > 90)
OR
([date]=@MinDate)
ORDER BY [date]
J'utilise SQL Server 2012, donc je m'excuse auprès de Mikael Eriksson, mais son code ne sera pas testé ici. Je m'attendrais toujours à ce que ses solutions avec DATADIFF et DATEADD retournent des valeurs différentes sur certains ensembles de données.
Et les résultats réels sont: