मेरे आवेदन में, मेरे पास एक क्वेरी है जो "फाइलों" तालिका में एक खोज करता है।
तालिका "फाइलें" का विभाजन "एफ" द्वारा किया जाता है। "बनाई गई" (तालिका की परिभाषा देखें और क्लाइंट 19 ("एफ"। "cid = 19) के लिए ~ 26 मिलियन पंक्तियां हैं)।
यहाँ बिंदु है, अगर मैं यह प्रश्न करता हूँ:
SELECT "f"."id" AS "FileId"
, "f"."name" AS "FileName"
, "f"."year" AS "Fileyear"
, "f"."cid" AS "clientId"
, "f"."created" AS "FileDate"
, CASE WHEN ("vnVE0"."value" is not null AND "vnVE0"."value" != '')
THEN CAST("vnVE0"."value" AS decimal(28,2))
ELSE 0 END AS "keywordValueCol0_numeric"
FROM files "f"
OUTER APPLY
(
SELECT DISTINCT
VT.[value]
FROM dbo.value_number AS VT
WHERE
VT.id_file = F.id
AND VT.id_field = 260
) AS "vnVE0"
WHERE "grapado" IS NULL AND "masterversion" IS NULL AND ("f"."year" = 2013 OR "f"."year" = 0) AND "f"."cid" = 19
GROUP BY "f"."id", "f"."name", "f"."year", "f"."cid", "f"."created", CASE WHEN ("vnVE0"."value" is not null AND "vnVE0"."value" != '')
THEN CAST("vnVE0"."value" AS decimal(28,2))
ELSE 0 END
ORDER BY (SELECT NULL)
OFFSET 0 ROWS FETCH NEXT 50 ROWS ONLY;
मुझे निम्नलिखित निष्पादन योजना के साथ 0 सेकंड में परिणाम मिलते हैं: https://www.brentozar.com/pastetheplan/?id=SkV0-FDcG
लेकिन अगर मैं "नाम" से ऑर्डर करने की कोशिश करता हूं तो क्वेरी बहुत धीमी हो जाती है:
SELECT "f"."id" AS "FileId"
, "f"."name" AS "FileName"
, "f"."year" AS "Fileyear"
, "f"."cid" AS "clientId"
, "f"."created" AS "FileDate"
, CASE WHEN ("vnVE0"."value" is not null AND "vnVE0"."value" != '')
THEN CAST("vnVE0"."value" AS decimal(28,2))
ELSE 0 END AS "keywordValueCol0_numeric"
FROM files "f"
OUTER APPLY
(
SELECT DISTINCT
VT.[value]
FROM dbo.value_number AS VT
WHERE
VT.id_file = F.id
AND VT.id_field = 260
) AS "vnVE0"
WHERE "grapado" IS NULL AND "masterversion" IS NULL AND ("f"."year" = 2013 OR "f"."year" = 0) AND "f"."cid" = 19
GROUP BY "f"."id", "f"."name", "f"."year", "f"."cid", "f"."created", CASE WHEN ("vnVE0"."value" is not null AND "vnVE0"."value" != '')
THEN CAST("vnVE0"."value" AS decimal(28,2))
ELSE 0 END
ORDER BY "f"."name"
OFFSET 0 ROWS FETCH NEXT 50 ROWS ONLY;
इस क्वेरी में मुझे ११ मिनट लगते हैं, निम्नलिखित निष्पादन योजना के साथ परिणाम: https://www.brentozar.com/pastetheplan/?id=Sk3Fbtv9M
साथ ही, यदि मैं कॉलम द्वारा क्रम बदलता हूं, तो परिणाम समान होता है।
जैसा कि आप निष्पादन योजना में देख सकते हैं, सूचकांक "files_mv" की लागत 61% है, यह सूचकांक परिभाषा है:
CREATE NONCLUSTERED INDEX [files_mv] ON [dbo].[files]
(
[masterversion] ASC,
[year] ASC,
[cat_id] ASC,
[cid] ASC,
[eid] ASC,
[grapado] ASC,
[sub_id] ASC,
[tip_id] ASC
)
INCLUDE ( [id],
[name]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
GO
मैं Azure के साथ SQL सर्वर का उपयोग करता हूं। विशेष रूप से, मूल्य / मॉडल टियर "S4 एस्टैंडार (200 DTUs)" के साथ Azure SQL डेटाबेस।
मुझे बहुत सारे डेटा प्राप्त हो रहे हैं लेकिन मुझे लगता है कि इंटरनेट कनेक्शन एक अड़चन नहीं है, क्योंकि अन्य प्रश्नों में मुझे बहुत अधिक डेटा प्राप्त होता है और वे तेज़ होते हैं।
इसके अलावा, मैं इस तालिका में बड़े पैमाने पर डेटा सम्मिलित कर रहा हूं, और कुछ ही दिनों में मैं ~ 240 मिलियन पंक्तियों को फाइल टेबल (एक cid के लिए) और ~ 480 मिलियन पंक्तियों में value_number तालिका में कर दूंगा।
अतिरिक्त जानकारी
पार्टीटन फ़ंक्शन "PF_files_partitioning":
CREATE PARTITION FUNCTION PF_files_partitioning (DATETIME2(7))
AS
RANGE LEFT FOR VALUES ( '2013-03-31 23:59:59',
'2013-06-30 23:59:59',
'2013-09-30 23:59:59',
'2013-12-31 23:59:59',
'2014-03-31 23:59:59',
'2014-06-30 23:59:59',
'2014-09-30 23:59:59',
'2014-12-31 23:59:59',
'2015-03-31 23:59:59',
'2015-06-30 23:59:59',
'2015-09-30 23:59:59',
'2015-12-31 23:59:59',
'2016-03-31 23:59:59',
'2016-06-30 23:59:59',
'2016-09-30 23:59:59',
'2016-12-31 23:59:59',
'2017-03-31 23:59:59',
'2017-06-30 23:59:59',
'2017-09-30 23:59:59',
'2017-12-31 23:59:59',
'2018-03-31 23:59:59')
विभाजन योजना "PS_files_partitioning":
CREATE PARTITION SCHEME PS_files_partitioning AS PARTITION PF_files_partitioning ALL TO ([PRIMARY]);
** मेरे प्रत्येक विभाजन में लगभग 15 मिलियन पंक्तियाँ होंगी।
फ़ाइलें तालिका:
CREATE TABLE [dbo].[files](
[id] [bigint] IDENTITY(1,1) NOT NULL,
[cid] [tinyint] NOT NULL,
[eid] [bigint] NOT NULL,
[cat_id] [bigint] NOT NULL,
[tip_id] [bigint] NULL,
[sub_id] [bigint] NULL,
[year] [smallint] NOT NULL,
[caducidad] [smallint] NULL,
[grapadopri] [int] NOT NULL,
[grapado] [bigint] NULL,
[name] [nvarchar](255) NOT NULL,
[extension] [tinyint] NOT NULL,
[size] [bigint] NOT NULL,
[id_doc] [bit] NOT NULL,
[observaciones] [nvarchar](255) NOT NULL,
[indexed] [bit] NOT NULL,
[signed] [bit] NOT NULL,
[created] [datetime2](7) NOT NULL,
[name_lower] [nvarchar](255) NOT NULL,
[modified] [datetime2](7) NULL,
[related] [bit] NOT NULL,
[masterversion] [bigint] NULL,
[versioned] [bit] NOT NULL,
[hwsignature] [tinyint] NOT NULL,
[blockedUserId] [smallint] NULL,
CONSTRAINT [PK_files_id] PRIMARY KEY CLUSTERED
(
[id] ASC,
[created] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
ON PS_files_partitioning([created]),
CONSTRAINT [files$estructure_unique] UNIQUE NONCLUSTERED
(
[cat_id] ASC,
[tip_id] ASC,
[sub_id] ASC,
[year] ASC,
[name] ASC,
[grapado] ASC,
[created] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
)
value_number तालिका:
CREATE TABLE [dbo].[value_number](
[id] [bigint] IDENTITY(1,1) NOT NULL,
[id_file] [bigint] NOT NULL DEFAULT ((0)),
[id_field] [bigint] NOT NULL DEFAULT ((0)),
[value] [nvarchar](255) NULL DEFAULT (NULL),
[id_doc] [bigint] NULL DEFAULT (NULL)
CONSTRAINT [PK_value_number_id] PRIMARY KEY CLUSTERED
(
[id] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
)
फाइलों की तालिका का सूचकांक
CREATE NONCLUSTERED INDEX [files_clientes] ON [dbo].[files]
(
[cid] ASC
)
INCLUDE ([id]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
ON PS_files_partitioning([created])
CREATE NONCLUSTERED INDEX [files_grapado] ON [dbo].[files]
(
[grapado] ASC
)
INCLUDE ( [id],
[name]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
ON PS_files_partitioning([created])
CREATE NONCLUSTERED INDEX [files_mv] ON [dbo].[files]
(
[masterversion] ASC,
[year] ASC,
[cat_id] ASC,
[cid] ASC,
[eid] ASC,
[grapado] ASC,
[sub_id] ASC,
[tip_id] ASC
)
INCLUDE ( [id],
[name]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
ON PS_files_partitioning([created])
CREATE NONCLUSTERED INDEX [files_ocr] ON [dbo].[files]
(
[cid] ASC,
[grapado] ASC,
[indexed] ASC,
[masterversion] ASC,
[extension] ASC
)
INCLUDE ( [id],
[eid],
[cat_id],
[tip_id],
[sub_id],
[year],
[name]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
ON PS_files_partitioning([created])
CREATE NONCLUSTERED INDEX [files_ocr2] ON [dbo].[files]
(
[cid] ASC,
[eid] ASC,
[grapado] ASC,
[indexed] ASC,
[masterversion] ASC,
[extension] ASC
)
INCLUDE ( [id],
[cat_id],
[tip_id],
[sub_id],
[year],
[name]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
ON PS_files_partitioning([created])
CREATE NONCLUSTERED INDEX [files_ocr3] ON [dbo].[files]
(
[cid] ASC,
[cat_id] ASC,
[grapado] ASC,
[indexed] ASC,
[masterversion] ASC,
[extension] ASC
)
INCLUDE ( [eid],
[tip_id],
[sub_id],
[year],
[name]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
ON PS_files_partitioning([created])
CREATE NONCLUSTERED INDEX [busqueda_name] ON [dbo].[files]
(
[cid] ASC,
[eid] ASC,
[grapado] ASC,
[year] ASC
)
INCLUDE ( [id],
[cat_id],
[tip_id],
[sub_id],
[grapadopri],
[name],
[size],
[id_doc],
[signed],
[created],
[modified],
[related],
[masterversion]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
ON PS_files_partitioning([created])
CREATE NONCLUSTERED INDEX [busqueda2] ON [dbo].[files]
(
[cid] ASC,
[eid] ASC,
[cat_id] ASC,
[grapado] ASC,
[masterversion] ASC,
[year] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
ON PS_files_partitioning([created])
CREATE NONCLUSTERED INDEX [cid] ON [dbo].[files]
(
[cid] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
ON PS_files_partitioning([created])
CREATE NONCLUSTERED INDEX [eid] ON [dbo].[files]
(
[eid] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
ON PS_files_partitioning([created])
CREATE NONCLUSTERED INDEX [extension] ON [dbo].[files]
(
[extension] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
ON PS_files_partitioning([created])
CREATE NONCLUSTERED INDEX [FK_files_archivo] ON [dbo].[files]
(
[grapado] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
ON PS_files_partitioning([created])
CREATE NONCLUSTERED INDEX [FK_files_tipo] ON [dbo].[files]
(
[tip_id] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
ON PS_files_partitioning([created])
CREATE NONCLUSTERED INDEX [grapadopri] ON [dbo].[files]
(
[grapadopri] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
ON PS_files_partitioning([created])
CREATE NONCLUSTERED INDEX [index_all] ON [dbo].[files]
(
[cid] ASC,
[eid] ASC,
[grapado] ASC,
[masterversion] ASC
)
INCLUDE ( [cat_id],
[tip_id],
[sub_id],
[year],
[grapadopri],
[name],
[size],
[id_doc],
[signed],
[created],
[modified],
[related],
[versioned]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
ON PS_files_partitioning([created])
CREATE NONCLUSTERED INDEX [missing_index_7_6] ON [dbo].[files]
(
[cid] ASC,
[eid] ASC,
[grapado] ASC,
[name] ASC,
[year] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
ON PS_files_partitioning([created])
CREATE NONCLUSTERED INDEX [ocrCloudClients] ON [dbo].[files]
(
[grapado] ASC,
[indexed] ASC,
[extension] ASC
)
INCLUDE ( [cid],
[eid],
[cat_id],
[tip_id],
[sub_id]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
ON PS_files_partitioning([created])
CREATE NONCLUSTERED INDEX [searchEntity] ON [dbo].[files]
(
[cid] ASC,
[eid] ASC,
[grapado] ASC,
[masterversion] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
ON PS_files_partitioning([created])
CREATE NONCLUSTERED INDEX [sub_id] ON [dbo].[files]
(
[sub_id] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
ON PS_files_partitioning([created])
Value_number तालिका के अनुक्रमित
CREATE NONCLUSTERED INDEX [searchValues] ON [dbo].[value_number]
(
[id_field] ASC
)
INCLUDE ( [id_file],
[value]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
CREATE NONCLUSTERED INDEX [search] ON [dbo].[value_number]
(
[id_file] ASC,
[id_field] ASC
)
INCLUDE ( [value]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
CREATE NONCLUSTERED INDEX [id_field] ON [dbo].[value_number]
(
[id_field] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
CREATE NONCLUSTERED INDEX [FK_valueesN_documento] ON [dbo].[value_number]
(
[id_doc] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
CREATE NONCLUSTERED INDEX [FK_valueesN_archivo] ON [dbo].[value_number]
(
[id_file] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
आँकड़े आज तक हैं। मैंने वर्ष और अन्य कॉलम के लिए डेटा प्रकार बदल दिए हैं और अब प्रदर्शन थोड़ा बेहतर लग रहा है, लेकिन निष्पादन योजना अभी भी वही है। मैं कार्डिनैलिटी एस्टीमेशन (बदलते सूचकांक) को ठीक करने की कोशिश कर रहा हूं, लेकिन मुझे अभी तक सफलता नहीं मिली थी। Azure प्रलेखन के अनुसार मेरे पास डेटाबेस में 130 संगतता स्तर होना चाहिए और मेरे पास पहले से ProductVersion 12.0 के साथ 100 है।