SQL Server - Necesito optimizar este query de MapReduce

 
Vista:

Necesito optimizar este query de MapReduce

Publicado por MFCO (1 intervención) el 27/05/2015 02:47:45
ste SQL me corre en 34 segundos, necesito que me corra en menos! Pero no se si lo que tengo que optimizar es la última parte donde esta el contador o antes! Debe ser com MapReduce.

----- PRIMER PASO

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
IF NOT EXISTS (SELECT * FROM sys.types WHERE is_user_defined = 1 AND name = 'word_t')
CREATE TYPE word_t FROM nvarchar(32) NOT NULL;
 
IF NOT EXISTS (SELECT * FROM sys.types WHERE is_user_defined = 1 AND name = 'words_t')
CREATE TYPE words_t FROM nvarchar(max) NOT NULL;
 
IF NOT EXISTS (SELECT * FROM sys.types WHERE is_table_type = 1 AND name = 'wordcnts_t')
CREATE TYPE wordcnts_t AS TABLE ( words nvarchar(4000) NOT NULL, countofwords int );
 
IF NOT EXISTS (SELECT * FROM sys.types WHERE is_table_type = 1 AND name = 'DocumentsTable')
CREATE TYPE DocumentsTable AS TABLE (Content nvarchar(max) NOT NULL)
GO
 
IF NOT EXISTS (SELECT * FROM sys.types WHERE is_table_type = 1 AND name = 'DocumentsWithIndexTable')
CREATE TYPE DocumentsWithIndexTable AS TABLE (Indx int, Content nvarchar(max) NOT NULL)
GO
 
IF NOT EXISTS (SELECT * FROM sys.types WHERE is_table_type = 1 AND name = 'DocumentsWithAutoIncrementIndexTable')
CREATE TYPE DocumentsWithAutoIncrementIndexTable AS TABLE (
       P_Id int PRIMARY KEY IDENTITY, Content nvarchar(max) NOT NULL)
GO
 
------ SEGUNDO PASO
 
 
drop function dbo.mapper
go
 
create function dbo.mapper(@documents db_owner.DocumentsWithAutoIncrementIndexTable READONLY, @sep nvarchar)
returns @t TABLE ( wordindex int, word db_owner.word_t )
WITH EXECUTE AS CALLER
as begin
 declare @lastidprocessed int, @istart int
 declare @len int, @wordcounter int, @id int, @pos int
 declare @document db_owner.words_t, @word db_owner.word_t
 
 select @lastidprocessed=0
 
 While EXISTS(SELECT * From @documents WHERE @lastidprocessed < P_Id)
 Begin
Select Top 1 @id = P_Id, @document = Content From @documents WHERE @lastidprocessed < P_Id
set @istart = 1;
set @len = LEN(@document);
 
-- For every word within a document
    While (@istart <= @len)
    Begin
set @pos = CHARINDEX ( @sep ,@document, @istart )
if (@pos = 0)
begin
set @word = SUBSTRING(@document, @istart, @len);
insert into @t values ( @wordcounter, @word )
set @istart = @len + 1;
set @wordcounter = @wordcounter + 1;
end
else
begin
set @word = SUBSTRING(@document, @istart, @pos - @istart);
insert into @t values ( @wordcounter, @word )
set @istart = @pos + 1;
set @wordcounter = @wordcounter + 1;
end
End
 
set @lastidprocessed = @id
End
Return
end
go
 
------ TERCER PASO
 
 
drop function dbo.reducer
go
 
 
create function dbo.reducer(@words db_owner.DocumentsWithAutoIncrementIndexTable READONLY)
returns @t TABLE ( word db_owner.word_t, wordcount int )
WITH EXECUTE AS CALLER
as BEGIN
declare @word db_owner.word_t, @previousword db_owner.word_t
declare @lastidprocessed integer, @id integer, @wordoccurrences integer
 
select @lastidprocessed=0
While EXISTS(SELECT * From @words WHERE @lastidprocessed < P_Id)
Begin
Select Top 1 @id = P_Id, @word = Content From @words WHERE @lastidprocessed < P_Id
 
-- For every word, see if it's the same as the previous one
if (@word <> @previousword)
begin
If (@previousword <> '')
begin
insert into @t values ( @previousword, @wordoccurrences );
end
set @wordoccurrences = 1;
end
else
begin 
set @wordoccurrences = @wordoccurrences + 1;
end 
     
set @previousword = @word;
set @lastidprocessed = @id
End        
insert into @t values ( @previousword, @wordoccurrences );
Return

END  
go

------ CUARTO PASO 

declare @contador integer 
select @contador=1  
DECLARE @myDocuments db_owner.DocumentsWithAutoIncrementIndexTable
while @contador < 10000
begin
 INSERT INTO @myDocuments VALUES('The quick brown fox jumps over the lazy dog');
 INSERT INTO @myDocuments VALUES('The brown dog walks through the brown forest');
 INSERT INTO @myDocuments VALUES('Little red riding hood looks like a fox in her red dress');
 select @contador= @contador+1
end

DECLARE @myWords db_owner.DocumentsWithAutoIncrementIndexTable
INSERT INTO @myWords(Content) SELECT word FROM dbo.mapper(@myDocuments, ' ') order by word asc

select * from dbo.reducer(@myWords) order by wordcount desc
Valora esta pregunta
Me gusta: Está pregunta es útil y esta claraNo me gusta: Está pregunta no esta clara o no es útil
0
Responder
Imágen de perfil de Isaias
Val: 3.250
Oro
Ha mantenido su posición en SQL Server (en relación al último mes)
Gráfica de SQL Server

Necesito optimizar este query de MapReduce

Publicado por Isaias (4558 intervenciones) el 27/05/2015 19:22:56
¿Ya pasaste tu query por tuning advisor?
Valora esta respuesta
Me gusta: Está respuesta es útil y esta claraNo me gusta: Está respuesta no esta clara o no es útil
0
Comentar