, - . !
: A.xml( "" → "" ).
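MarkLogic provides two built-in functions that are useful here:
cts:highlight (finds query matches within a single node) and cts:tokenize (splits text into words, punctuation, and spaces).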
, , :
$cts:text ( , )- ,
The tokens returned by cts:tokenize are subtypes of xs:string: cts:word, cts:space, and cts:punctuation.
!
xquery version "1.0-ml";
(:
 : Finds every occurrence of a search term inside the given content.
 :
 : Uses cts:highlight to wrap each match in a temporary <MATCH> element and
 : then returns those wrapper elements, so count() of the result is the
 : number of matches.
 :
 : $content     - zero or more nodes to search. cts:highlight accepts exactly
 :                one node per call, so each node is processed separately and
 :                the matches are concatenated in order.
 : $search-text - the text (or cts:query) to look for.
 :
 : Returns the <MATCH> elements, one per match; the empty sequence when
 : $content is empty or nothing matches.
 :
 : NOTE(review): any element already named MATCH inside $content would also
 : be returned by the //MATCH step - confirm the input never contains one.
 :)
declare function local:find-matches($content, $search-text) {
  for $node in $content
  return
    cts:highlight($node, $search-text, <MATCH>{$cts:text}</MATCH>)//MATCH
};
(:
 : Extracts the word tokens from text, discarding punctuation and whitespace.
 :
 : $text - zero or more strings or nodes. Each item is atomized and tokenized
 :         on its own, because cts:tokenize accepts a single xs:string.
 :
 : Returns the cts:word tokens of all items, in order; the empty sequence
 : when $text is empty.
 :)
declare function local:get-words($text) {
  for $item in fn:data($text)
  return
    cts:tokenize($item)[. instance of cts:word]
};
(: Main query: plain XQuery that builds the report from A.xml (content) and B.xml (lookup lists) :)
let $source := doc("A.xml")/root/content
let $dictionary := doc("B.xml")/WordLookUp

(: One <company> element per lookup company that occurs at least once in the content :)
let $company-hits :=
  for $entry in $dictionary/companies/company
  let $occurrences := count(local:find-matches($source, string($entry)))
  where $occurrences gt 0
  return
    <company count="{$occurrences}">{string($entry/@name)}</company>

(: Each mood term contributes (number of matches) * (its @number attribute) to the total :)
let $mood-score :=
  sum(
    for $entry in $dictionary/moods/mood
    return count(local:find-matches($source, string($entry))) * $entry/@number
  )

(: One <topic> element per lookup topic that occurs at least once, labelled with its @group :)
let $topic-hits :=
  for $entry in $dictionary/topics/topic
  let $occurrences := count(local:find-matches($source, string($entry)))
  where $occurrences gt 0
  return
    <topic count="{$occurrences}">{string($entry/@group)}</topic>

(: Number of word tokens in the content; punctuation and whitespace are not counted :)
let $word-total := count(local:get-words($source))

return
  <root>
    {$source}
    <updatedElement>
      <companies>{$company-hits}</companies>
      <mood>{$mood-score}</mood>
      <topics>{$topic-hits}</topics>
      <word-count>{$word-total}</word-count>
    </updatedElement>
  </root>
, - , . cts:search cts:contains, MarkLogic. , ( ), . - , cts:search cts:contains.
: , <MATCH>, cts:highlight (, , ). (, ).
:
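If you would rather not use cts:highlight, you can rely on cts:tokenize instead. Replace local:find-matches with the following version, which compares the words one by one: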
(:
 : Alternative matcher: walks the word tokens of $content and keeps those
 : whose stem (per cts:stem) equals a stem of $search-text.
 :
 : $content     - text or node passed on to local:get-words.
 : $search-text - the term to compare each word against.
 :
 : Returns the matching word tokens, in document order.
 :)
declare function local:find-matches($content, $search-text) {
  let $wanted-stems := cts:stem($search-text)
  for $word in local:get-words($content)
  where cts:stem($word) = $wanted-stems
  return $word
};
cts:stem , , , "pass" "" .. () , , , cts:highlight, , cts:search cts:contains, cts: ( /, ).