MarkLogic Join Inquiry

Hi, I am new to marklogic and the Xquery world. I can't figure out how to start writing the following logic in Marklogic Xquery. I would be grateful if someone could give me an idea / sample so that I can achieve the following:

I want to execute an A.XML query based on a word search in B.XML. The request should produce C.XML. The logic should be as follows:

a.xml

<root>
<content> The state passed its first ban on using a handheld cellphone while driving in 2004 Nokia Vodafone Nokia Growth Recession Creicket HBO</content>
</root>

B.XML

<WordLookUp>
<companies>
    <company name="Vodafone">Vodafone</company>
    <company name="Nokia">Nokia</company>
</companies>
<topics>
    <topic group="Sports">Cricket</topic>
    <topic group="Entertainment">HBO</topic>
    <topic group="Finance">GDP</topic>
</topics>
<moods>
    <mood number="4">Growth</mood>
    <mood number="-5">Depression</mood>
    <mood number="-3">Recession</mood>
</moods>

C.XML (XML Result)

<root>
    <content> The state passed its first ban on using a handheld cellphone while driving in 2004 Nokia Vodafone Nokia Growth Recession Creicket HBO</content>
    <updatedElement>
        <companies>
            <company count="1">Vodafone</company>
            <company count="2">Nokia</company>
        </companies>
        <mood>1</mood>
        <topics>
             <topic count="1">Sports</topic>
             <topic count="1">Entertainment</topic>
        </topics>
            <word-count>22</word-count>
    </updatedElement>
    </root>
  • Find each company / text () from A.xml in B.xml if the found create tag is found: TAG {company count = "Number of occurrence of this word"} company / @ name {/ Company}

  • Search in each topic / text () of the A.xml file in B.xml if the found sign create tag TAG {topic topic = "Number of occurrences of this word"} topic / @ group {/ topic}

  • /() A.xml B.xml, [ * {/mood [first word]/@number}] + [ * {/mood [ ]/@number})]....

  • .

+3
3

/ XQuery, - , XQuery 1.0:

let $content := doc('file:///c:/temp/delete/A.xml')/*/*,
      $lookup := doc('file:///c:/temp/delete/B.xml')/*,
      $words := tokenize($content, '\W+')[.]
         return
           <root>
            {$content}
             <updatedElement>
               <companies>
                  {for $c in $lookup/companies/*,
                       $occurs in count(index-of($words, $c))
                     return
                       if($occurs)
                          then
                            <company count="{$occurs}">
                              {$c/text()}
                            </company>
                          else ()
                  }
               </companies>
               <mood>
                  {
                   sum($lookup/moods/*[false or index-of($words, data(.))]/@number)
                  }
               </mood>
               <topics>
                 {for $t in $lookup/topics/*,
                      $occurs in count(index-of($words, $t))
                    return
                      if($occurs)
                         then
                           <topic count="{$occurs}">
                             {data($t/@group)}
                           </topic>
                         else ()
                  }
               </topics>
               <word-count>{count($words)}</word-count>
              </updatedElement>
          </root>

A.xml B.XML( c:/temp/delete) , :

<root>
   <content> The state passed its first ban on using a handheld cellphone while driving in 2004 Nokia Vodafone Nokia Growth Recession Cricket HBO</content>
   <updatedElement>
      <companies>
         <company count="1">Vodafone</company>
         <company count="2">Nokia</company>
      </companies>
      <mood>1</mood>
      <topics>
         <topic count="1">Sports</topic>
         <topic count="1">Entertainment</topic>
      </topics>
      <word-count>22</word-count>
   </updatedElement>
</root>
-1

, - . !

: A.xml( "" → "" ).

MarkLogic :

  • cts:highlight ( , )
  • cts:tokenize ( , )

, , :

  • $cts:text ( , )
  • , xs:string:
    • cts:word,
    • cts:space
    • cts:punctuation.

!

xquery version "1.0-ml";

(: Generic function using MarkLogic ability to find query matches within a single node :)
declare function local:find-matches($content, $search-text) {
  cts:highlight($content, $search-text, <MATCH>{$cts:text}</MATCH>)
  //MATCH
};

(: Generic function using MarkLogic ability to tokenize text into words, punctuation, and spaces :)
declare function local:get-words($text) {
  cts:tokenize($text)[. instance of cts:word]
};

(: The rest of this is pure XQuery :)
let $content := doc("A.xml")/root/content,
    $lookup  := doc("B.xml")/WordLookUp
return
  <root>
    {$content}
    <updatedElement>

      <companies>{
        for $company in $lookup/companies/company
        let $results := local:find-matches($content, string($company))
        where exists($results)
        return
          <company count="{count($results)}">{string($company/@name)}</company>
      }</companies>

      <mood>{
        sum(
          for $mood in $lookup/moods/mood
          let $results := local:find-matches($content, string($mood))
          return count($results) * $mood/@number
        )
      }</mood>

      <topics>{
        for $topic in $lookup/topics/topic
        let $results := local:find-matches($content, string($topic))
        where exists($results)
        return
          <topic count="{count($results)}">{string($topic/@group)}</topic>
      }</topics>

      <word-count>{
        count(local:get-words($content))
      }</word-count>

    </updatedElement>
  </root>

, - , . cts:search cts:contains, MarkLogic. , ( ), . - , cts:search cts:contains.

: , <MATCH>, cts:highlight (, , ). (, ).

:

, cts:highlight, , cts:tokenize . local:find-matches ( , , ):

(: Find word matches by comparing them one-by-one :)
declare function local:find-matches($content, $search-text) {
  local:get-words($content)[cts:stem(.) = cts:stem($search-text)]
};

cts:stem , , , "pass" "" .. () , , , cts:highlight, , cts:search cts:contains, cts: ( /, ).

+2

You may need to step back and ask if you can better serve the modeling of your data and documents for use with a documented database instead of rdbms

0
source

All Articles