XQuery/XML to RDF

< XQuery

For the Emp-DEPT case study, RDF must be generated from the underlying XML files. An XQuery script generates the RDF. It uses a configuration file to define how the columns of a table should be mapped into RDF and which namespaces are to be used. This mapping needs a little more work to support composite keys and user-defined transformations. An interactive tool to create this map would be useful.

Issues in mapping to RDF

The main guide to publishing linked data on the web is How to Publish Linked Data on the Web. The work described in this Wikibook entry consists of progressively applying the principles set out there.

This conversion illustrates a few of the differences between local datasets, whether SQL or XML, and a dataset designed to fit into a global database. Some decisions remain unclear.

[The choices made here are those of a novice and review would be welcome. ]

Some issues not yet addressed:

Configuration file

To facilitate the conversion from XML to RDF, a separate configuration file is defined. Here is the configuration file for the emp-dept data.

<?xml version="1.0" encoding="UTF-8"?>
<!-- Mapping configuration read by the XQuery conversion functions: a list of
     namespace prefixes followed by one <map> per source table. -->
<XML-to-RDF>
  <!-- prefix/uri pairs used to expand prefixed values such as "ft:emp" or "xs:integer".
       NOTE(review): the namespace-declaration helper only registers entries carrying
       declare="yes"; none of the entries below has that attribute. -->
  <namespaces>
        <namespace prefix="f" uri="http://www.cems.uwe.ac.uk/empdept/concept/" />
        <namespace prefix="ft" uri="http://www.cems.uwe.ac.uk/empdept/"/>
        <namespace prefix="rdf" uri="http://www.w3.org/1999/02/22-rdf-syntax-ns#" />
        <namespace prefix="rdfs" uri="http://www.w3.org/2000/01/rdf-schema#" />
        <namespace prefix="foaf" uri="http://xmlns.com/foaf/0.1/" />
        <namespace prefix="xs" uri="http://www.w3.org/2001/XMLSchema#" />
    </namespaces>
    <!-- <map>: @type combined with @prefix gives the rdf:type of every row; @prefix is
         also the default prefix for property elements.
         <source>: @file is the document to load, @path the expression selecting rows.
         <col>: @name  = child element of the row to read (also the default property name)
                @pk    = "true" marks the column whose value forms the subject URI
                @uribase = base to which "/" and the value are appended to form a URI
                @type  = datatype of the literal; takes precedence over @uribase
                @prefix / @tag = override the property's prefix / local name -->
    <map type="emp" prefix="f">
        <source file="/db/Wiki/empdept/emp.xml" path="//Emp"/>
        <col name="EmpNo" pk="true" uribase="ft:emp" type="xs:string"/>
        <col name="Ename" prefix="rdfs" tag="label"/>
        <col name="Sal" type="xs:integer"/>
        <col name="Comm" type="xs:integer"/>
        <col name="HireDate" type="xs:date"/>
        <!-- MgrNo is mapped twice: once as a link to the manager resource, once as a plain value -->
        <col name="MgrNo" tag="Mgr" uribase="ft:emp"/>
        <col name="MgrNo"/>
        <col name="DeptNo" tag="Dept" uribase="ft:dept"/>
        <col name="Ename" prefix="foaf" tag="surname"/>
        <col name="Job"/>
    </map>
    <map type="dept" prefix="f">
        <source file="/db/Wiki/empdept/dept.xml" path="//Dept"/>
        <col name="Dname" prefix="rdfs" tag="label"/>
        <col name="Dname"/>
        <!-- uribase here is a full URI rather than a prefixed name -->
        <col name="Location" uribase="http://dbpedia.org/resource"/>
        <col name="DeptNo" pk="true" uribase="ft:dept" type="xs:string"/>
    </map>
    <map type="salgrade" prefix="f">
        <source file="/db/Wiki/empdept/salgrade.xml" path="//SalGrade"/>
        <col name="HiSal" type="xs:integer"/>
        <col name="LoSal" type="xs:integer"/>
        <col name="Grade" pk="true" uribase="ft:grade" type="xs:integer"/>
        <col name="Grade" prefix="rdfs" tag="label"/>
    </map>

</XML-to-RDF>

Database conversion functions

One function, row-to-rdf, generates the RDF for a row of a table; another function, map-to-schema, generates RDFS descriptions of the predicates used in a table.


module namespace  fr= "http://www.cems.uwe.ac.uk/wiki/fr";
import module namespace util = "http://exist-db.org/xquery/util";

declare namespace rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
declare namespace rdfs = "http://www.w3.org/2000/01/rdf-schema#";

(:~
 : Register namespace prefixes from the configuration with the query context.
 :
 : Only <namespace> elements whose declare attribute equals "yes" are used; each
 : one is passed to util:declare-namespace as (prefix, uri).
 :
 : @param $config the configuration document or element to search
 : @return whatever util:declare-namespace returns for each selected entry
 :)
declare function fr:declare-namespaces($config) {
  let $wanted := $config//namespace[@declare = "yes"]
  for $entry in $wanted
  let $prefix := $entry/@prefix
  let $uri := xs:anyURI($entry/@uri)
  return util:declare-namespace($prefix, $uri)
};

(:~
 : Expand a configuration value into a full URI string.
 :
 : Namespaces are taken from the <namespace prefix=".." uri=".."/> elements found
 : beneath the parent of $map.
 :   - "prefix:local" where prefix is configured -> the namespace URI followed by local
 :   - a value equal to a configured prefix      -> that namespace URI
 :   - anything else                             -> returned unchanged
 :
 : The last rule covers absolute URIs such as "http://dbpedia.org/resource": the text
 : before the first ":" is not a configured prefix, so the value is passed through
 : instead of being split and losing its scheme.
 :
 : @param $qname the value to expand; may be the empty sequence
 : @param $map   a <map> element of the configuration
 : @return the expanded string, or the empty sequence when $qname is empty or ""
 :)
declare function fr:expand($qname as xs:string?, $map ) as xs:string ?{
   let $namespace := $map/..//namespace
   return
   if ($qname)
   then
       (: substring-before yields "" when there is no colon, which matches no prefix :)
       let $prefix := substring-before($qname, ":")
       let $uri := $namespace[@prefix = $prefix]/@uri
       return
          if (contains($qname, ":") and exists($uri))
          then
              (: substring-after keeps everything after the FIRST colon, so a local
                 part that itself contains ":" is not truncated :)
              concat($uri, substring-after($qname, ":"))
          else if ($namespace[@prefix = $qname])
          then
              $namespace[@prefix = $qname]/@uri
          else
              $qname
   else ()
};


(:~
 : Build the rdf:Description for one source row according to a <map> definition.
 :
 : The subject URI is the expanded uribase of the pk="true" column, a "/", and that
 : column's value in the row. When the map has a type, an rdf:type element is
 : emitted. Every <col> whose value in the row is non-empty yields one property
 : element named prefix:tag (falling back to the map prefix and the column name):
 :   - with @type    : a literal carrying rdf:datatype
 :   - with @uribase : an rdf:resource link, spaces in the value replaced by "_"
 :   - otherwise     : a plain literal
 :
 : @param $row the element holding the row's column elements
 : @param $map the <map> element describing the table
 : @return the rdf:Description for the row
 :)
declare function fr:row-to-rdf($row as element() , $map as element() ) as element(rdf:Description) * {
    (: primary-key column, its value in this row, and the resulting subject URI :)
    let $keycol := $map/col[@pk="true"]
    let $keyval := string($row/*[name()=$keycol/@name])
    let $subject := concat(fr:expand($keycol/@uribase, $map), "/", $keyval)
    (: class membership, only when the map names a type :)
    let $class :=
        if ($map/@type)
        then <rdf:type rdf:resource="{fr:expand(concat($map/@prefix,":",$map/@type),$map)}"/>
        else ()
    (: one property element per mapped column that has a value in this row :)
    let $properties :=
        for $col in $map/col
        let $name := $col/@name
        let $data := string($row/*[name(.)=$name])
        where $data != ""
        return
            element { concat(($col/@prefix,$map/@prefix)[1], ":", ($col/@tag,$name)[1]) }
            {
                if ($col/@type)
                then (
                    attribute rdf:datatype { fr:expand($col/@type,$map) },
                    $data
                )
                else if ($col/@uribase)
                then attribute rdf:resource {
                    concat(fr:expand($col/@uribase,$map), "/", replace($data," ","_"))
                }
                else $data
            }
    return
        <rdf:Description rdf:about="{$subject}">{
            $class,
            $properties
        }</rdf:Description>
};
 
 (:~
  : Describe the properties of a <map> as RDF Schema.
  :
  : For every <col> that has a type attribute, one rdf:Description is produced whose
  : subject is the property URI (expanded prefix followed by the tag, or the column
  : name when there is no tag). It states that the subject is an rdf:Property, gives
  : the map's type as rdfs:domain, the column's range as rdfs:range, and the column
  : name as rdfs:label.
  :
  : The range is the first available of: expanded @type, expanded @uribase,
  : rdfs:Literal.
  : NOTE(review): because only columns with @type are visited, the @uribase and
  : rdfs:Literal fallbacks are never reached - confirm whether untyped columns
  : should be described as well.
  :
  : @param $map the <map> element describing the table
  : @return one rdf:Description per typed column
  :)
 declare function fr:map-to-schema ($map as element()) as element(rdf:Description) * {
     let $typeuri := fr:expand(concat($map/@prefix,":",$map/@type),$map)
     for $col in $map/col[@type]
     let $prop := concat( fr:expand(($col/@prefix,$map/@prefix)[1],$map ), ($col/@tag,$col/@name)[1])
     (: the RDFS class for literals is rdfs:Literal, with a capital L :)
     let $rangeuri := ( fr:expand($col/@type,$map), fr:expand($col/@uribase,$map),"http://www.w3.org/2000/01/rdf-schema#Literal")[1]
     return
       <rdf:Description rdf:about="{$prop}">
           <rdf:type  rdf:resource="http://www.w3.org/1999/02/22-rdf-syntax-ns#Property"/>
           <rdfs:domain rdf:resource="{$typeuri}"/>
           <rdfs:range rdf:resource="{$rangeuri}"/>
           <rdfs:label>{string($col/@name)}</rdfs:label>
       </rdf:Description>
 };

Full database conversion

The script to generate the RDF for the full database:

import module namespace fr="http://www.cems.uwe.ac.uk/wiki/fr"   at  "fr.xqm";
declare namespace rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
declare namespace rdfs = "http://www.w3.org/2000/01/rdf-schema#";

(: Configuration document; its location is read from the "config" request parameter. :)
declare variable $config :=  doc(request:get-parameter("config",()));
(: Bound only for the call itself: passes the configuration to fr:declare-namespaces.
   NOTE(review): $x is never referenced afterwards - presumably the binding exists to
   force the call; confirm the processor evaluates unused global variables. :)
declare variable $x := fr:declare-namespaces($config);

<rdf:RDF>
{
  (: For each <map>: load the source document, select its rows, then emit the RDF for
     every row followed by the schema descriptions for that map. :)
  for $map in $config//map
  let $xml := doc($map/source/@file)
  (: The evaluated string is "$xml" followed by the configured path, so it refers to the
     variable above by name - do not rename $xml.
     NOTE(review): the path string comes from the configuration, whose location comes from
     the request; confirm that only trusted configuration documents can be named. :)
  let $source := util:eval(concat("$xml",$map/source/@path))
  return 
        (for $row in $source  return fr:row-to-rdf($row,$map),
         fr:map-to-schema($map)
        )
}
</rdf:RDF>

Links

Resource RDF

In addition each resource is retrieved as RDF. In this simple example, the request for a resource URI like:

http://www.cems.uwe.ac.uk/empdept/emp/7839

is re-written by Apache to

http://www.cems.uwe.ac.uk/xmlwiki/RDF/empdeptrdf.xq?emp=7839

and the script retrieves the rdf:Description of the selected resource directly from the RDF file.

This mechanism does not conform to the recommended practice of distinguishing between information resources (such as the information about employee 7839) and the real-world entity being represented. At present, the resource URI de-references directly to the RDF, rather than redirecting to it indirectly using the recommended 303 (See Other) mechanism.


declare namespace rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
declare variable $rdf := doc("/db/Wiki/RDF/empdept.rdf");
declare option exist:serialize "media-type=application/rdf+xml";

(: better to just parse the uri itself :)

(: The name of the first request parameter selects what to return: "all" yields the
   whole RDF document; any other name is treated as a resource type whose parameter
   value is the key, and the matching rdf:Description elements are returned wrapped
   in rdf:RDF. :)
let $kind := request:get-parameter-names()[1]
return
   if (not($kind = "all"))
   then
      let $id := request:get-parameter($kind,())
      let $about := concat("http://www.cems.uwe.ac.uk/empdept/",$kind,"/",$id)
      let $hits := $rdf//rdf:Description[@rdf:about=$about]
      return
      <rdf:RDF>{$hits}</rdf:RDF>
   else
      $rdf

To Do

This article is issued from Wikibooks. The text is licensed under Creative Commons - Attribution - Sharealike. Additional terms may apply for the media files.