(: Atom 1.0 to AtomOWL (Turtle) transformation for Saxon.
   The query inspects the root element of the context document: an a:feed is
   rendered with loc:feed, an a:entry with loc:entry. Every loc: function
   returns a sequence of strings/nodes that the main body flattens with
   string-join into the text output. :)
declare namespace a = "http://www.w3.org/2005/Atom";
declare namespace saxon = "http://saxon.sf.net/";
declare namespace loc = "https://sommer.dev.java.net/atom/2006-06-06/transform/";
declare namespace xhtml = "http://www.w3.org/1999/xhtml";

declare option saxon:output "method=text";
(: declare option saxon:output "omit-xml-declaration=yes";
   sadly this is not propagated to saxon:serialize :)

declare variable $format as xs:string external;

(: serialise the element to an xml string and remove the leading xml declaration.
   NOTE(review): the original declaration of this function was lost to markup
   stripping; only the tail of the regular expression survived. The signature,
   the use of $format (declared above and otherwise unused) as the second
   argument of saxon:serialize, and the start of the pattern are a
   reconstruction - confirm against the upstream file. :)
declare function loc:serialize($node as element()) as xs:string {
  replace(saxon:serialize($node, $format), "^<\?xml[^>]*>", "")
};

(: take the div element surrounding xhtml content and add base and lang info to it
   No longer needed, as we now put all content into a :Content element, that knows about base and div

declare function loc:div($node as element()) {
  element {node-name($node)} {
    ( $node/@*,
      let $lang := loc:lang($node) return if (empty($lang)) then () else attribute xml:lang { $lang },
      let $base := loc:base($node) return if (empty($base)) then () else attribute xml:base { $base },
      for $c in $node/(node()|*|text()|@*) except $node/(@xml:lang, @xml:base) return
        typeswitch($c)
          case $a as attribute() return $a (: was: attribute { node-name($a)} { loc:esc($a) } :)
          case $e as element() return loc:escape($e)
          case $t as text() return loc:esc($t)
          case $c as comment() return comment { loc:esc($c) }
          case $p as processing-instruction() return $p
          default return ("#unprocessed element ",name($c),"; ")
    )
  }
};:)

(: escape all the " in text node children and grandchildren of the given node.
   Rebuilds the element with a computed constructor: attributes and processing
   instructions are copied, child elements are rebuilt recursively, text and
   comment content goes through loc:esc. :)
declare function loc:escape($node as element()) {
  element { node-name($node) } {
    (
      for $c in $node/(node()|*|text()|@*)
      return
        typeswitch ($c)
          case $a as attribute() return $a (: was: attribute { node-name($a)} { loc:esc($a) } :)
          case $e as element() return loc:escape($e)
          case $t as text() return loc:esc($t)
          case $c as comment() return comment { loc:esc($c) }
          case $p as processing-instruction() return $p
          default return () (: todo: how do I alert the user?
                               "#unprocessed element ",name($c),"; ":)
    )
  }
};

(: backslash-escape every backslash and every double quote in $t.
   Backslashes are doubled first so the ones added for quotes are not doubled again. :)
declare function loc:esc($t as xs:string) as xs:string {
  replace(replace($t, '\\', '\\\\'), '"', '\\"')
};

(: write the uri held in attribute $uri between angle brackets, resolved
   against the xml:base in scope for $node when there is one :)
declare function loc:uriatt($node, $uri as attribute()) {
  ( "<",
    let $base := loc:base($node)
    return if (empty($base)) then $uri else resolve-uri($uri, $base),
    ">" )
};

(: same as loc:uriatt, for a uri held in the content of element $uri :)
declare function loc:uri($uri as element()) {
  ( "<",
    let $base := loc:base($uri)
    return if (empty($base)) then $uri else resolve-uri($uri, $base),
    ">" )
};

(:find the base uri for the node: collects the xml:base attributes of the node
  and its ancestors, nearest first, and resolves them; () when there are none :)
declare function loc:base($node as element()) {
  let $bases := reverse($node/ancestor-or-self::node()/@xml:base)
  return
    if (count($bases) = 0) then ()
    else loc:baseResolver($bases)
};

(: resolve the first xml:base of $bases against the resolution of the remaining ones :)
declare function loc:baseResolver($bases as attribute()*) as xs:anyURI {
  if (count($bases) = 1) then $bases[1]
  else resolve-uri($bases[1], loc:baseResolver(subsequence($bases, 2)))
};

(: render an a:generator element as a :Generator node.
   $esc is the indentation string written before each property. :)
declare function loc:generator($esc as xs:string, $g as element()) {
  ( "[ a :Generator; ",
    for $at in $g/@*
    return
      ( $esc, " ",
        typeswitch ($at)
          case $u as attribute(uri) return (":uri ", loc:uriatt($g, $u), "; ")
          (: loc:textquote escapes its argument itself; escaping here as well
             produced doubly escaped quotes and backslashes :)
          case $v as attribute(version) return (":generatorVersion ", loc:textquote($v), "; ")
          default return ()
      ),
    $esc, " :name ", loc:textatt($g), "]" )
};

(: 1-based position of $p among the a:author children of its parent, as a string;
   () when $p is not one of them (e.g. an a:contributor) :)
declare function loc:personPos($p as element()) {
  let $parent := $p/..
  for $child at $pos in $parent/a:author
  return if ($child is $p) then string($pos) else ()
};

(: render a person construct (a:author / a:contributor) as a :Person node.
   Persons sitting directly under the root feed, or under the a:source of an
   entry, are additionally tied with owl:sameAs to the blank node label that
   loc:entry writes for entries inheriting their authors. :)
declare function loc:person($tab as xs:string, $p as element()) {
  (
    "[ a :Person; ",
    let $pos := loc:personPos($p)
    return
      (: loc:personPos only numbers a:author elements. Without this guard every
         contributor was declared owl:sameAs the same unnumbered label. :)
      if (empty($pos)) then ()
      else if (not(empty($p/../../a:feed))) (: this is a person element on the root node:)
      then ($tab, " owl:sameAs", " _:author", $pos, "; ")
      else if (not(empty($p/../../a:source))) (: this is a person element in a source feed :)
      then ($tab, " owl:sameAs", " _:authorOfEntry", loc:entryNum($p/../..), "_", $pos, "; ")
      else (),
    for $el in $p/*
    return
      ( $tab, " ",
        typeswitch ($el)
          case $u as element(a:uri) return (":uri ", loc:uri($u), "; ")
          case $n as element(a:name) return (":name ", loc:textatt($n), "; ")
          case $e as element(a:email) return (":email ", "<", concat("mailto:", $e), "> ; ")
          default return ("#unprocessed element ", name($el), "; ")
      ),
    $tab, "]"
  )
};

(: a rel value without '/' or ':' is written as an iana: name, anything else as a uri :)
declare function loc:linktype($node, $rel as attribute()) {
  if (not((contains($rel, '/') or contains($rel, ':'))))
  then ("iana:", $rel)
  else loc:uriatt($node, $rel)
};

(: render an a:link element as a :Link node; a missing rel defaults to iana:alternate :)
declare function loc:link($tab as xs:string, $link as element()) {
  (
    "[ a :Link; ",
    $tab, ":rel ",
    let $rel := $link/@rel
    return
      if (empty($rel)) then "iana:alternate ; "
      else (loc:linktype($link, $rel), "; "),
    let $title := $link/@title
    return
      if (not(empty($title))) then ($tab, ":title ", loc:textquote($title), "; ")
      else (),
    $tab, ":to [ :src ", loc:uriatt($link, $link/@href), ";",
    let $type := $link/@type
    return
      if (empty($type)) then ()
      else (" ", $tab, ' :type "', $type, '";'),
    let $length := $link/@length
    return
      if (empty($length)) then ()
      else (" ", $tab, " :length ", $length, ";"),
    "] ",
    $tab, "]"
  )
};

(: write the content of a date element as an xsd:dateTime literal :)
declare function loc:date($date as element()) {
  ('"', $date, '"^^xsd:dateTime')
};

(: render an a:category element as a :Category node :)
declare function loc:category($tab as xs:string, $cat as element()) {
  ( "[ a :Category; ",
    for $att in $cat/@*
    return
      ( $tab,
        typeswitch ($att)
          case $t as attribute(term) return (':term ', loc:textquote($t), "; ")
          case $s as attribute(scheme) return (':scheme ', loc:uriatt($cat, $s), "; ")
          case $l as attribute(label) return (':label ', loc:textatt($l), "; ")
          default return ("#unprocessed attribute ", name($att), "; ")
      )
  ),
  if (empty($cat/text())) then () else "#unprocessed text content ",
  $tab,
  "]"
};

(: 1-based position of $entry among the a:entry children of the root a:feed, as a string.
   The return type is optional: an entry that is not a child of a root feed
   (for instance a standalone entry document) has no position, and the former
   "as xs:string" turned that case into a type error. :)
declare function loc:entryNum($entry as element(a:entry)) as xs:string? {
  for $e at $p in root($entry)/a:feed/a:entry
  return if ($e is $entry) then string($p) else ()
};

(: render the properties of an a:entry element (the caller writes the enclosing brackets) :)
declare function loc:entry($tab as xs:string, $entry as element()) {
  (
    " a :Entry; ",
    (: An entry of the root feed that has no author of its own inherits the
       authors of its a:source when it has one, else those of the feed. They
       are referenced through the labels that loc:person ties to the
       corresponding :Person nodes with owl:sameAs. :)
    if ($entry/../.. is root($entry) and empty($entry/a:author)) then
      let $fromSource := not(empty($entry/a:source))
      let $inherited :=
        if ($fromSource) then $entry/a:source/a:author else $entry/../a:author
      (: the entry number is computed once instead of once per author :)
      let $label :=
        if ($fromSource)
        then concat("_:authorOfEntry", loc:entryNum($entry), "_")
        else "_:author"
      return
        (: with nothing to inherit, a bare ":author ; " would be invalid Turtle :)
        if (empty($inherited)) then ()
        else
          ( $tab, ":author ",
            for $a at $pos in $inherited
            return (if ($pos != 1) then "," else "", $label, string($pos)),
            "; " )
    else (),
    for $n in $entry/*
    return
      ( $tab,
        typeswitch ($n)
          case $t as element(a:title) return (":title ", loc:content(concat($tab, " "), $t), "; ")
          case $r as element(a:rights) return (":rights ", loc:content(concat($tab, " "), $r), "; ")
          case $u as element(a:updated) return (":updated ", loc:date($u), "; ")
          case $p as element(a:published) return (":published ", loc:date($p), "; ")
          case $auth as element(a:author) return (":author ", loc:person(concat($tab, " "), $auth), "; ")
          case $contr as element(a:contributor) return (":contributor ", loc:person(concat($tab, " "), $contr), "; ")
          case $id as element(a:id) return (":id ", loc:textquote($id), "^^xsd:anyURI; ")
          case $link as element(a:link) return (":link ", loc:link(concat($tab, " "), $link), "; ")
          case $smry as element(a:summary) return (":summary ", loc:content(concat($tab, " "), $smry), "; ")
          case $ctnt as element(a:content) return (":content ", loc:content(concat($tab, " "), $ctnt), "; ")
          case $cat as element(a:category) return (":category ", loc:category(concat($tab, " "), $cat), "; ")
          case $src as element(a:source) return (":source [ ", loc:feed(concat($tab, " "), $src), " ]; ")
          default return ("#unprocessed element ", name($n), "; ")
      )
  )
};

(: render the properties of an a:feed or a:source element (the caller writes
   the enclosing brackets). Entries carrying an a:source are attached with
   :aggregates, the others with :entry. :)
declare function loc:feed($tab, $feed as element()) {
  ( " a :Feed; ",
    for $n in $feed/*
    return
      ( $tab,
        typeswitch ($n)
          case $e as element(a:entry) return
            ( if (empty($e/a:source)) then ":entry [ " else ":aggregates [ ",
              loc:entry(concat($tab, " "), $e),
              " ]; " )
          case $t as element(a:title) return (":title ", loc:content(concat($tab, " "), $t), "; ")
          case $st as element(a:subtitle) return (":subtitle ", loc:content(concat($tab, " "), $st), "; ")
          case $g as element(a:generator) return (":generator ", loc:generator(concat($tab, " "), $g), "; ")
          case $i as element(a:icon) return (":icon ", loc:uri($i), "; ")
          case $l as element(a:logo) return (":logo ", loc:uri($l), "; ")
          case $r as element(a:rights) return (":rights ", loc:content(concat($tab, " "), $r), "; ")
          case $u as element(a:updated) return (":updated ", loc:date($u), "; ")
          case $p as element(a:published) return (":published ", loc:date($p), "; ")
          case $auth as element(a:author) return (":author ", loc:person(concat($tab, " "), $auth), "; ")
          (: nested one level deeper, like every other case here and like loc:entry :)
          case $contr as element(a:contributor) return (":contributor ", loc:person(concat($tab, " "), $contr), "; ")
          case $id as element(a:id) return (":id ", loc:textquote($id), "^^xsd:anyURI; ")
          case $link as element(a:link) return (":link ", loc:link(concat($tab, " "), $link), "; ")
          case $cat as element(a:category) return (":category ", loc:category(concat($tab, " "), $cat), "; ")
          default return ("#unprocessed element ", name($n), "; ")
      )
  )
};

(: quote the string value of a node and append its @lang tag when a language is known :)
declare function loc:textatt($at as node()) {
  (loc:textquote($at), loc:atlang($at))
};

(: all text elements - ie: those that take text, html or xhtml content
   Have moved to using :Content everywhere as it makes life simpler.

declare function loc:text($el as element()) {
  let $type := $el/@type return
  if ($type = "text" or $type="text/plain" or empty($type) )
  then ("[ :text ",loc:textquote(loc:esc($el/text())),loc:atlang($el)," ]")
  else if ($type = "html" or $type="text/html")
  then ("[ :html ",loc:textquote(loc:esc($el/text()))," ]")
  else if ($type = "xhtml" or $type="text/xhtml" or $type="application/xhtml+xml")
  then ("[ :xhtml ",loc:textquote(loc:serialize(loc:div($el/xhtml:div)))," ]")
  else ("#unprocessed content type ",$type,"; ")
};
:)

(: write the body of a content element as a quoted literal.
   text/* types other than text/xhtml are quoted as they are; application/*+xml
   types and text/xhtml have their xhtml:div child serialized first; any other
   type yields an "unprocessed" marker. :)
declare function loc:escapeContent($el as element()) {
  let $type := loc:type($el/@type)
  return
    if (starts-with($type, "text/") and not(ends-with($type, "/xhtml")))
    (: loc:textquote escapes its argument itself; escaping here as well
       produced doubly escaped quotes and backslashes :)
    then loc:textquote($el/text())
    else if ((starts-with($type, "application/") and ends-with($type, "+xml"))
             or ($type eq "text/xhtml"))
    (: NOTE(review): text nodes are escaped by loc:escape and then again by
       loc:textquote. Left as is because loc:escape also rebuilds the element
       tree before serialization and dropping it may change the serialized
       output - confirm before removing. :)
    then loc:textquote(loc:serialize(loc:escape($el/xhtml:div)))
    else ("[]; #unprocessed content type ", $type, "; ")
};

(: map the Atom shorthands text / html / xhtml to media types; a missing type
   counts as text/plain, any other value is returned unchanged :)
declare function loc:type($type) as xs:string {
  if ($type = "text") then "text/plain"
  else if ($type = "html") then "text/html"
  else if ($type = "xhtml") then "application/xhtml+xml"
  else if (not($type)) then "text/plain"
  else $type
};

(: render a text construct or a:content element as a :Content node carrying
   :src, :type, :lang, :base and :body where applicable :)
declare function loc:content($tab as xs:string, $content as element()) {
  ( "[ a :Content;",
    for $att in ($content/@* except $content/(@xml:lang, @xml:base, @type))
    return
      ( typeswitch ($att)
          case $src as attribute(src) return (" :src ", loc:uriatt($content, $src), ";")
          default return (" ", $tab, "#unprocessed attribute ", $att, "; ")
      ),
    (' :type "', loc:type($content/@type), '";'),
    let $lng := loc:lang($content)
    return
      if (empty($lng)) then ()
      else (' :lang "', $lng, '";'),
    let $bse := loc:base($content)
    let $tp := loc:type($content/@type)
    return
      (: no :base is written for text/plain content :)
      if (empty($bse) or ($tp = 'text/plain')) then ()
      else (' ', $tab, ' :base <', $bse, '>;'),
    " ",
    if (empty(($content/*, $content/text()))) then ()
    else ($tab, ":body ", loc:escapeContent($content), "; "),
    $tab, "]"
  )
};

(: find the xml:lang value of a node: the nearest xml:lang attribute on the
   node or one of its ancestors :)
declare function loc:lang($n as node()) {
  reverse($n/ancestor-or-self::node()/@xml:lang)[1]
};

(: return the xml lang value of a node preceded by @ if it is known or () :)
declare function loc:atlang($n as node()) {
  let $lang := loc:lang($n)
  return
    if (empty($lang)) then ()
    else ("@", $lang)
};

(: Use single quotes when only one line of text, else use triple quotes.
   Text containing a line break or a double quote gets the triple-quoted form.
   The text is escaped here with loc:esc, so callers must pass it unescaped. :)
declare function loc:textquote($t as xs:string) {
  if (not(matches($t, '[\n\r"]', "m")))
  then ('"', loc:esc($t), '"')
  else ('"""', loc:esc($t), '"""')
};

(: Main body: the Turtle prefix declarations followed by the description of the
   root element of the context document.
   NOTE(review): the element constructor wrapping this text was lost to markup
   stripping; only the enclosed expression in braces survived. The element
   name below is a placeholder (output method is text, so presumably only its
   string value is written) - confirm against the upstream file. :)
<turtle>@prefix : &lt;http://bblfish.net/work/atom-owl/2006-06-06/#&gt; .
@prefix iana: &lt;http://www.iana.org/assignments/relation/&gt;.
@prefix xsd: &lt;http://www.w3.org/2001/XMLSchema#&gt; .
@prefix rdfs: &lt;http://www.w3.org/2000/01/rdf-schema#&gt; .
@prefix rdf: &lt;http://www.w3.org/1999/02/22-rdf-syntax-ns#&gt; .
@prefix owl: &lt;http://www.w3.org/2002/07/owl#&gt; .
{
  string-join(
    typeswitch (/*)
      case $f as element(a:feed) return ("[]", loc:feed(" ", $f))
      case $e as element(a:entry) return ("[]", loc:entry(" ", $e))
      (: report the root element that was tested; name(.) named the context item instead :)
      default return ("#unprocessed element ", name(/*), "; ")
    , "")
  (: return loc:serialize(loc:escape($link)) :)
} .</turtle>