REBOL [ Title: "Convert an XML-derived block structure into objects." File: %xml-object.r Date: 29-Sep-2001 Version: 1.0.4 Author: "Gavin F. McKenzie" Email: gavin.mckenzie@sympatico.ca Purpose: { This script creates a function "xml-to-object" that converts a series of nested blocks, created from an XML document by parse-xml, into a series of nested objects that represent the original content of the XML document processed. } History: [ 1.0.0 [17-Jul-2001 "First public release."] 1.0.1 [17-Jul-2001 "Support for mixed content."] 1.0.2 [06-Sep-2001 "Fixed a bug handling empty elements."] 1.0.3 [22-Sep-2001 {Fixed a bug handling a mixture of unique and multiply occuring elements sharing the same enclosing element.}] 1.0.4 [29-Sep-2001 {Fixed a bug improperly ignoring whitespace. Changed switch-es to use type?/word.}] ] Acknowledgments: { Many thanks to Mike Hansen for finding and reporting defects in this script. } ] xml-to-object: function [{ Convert a series of nested blocks, created from an XML document by parse-xml, into a series of nested objects that represent the original content of the XML document processed. Returns the root object. } document [block!] "The block representing the processed XML document." ][ name children child attr-list contains-char-content contains-element-content content-model potential-new-content new-content is-allspace do-character-content do-element-content add-mixed-content get-mixed-value ][ is-allspace: function [s] [] [ either (type? s) = string! [ s: copy s ][ s: form copy s ] s: trim/all s either (length? s) = 0 [ return true ][ return false ] ] do-character-content: func [ children [string!] ][ append new-content reduce [to-set-word 'value? children] remove next next document ] do-empty-content: func [ ][ append new-content reduce [to-set-word 'value? ""] remove next next document ] do-element-content: func [ children [block!] /local entry ][ ; ; Process all child content of this element ; forall children [ potential-new-content: xml-to-object children/1 ; ; Is there already an object member known by this name? ; (i.e. does it look like this is the beginning of ; multiply occurring elements?) ; entry: find new-content (to-set-word potential-new-content/1) ; ; Yes, there is already an object member known by this name ; either entry [ if entry/3 = 'object! [ ; ...so we need to transform the existing object member ; from a single object into a block of objects, ; and append the potential-new-content into the block ; change/part at entry 3 reduce ['block! 'reduce] 1 change/only at entry 5 reduce [ 'make 'object! entry/5 ] ] append entry/5 (copy/part at potential-new-content 2 3) ][ append new-content potential-new-content ] ] ] add-mixed-content: func [ children [string! block!] /local v ][ either (empty? new-content) or (none? find new-content to-set-word 'content?) [ append new-content reduce [ to-set-word 'content? 'make 'block! reduce [children] to-set-word 'value? 'does [get-mixed-value self] ] ][ v: find new-content to-set-word 'content? either (type? children) = string! [ append v/4 children ][ append v/4 to-word children/1 ] ] ] get-mixed-value: function [ obj [object!] ][ item cooked-value? ][ cooked-value?: copy "" foreach item (reduce obj/content?) [ either (type? item) = string! [ append cooked-value? item ][ append cooked-value? item/value? ] ] cooked-value? ] name: to-word document/1 change document to-set-word document/1 new-content: copy [] contains-char-content: false contains-element-content: false content-model: 'empty ; ; Extract attributes and children ; attr-list: document/2 children: document/3 remove/part next document 2 ; ; Determine the content model ; if not none? children [ for i 1 (length? children) 1 [ child: pick children i switch type?/word child [ string! [ content-model: 'character either is-allspace child [ contains-char-content: 'ignoreable-ws ][ contains-char-content: 'true ] ] block! [ contains-element-content: true content-model: 'element ] ] if (contains-char-content = 'true) and (contains-element-content = true) [ content-model: 'mixed break ] ] ] ; ; Remove any ignoreable whitespace nodes ; if (contains-char-content = 'ignoreable-ws) and (contains-element-content = true) [ content-model: 'element while [not tail? children] [ either (type? children/1) = string! [ remove children ][ children: next children ] ] children: head children ] ; ; Actually do the work of 'objectifying' the block ; switch content-model [ empty [ do-empty-content ] character [ do-character-content children/1 ] element [ do-element-content children ] mixed [ forall children [ switch type?/word children/1 [ string! [ add-mixed-content children/1 ] block! [ add-mixed-content children/1 do-element-content copy/part children 1 ] ] ] ] ] ; ; Process attributes ; if (not none? attr-list) [ forskip attr-list 2 [ change attr-list to-set-word attr-list/1 ] attr-list: head attr-list append new-content attr-list ] ; ; Insert the result of all our hard work into the block ; if not empty? new-content [ insert/only at document 2 new-content insert at document 2 reduce ['make 'object!] ] document ]