Generic XML and XML Schema for Hierarchical Content

Most hierarchical content can be described in XML in much the same way. The following is a generic XML schema (with a sample XML document) for hierarchical content.

For the purposes of this demonstration, we assume that this XML describes a hierarchical content taxonomy.

Sample XML:

<?xml version="1.0" encoding="UTF-8"?>

<taxonomy xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xsi:noNamespaceSchemaLocation="sample-taxonomy.xsd">

  <!-- The terms of the taxonomy. Each term carries a code attribute and
       its text. The text is written flush against the tags because
       whitespace inside a term element is part of its string value. -->
  <terms>
    <term code="12345">LAMP</term>
    <term code="22347">LGPL</term>
    <!-- The list of terms continues... -->
  </terms>

  <!-- Parent/child relationships between term codes, each with a
       validity period. -->
  <hierarchy>
    <tuple>
      <parent_term code="12345"/>
      <child_term code="12444"/>
      <valid_from>2006-10-23</valid_from>
      <valid_to>2007-01-23</valid_to>
    </tuple>

    <!-- The valid_to element must always be present, but it may be
         left empty; an empty valid_to means the relationship is
         valid indefinitely. -->
    <tuple>
      <parent_term code="12345"/>
      <child_term code="22347"/>
      <valid_from>2006-10-23</valid_from>
      <valid_to/>
    </tuple>

    <!-- The list of relationships continues... -->

  </hierarchy>

</taxonomy>

The corresponding XML Schema (XSD) file could look something like this:

<?xml version="1.0" encoding="UTF-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" elementFormDefault="qualified">

  <!-- CUSTOM TYPE DEFINITIONS -->

  <!-- Length guard for code values: a string of 2 to 255 characters,
       so that a code cannot exceed the maximum length allowed in the
       database. Both bounds are example values; adjust them to fit
       your own storage. -->
  <xsd:simpleType name="varchar255">
    <xsd:restriction base="xsd:string">
      <xsd:minLength value="2"/>
      <xsd:maxLength value="255"/>
    </xsd:restriction>
  </xsd:simpleType>

  <!-- A date that may be left empty: the value is either a valid
       xsd:date or the empty string. Building on xsd:date rather than
       a digit pattern rejects impossible dates such as 2007-13-45.
       Note that xsd:date also permits an optional time-zone suffix. -->
  <xsd:simpleType name="nullableDate">
    <xsd:union memberTypes="xsd:date">
      <!-- Second member of the union: the empty string (no date given). -->
      <xsd:simpleType>
        <xsd:restriction base="xsd:string">
          <xsd:length value="0"/>
        </xsd:restriction>
      </xsd:simpleType>
    </xsd:union>
  </xsd:simpleType>

  <!-- END CUSTOM TYPE DEFINITIONS -->

  <!-- Document root: exactly one terms list and exactly one hierarchy,
       in either order. -->
  <xsd:element name="taxonomy">
    <xsd:complexType>
      <!-- xsd:all defaults to minOccurs="1" maxOccurs="1". -->
      <xsd:all>
        <xsd:element ref="terms"/>
        <xsd:element ref="hierarchy"/>
      </xsd:all>
    </xsd:complexType>
  </xsd:element>

  <!-- Container for zero or more term elements. The content is
       element-only (not mixed), so stray character data between the
       term children is rejected by validation. -->
  <xsd:element name="terms">
    <xsd:complexType>
      <xsd:sequence>
        <xsd:element ref="term" minOccurs="0" maxOccurs="unbounded"/>
      </xsd:sequence>
    </xsd:complexType>
  </xsd:element>

  <!-- A single term: string content plus a mandatory code attribute. -->
  <xsd:element name="term">
    <xsd:complexType>
      <xsd:simpleContent>
        <xsd:extension base="xsd:string">
          <xsd:attribute name="code" use="required" type="varchar255"/>
        </xsd:extension>
      </xsd:simpleContent>
    </xsd:complexType>
  </xsd:element>

  <!-- Container for zero or more tuple elements. -->
  <xsd:element name="hierarchy">
    <xsd:complexType>
      <xsd:sequence>
        <xsd:element ref="tuple" minOccurs="0" maxOccurs="unbounded"/>
      </xsd:sequence>
    </xsd:complexType>
  </xsd:element>

  <!-- One relationship record: a parent term reference, a child term
       reference and the validity period. All four children are
       required exactly once, in any order. The content is element-only
       (not mixed), so stray character data inside a tuple is rejected
       by validation. -->
  <xsd:element name="tuple">
    <xsd:complexType>
      <xsd:all>
        <xsd:element ref="parent_term"/>
        <xsd:element ref="child_term"/>
        <xsd:element ref="valid_from"/>
        <!-- Always present; its type allows it to be empty. -->
        <xsd:element ref="valid_to"/>
      </xsd:all>
    </xsd:complexType>
  </xsd:element>

  <!-- Empty element naming the parent side of a tuple through its
       mandatory code attribute. -->
  <xsd:element name="parent_term">
    <xsd:complexType>
      <xsd:attribute name="code" use="required" type="varchar255"/>
    </xsd:complexType>
  </xsd:element>

  <!-- Empty element naming the child side of a tuple through its
       mandatory code attribute. -->
  <xsd:element name="child_term">
    <xsd:complexType>
      <xsd:attribute name="code" use="required" type="varchar255"/>
    </xsd:complexType>
  </xsd:element>

  <!-- Validity period of a tuple. valid_from must hold a date;
       valid_to uses nullableDate, which also admits an empty value. -->
  <xsd:element name="valid_from" type="xsd:date"/>
  <xsd:element name="valid_to" type="nullableDate"/>
</xsd:schema>