XML parsing, TXMLDocument

42,445

Solution 1

Try this:

uses ComObj, MSXML;

// Loads the XML document named by `url` through MSXML and writes, for every
// /doc/data/row element, its <phrase> text followed by the text of each
// search_engines/search_engine/se_url descendant into Memo1.
// Raises Exception when the document cannot be loaded or parsed.
procedure TForm1.Button1Click(Sender: TObject);
var
  xml: IXMLDOMDocument;
  rowNode, phraseNode: IXMLDomNode;
  nodes_row, nodes_se: IXMLDomNodeList;
  i, j: Integer;
  url, phraseText: string;
begin
  // put url or file name
  url := 'http://softez.pp.ua/gg.xml';

  xml := CreateOleObject('Microsoft.XMLDOM') as IXMLDOMDocument;
  xml.async := False; // parseError is inspected right after load returns
  xml.load(url); // or use loadXML to load XML document using a supplied string
  if xml.parseError.errorCode <> 0 then
    raise Exception.Create('XML Load error:' + xml.parseError.reason);

  Memo1.Clear;
  nodes_row := xml.selectNodes('/doc/data/row');
  for i := 0 to nodes_row.length - 1 do
  begin
    rowNode := nodes_row.item[i];

    // selectSingleNode yields nil when the row has no <phrase> child;
    // dereferencing it directly would raise an access violation.
    phraseNode := rowNode.selectSingleNode('phrase');
    if Assigned(phraseNode) then
      phraseText := phraseNode.text
    else
      phraseText := '';
    Memo1.Lines.Add('phrase=' + phraseText);

    nodes_se := rowNode.selectNodes('search_engines/search_engine/se_url');
    for j := 0 to nodes_se.length - 1 do
      Memo1.Lines.Add('url=' + nodes_se.item[j].text);
    Memo1.Lines.Add('--------------');
  end;
end;

Result:

phrase=key1
url=link1_1
url=link1_2
--------------
phrase=key2
url=link2_1
url=link2_2
--------------

A Reference to IXMLDOMDocument

Solution 2

If you first include these three general-purpose library routines:

uses XMLDoc, XMLIntf, xmldom;

// Creates a TXMLDocument that uses the 'MSXML' DOM vendor and is owned by a
// freshly created TComponent.
//   Owner1 - receives the new owner component; the caller frees it to dispose
//            of the document. Set to nil if creation fails.
//   Result - the configured document.
// Any exception raised while creating or configuring the document is
// re-raised after the owner has been released.
function CreateXMLDocument( var Owner1: TComponent): TXMLDocument;
begin
Owner1 := TComponent.Create( nil);
try
  result  := TXMLDocument.Create( Owner1);
  result.Options := [doNodeAutoCreate, doNodeAutoIndent, doAttrNull,
                     doAutoPrefix, doNamespaceDecl];
  result.DOMVendor := GetDOMVendor( 'MSXML');
except
  // Do not leave a half-built owner behind: freeing it also frees any
  // document already created with it as owner.
  Owner1.Free;
  Owner1 := nil;
  raise;
end;
end;

// Evaluates the XPath expression sXPath relative to FocusNode and returns the
// matching nodes wrapped as IXMLNode.
//   FocusNode - context node; may be nil.
//   sXPath    - XPath expression handed to the DOM implementation.
//   Result    - one entry per match; empty when FocusNode is nil, when its
//               DOM node does not support IDomNodeSelect, or when the
//               selection yields no list.
function XPATHSelect( const FocusNode: IXMLNode; const sXPath: string): TArray<IXMLNode>;
var
  Selector : IDomNodeSelect;
  Matches  : IDOMNodeList;
  Current  : IDomNode;
  Access   : IXmlDocumentAccess;
  OwnerDoc : TXmlDocument;
  Idx      : integer;
begin
result := nil;
if not assigned( FocusNode) then exit;
if not Supports( FocusNode.DOMNode, IDomNodeSelect, Selector) then exit;
Matches := Selector.SelectNodes( sXPath);
if not assigned( Matches) then exit;
SetLength( result, Matches.Length);
for Idx := Low( result) to High( result) do
  begin
  Current := Matches.item[ Idx];
  // Pass the owning TXmlDocument along when the DOM node exposes it,
  // otherwise wrap the node without one.
  if Supports( Current, IXmlDocumentAccess, Access) then
    OwnerDoc := Access.DocumentObject
  else
    OwnerDoc := nil;
  result[ Idx] := TXmlNode.Create( Current, nil, OwnerDoc) as IXMLNode;
  end
end;


// Evaluates the XPath expression sXPath relative to FocusNode and returns the
// single node chosen by the DOM implementation's selectNode.
//   FocusNode    - context node; may be nil.
//   sXPath       - XPath expression handed to the DOM implementation.
//   SelectedNode - receives the wrapped match, or nil when nothing matched.
//   Result       - True when SelectedNode was assigned.
function XPATHSelectFirst( const FocusNode: IXMLNode; const sXPath: string; var SelectedNode: IXMLNode): boolean;
var
  DomNodeSelect: IDomNodeSelect;
  DOMNode      : IDomNode;
  DocAccess    : IXmlDocumentAccess;
  Doc          : TXmlDocument;
begin
SelectedNode := nil;
// Doc is a plain class reference and is NOT zero-initialised like the
// interface locals; without this it would hold an indeterminate pointer
// whenever the Supports check below fails.
Doc := nil;
if assigned( FocusNode) and
   Supports( FocusNode.DOMNode, IDomNodeSelect, DomNodeSelect) then
  DOMNode := DomNodeSelect.selectNode( sXPath);
if assigned( DOMNode) then
  begin
  if Supports( DOMNode.OwnerDocument, IXmlDocumentAccess, DocAccess) then
    Doc := DocAccess.DocumentObject;
  SelectedNode := TXmlNode.Create( DOMNode, nil, Doc);
  end;
result := assigned( SelectedNode)
end;

Then a much neater solution is:

// Loads DocumentSource and lists, for every <row> that has a <phrase> child,
// the phrase value followed by each search_engines/search_engine/se_url value
// in memo2. The document and its owner are released even if loading or
// selection raises.
procedure TForm2.btn1Click(Sender: TObject);
const
  DocumentSource =  'http://softez.pp.ua/gg.xml';
var
  Doc: IXMLDocument;
  DocOwner: TComponent;
  RowNode, PhraseNode, UrlNode: IXMLNode;

  // Appends one formatted line to memo2.
  procedure PutLn( const LineFmt: string; const Args: array of const);
  begin
  memo2.Lines.Add( Format( LineFmt, Args))
  end;

begin
memo2.Clear;
DocOwner := nil; // so the finally block is safe if creation itself fails
try
  Doc := CreateXMLDocument( DocOwner);
  Doc.LoadFromFile( DocumentSource);
  for RowNode in XPATHSelect( Doc.DocumentElement, '//row[phrase]') do
    begin
    if not XPATHSelectFirst( RowNode, 'phrase', PhraseNode) then continue;
    PutLn( 'phrase=%s', [PhraseNode.NodeValue]);
    for UrlNode in XPATHSelect( RowNode, 'search_engines/search_engine/se_url') do
      PutLn( 'url=%s', [UrlNode.NodeValue]);
    PutLn('--------------',[])
    end;
finally
  // Drop the interface reference BEFORE the owner destroys the document;
  // otherwise the automatic release at routine exit would touch a freed
  // object.
  Doc := nil;
  DocOwner.Free;
end;
end;

This was tested on Delphi 2010 and works a treat.

Solution 3

And just for good measure, here is another answer, provided you are not afraid to mix in a little XSLT!

// XSLT 1.0 stylesheet, kept as a string constant, that renders the document
// as plain text (output method "text"):
//   - bare text nodes and attributes are suppressed by an empty template;
//   - each <row> emits "phrase=<phrase value>" and a line feed, then applies
//     templates to its children, then emits a line of dashes and a line feed;
//   - each search_engines/search_engine/se_url emits "url=<value>" and a
//     line feed.
const Transform =
'<?xml version="1.0" encoding="utf-8"?>' +
'<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> ' +
' <xsl:output method="text" indent="no"/>' +
' <xsl:template match="text()|@*"/>' +
' <xsl:template match="//row">' +
'  <xsl:text>phrase=</xsl:text>' +
'  <xsl:value-of select="phrase"/>' +
'  <xsl:text>&#10;</xsl:text>' +
'  <xsl:apply-templates/>' +
'  <xsl:text>--------------&#10;</xsl:text>' +
' </xsl:template>' +
' <xsl:template match="search_engines/search_engine/se_url">' +
'  <xsl:text>url=</xsl:text>' +
'  <xsl:value-of select="."/>' +
'  <xsl:text>&#10;</xsl:text>' +
' </xsl:template>' +
'</xsl:stylesheet>';


// Loads DocumentSource, applies the Transform stylesheet to its document
// element and appends the resulting text to memo2. Both documents and their
// owners are released even if loading or the transformation raises.
procedure TForm2.btn1Click( Sender: TObject);
const
  DocumentSource =  'http://softez.pp.ua/gg.xml';
var
  Doc, Style: IXMLDocument;
  DocOwner, StyleOwner: TComponent;
  sOut: widestring;
begin
  memo2.Clear;
  // Start from nil so the finally block is safe if either creation fails.
  DocOwner := nil;
  StyleOwner := nil;
  try
    Doc := CreateXMLDocument( DocOwner);
    Doc.LoadFromFile( DocumentSource);
    Style := CreateXMLDocument( StyleOwner);
    Style.LoadFromXML( Transform);
    Doc.DocumentElement.TransformNode( Style.DocumentElement, sOut);
    memo2.Lines.Add( sOut);
  finally
    // Drop the interface references BEFORE the owners destroy the documents;
    // otherwise the automatic release at routine exit would touch freed
    // objects.
    Style := nil;
    Doc := nil;
    DocOwner.Free;
    StyleOwner.Free;
  end;
end;
Share:
42,445
dedoki
Author by

dedoki

Updated on May 14, 2020

Comments

  • dedoki
    dedoki almost 4 years

    I have a problem with parsing XML.

    How to get field values se_url and phrase?
    I need to get link1_1, link1_2, key1, link2_1, link2_2, key2... which are in se_url and phrase.

    I did not find in Google how to do it (also did not find a manual on how to work with TXMLDocument).

    <doc>
      <date2>20120214</date2>
      <date1>20120214</date1>
      <data count="116">
        <row>
          <search_engines count="2">
            <search_engine>
              <se_url>link1_1</se_url>
              <se_page>1</se_page>
              <se_id>2</se_id>
            </search_engine>
            <search_engine>
              <se_url>link1_2</se_url>
              <se_page>1</se_page>
              <se_id>3</se_id>
            </search_engine>
          </search_engines>
          <denial>0.4889</denial>
          <visits>45</visits>
          <page_views>52</page_views>
          <phrase>key1</phrase>
          <visit_time>126</visit_time>
          <depth>1.1556</depth>
        </row>
        <row>
          <search_engines count="2">
            <search_engine>
              <se_url>link2_1</se_url>
              <se_page>1</se_page>
              <se_id>3</se_id>
            </search_engine>
            <search_engine>
              <se_url>link2_2</se_url>
              <se_page>1</se_page>
              <se_id>6</se_id>
            </search_engine>
          </search_engines>
          <denial>0.5714</denial>
          <visits>42</visits>
          <page_views>50</page_views>
          <phrase>key2</phrase>
          <visit_time>109</visit_time>
          <depth>1.1905</depth>
        </row>
      </data>
    </doc>
    
  • kobik
    kobik about 12 years
    @KenWhite, Thank you for caring about the quality of the questions on SO :) ( BTW, Do you know why my code is not colorized? :/ )
  • Ken White
    Ken White about 12 years
    I don't know why it's not being syntax-highlighted. I went and looked at the source, and it looks fine. I even added an explicit language comment with no change. ???
  • dedoki
    dedoki about 12 years
    One more question: how to get the attribute value?
  • kobik
    kobik about 12 years
    node.attributes.getNamedItem('count').text.
  • Shaun Roselt
    Shaun Roselt about 7 years
    I seem to be getting an error. It says "Undeclared identifier: 'CreateXMLDocument'". I am using Delphi 10.1 Berlin Starter Edition and FireMonkey. Then I also have the following in my uses list that is related to XML: Xml.xmldom, Xml.XMLIntf, Xml.adomxmldom, Xml.XMLDoc