How to calculate the XPath position of an element using Javascript?

39,572

Solution 1

Firebug can do this, and it's open source (BSD) so you can reuse their implementation, which does not require any libraries.

3rd party edit

This is an extract from the linked source above, included in case the link changes. Please check the original source to benefit from changes and updates, or for the full feature set it provides.

/**
 * Returns an XPath for the given element.
 *
 * An element that carries an id is addressed directly through that id;
 * everything else (including a missing element) is delegated to
 * Xpath.getElementTreeXPath.
 */
Xpath.getElementXPath = function(element)
{
    var hasId = element && element.id;

    return hasId
        ? '//*[@id="' + element.id + '"]'
        : Xpath.getElementTreeXPath(element);
};

The code above calls the following function. Note: I added some line wrapping to avoid a horizontal scroll bar.

/**
 * Builds a positional XPath for an element by climbing from the element
 * through its ancestors for as long as the current node is an element node.
 *
 * Each step is the element's localName, preceded by "prefix:" when the
 * element has a namespace prefix. A 1-based index is appended whenever a
 * sibling with the same nodeName exists before or after the element.
 *
 * Returns null when no step could be produced.
 */
Xpath.getElementTreeXPath = function(element)
{
    var segments = [];

    while (element && element.nodeType == Node.ELEMENT_NODE)
    {
        // Count the preceding siblings that share this element's nodeName,
        // skipping any document type declaration.
        var precedingCount = 0;
        var before = element.previousSibling;
        while (before)
        {
            if (before.nodeType != Node.DOCUMENT_TYPE_NODE
                && before.nodeName == element.nodeName)
            {
                precedingCount++;
            }
            before = before.previousSibling;
        }

        // Scan forward only until the first same-named sibling is found.
        var sameNameFollows = false;
        var after = element.nextSibling;
        while (after && !sameNameFollows)
        {
            if (after.nodeName == element.nodeName)
                sameNameFollows = true;
            after = after.nextSibling;
        }

        var prefixPart = element.prefix ? element.prefix + ":" : "";
        var step = prefixPart + element.localName;

        var position = "";
        if (precedingCount || sameNameFollows)
            position = "[" + (precedingCount + 1) + "]";

        // Ancestors are visited last, so each new step goes to the front.
        segments.unshift(step + position);

        element = element.parentNode;
    }

    if (!segments.length)
        return null;

    return "/" + segments.join("/");
};

Solution 2

A function I use to get an XPath similar to your situation, it uses jQuery:

/**
 * Builds an absolute, position-based XPath for a DOM element by walking up
 * through its ancestors, e.g. "/html/body/ol/li[2]".
 *
 * A 1-based index is appended to a step whenever the element has at least one
 * sibling element with the same tag name. The first of several same-tag
 * siblings therefore gets "[1]"; without it the step would match every one of
 * those siblings instead of a single element.
 *
 * Only standard DOM properties (nodeType, tagName, parentNode,
 * previousSibling, nextSibling) are used, so no library is required.
 *
 * @param {Element} element - Element to locate.
 * @returns {string} The XPath, or '' when the argument is not an element node.
 */
function getXPath( element )
{
    var xpath = '';
    var node;
    var sibling;

    for ( node = element; node && node.nodeType === 1; node = node.parentNode )
    {
        // Position among the sibling elements that share this tag name.
        var position = 1;
        for ( sibling = node.previousSibling; sibling; sibling = sibling.previousSibling )
        {
            if ( sibling.nodeType === 1 && sibling.tagName === node.tagName )
            {
                position++;
            }
        }

        // Only look ahead when nothing was found behind: one same-tag sibling
        // anywhere is enough to require an explicit index.
        var needsIndex = position > 1;
        for ( sibling = node.nextSibling; sibling && !needsIndex; sibling = sibling.nextSibling )
        {
            if ( sibling.nodeType === 1 && sibling.tagName === node.tagName )
            {
                needsIndex = true;
            }
        }

        var step = node.tagName.toLowerCase();
        if ( needsIndex )
        {
            step += '[' + position + ']';
        }
        xpath = '/' + step + xpath;
    }

    return xpath;
}

Solution 3

A small, powerful, pure-JS function

It returns xpath for the element and elements iterator for xpath.

https://gist.github.com/iimos/e9e96f036a3c174d0bf4

/**
 * Two-way XPath helper.
 *
 * - Given a string, evaluates it as an XPath expression against `document`
 *   and returns the result of `document.evaluate` (result type 0).
 * - Given an element, returns an XPath string for it. An element with an id
 *   is addressed by that id; otherwise the path of its parent is extended
 *   with the lower-cased tag name, plus a 1-based index when the parent has
 *   more than one child with the same tag name.
 *
 * @param {string|Element} el - XPath expression to run, or element to describe.
 * @returns {XPathResult|string} Evaluation result for a string argument;
 *   otherwise the XPath, or '' when the argument is not an element node.
 */
function xpath(el) {
  if (typeof el == "string") return document.evaluate(el, document, null, 0, null);
  if (!el || el.nodeType != 1) return '';
  if (el.id) return "//*[@id='" + el.id + "']";

  var step = '/' + el.tagName.toLowerCase();
  var parent = el.parentNode;

  // A detached element has no parent, so there are no siblings to number
  // and nothing to prepend; reading parent.children would throw.
  if (!parent) return step;

  var sames = [].filter.call(parent.children, function (x) { return x.tagName == el.tagName; });
  var index = sames.length > 1 ? '[' + ([].indexOf.call(sames, el) + 1) + ']' : '';
  return xpath(parent) + step + index;
}

You will probably need to add a shim for IE8, which doesn't support the [].filter method; this MDN page gives such code.

Usage

Getting xpath for node:
// Passing an element node (rather than a string) makes xpath() return its XPath.
var xp = xpath(elementNode)
Executing xpath:
var iterator = xpath("//h2")
// Pull the matches one at a time; the loop stops as soon as
// iterateNext() hands back a falsy value.
for (var el = iterator.iterateNext(); el; el = iterator.iterateNext()) {
  // work with element
}

Solution 4

The firebug implementation can be modified slightly to check for element.id further up the dom tree:

  /**
   * Returns an XPath for the given element.
   *
   * An element that has an id is addressed directly through that id; any
   * other argument is handed to getElementTreeXPath, which derives the path
   * from the element's position in the tree.
   */
  var getElementXPath = function(element) {
      var hasId = Boolean(element && element.id);

      if (!hasId) {
          return getElementTreeXPath(element);
      }

      return '//*[@id="' + element.id + '"]';
  };

  /**
   * Builds an XPath for an element from its position in the tree.
   *
   * The walk goes from the element up through its ancestors and stops early
   * at the first node that has an id, which becomes the anchor of the path
   * (e.g. //*[@id="box"]/ul/li[2]). Without such a node the path starts at
   * the topmost element reached.
   *
   * Each step is the lower-cased nodeName. A 1-based index is appended when
   * a sibling with the same nodeName exists before OR after the element;
   * otherwise the first of several same-named siblings would produce a step
   * that matches all of them.
   *
   * @param {Element} element - Element to locate.
   * @returns {?string} The XPath, or null when the argument is not an element node.
   */
  var getElementTreeXPath = function(element) {
      // Numeric value of Node.DOCUMENT_TYPE_NODE, used as a literal like the
      // element check (nodeType == 1) in the loop below.
      var DOCUMENT_TYPE_NODE = 10;
      var paths = [];
      var sibling;

      for (; element && element.nodeType == 1; element = element.parentNode)  {
          // An id anchors the path: no need to climb any further.
          if (element.id) {
              // The leading "/" combines with the "/" added on return to form "//*[...]".
              paths.unshift('/*[@id="' + element.id + '"]');
              break;
          }

          var index = 0;
          for (sibling = element.previousSibling; sibling; sibling = sibling.previousSibling) {
              // Ignore document type declaration.
              if (sibling.nodeType == DOCUMENT_TYPE_NODE)
                  continue;

              if (sibling.nodeName == element.nodeName)
                  ++index;
          }

          // Stop scanning forward at the first same-named sibling.
          var hasFollowingSiblings = false;
          for (sibling = element.nextSibling; sibling && !hasFollowingSiblings; sibling = sibling.nextSibling) {
              if (sibling.nodeName == element.nodeName)
                  hasFollowingSiblings = true;
          }

          var tagName = element.nodeName.toLowerCase();
          var pathIndex = (index || hasFollowingSiblings) ? "[" + (index + 1) + "]" : "";
          paths.unshift(tagName + pathIndex);
      }

      return paths.length ? "/" + paths.join("/") : null;
  };

Solution 5

I have just modified DanS' solution in order to use it with textNodes. Very useful to serialize HTML range object.

/**
 * Returns an XPath for the given node.
 *
 * A node that has an id is addressed directly through that id; any other
 * argument is handed to getNodeTreeXPath, which derives the path from the
 * node's position in the tree.
 */
var getNodeXPath = function(node) {
    var hasId = Boolean(node && node.id);

    if (!hasId) {
        return getNodeTreeXPath(node);
    }

    return '//*[@id="' + node.id + '"]';
};

/**
 * Builds an XPath for an element or text node from its position in the tree.
 *
 * The walk goes from the node up through its ancestors and stops early at the
 * first node that has an id, which becomes the anchor of the path
 * (e.g. //*[@id="box"]/p/text()[2]). Without such a node the path starts at
 * the topmost element reached.
 *
 * Element steps use the lower-cased nodeName; text nodes use "text()".
 * A 1-based index is appended when a sibling with the same nodeName exists
 * before OR after the node; otherwise the first of several same-named
 * siblings would produce a step that matches all of them.
 *
 * @param {Node} node - Element or text node to locate.
 * @returns {?string} The XPath, or null when the argument is neither an
 *   element node nor a text node.
 */
var getNodeTreeXPath = function(node) {
    // Numeric value of Node.DOCUMENT_TYPE_NODE, used as a literal like the
    // nodeType checks (1 = element, 3 = text) in the loop below.
    var DOCUMENT_TYPE_NODE = 10;
    var paths = [];
    var sibling;

    for (; node && (node.nodeType == 1 || node.nodeType == 3); node = node.parentNode)  {
        // An id anchors the path: no need to climb any further.
        if (node.id) {
            // The leading "/" combines with the "/" added on return to form "//*[...]".
            paths.unshift('/*[@id="' + node.id + '"]');
            break;
        }

        var index = 0;
        for (sibling = node.previousSibling; sibling; sibling = sibling.previousSibling) {
            // Ignore document type declaration.
            if (sibling.nodeType == DOCUMENT_TYPE_NODE)
                continue;

            if (sibling.nodeName == node.nodeName)
                ++index;
        }

        // Stop scanning forward at the first same-named sibling.
        var hasFollowingSiblings = false;
        for (sibling = node.nextSibling; sibling && !hasFollowingSiblings; sibling = sibling.nextSibling) {
            if (sibling.nodeName == node.nodeName)
                hasFollowingSiblings = true;
        }

        var tagName = (node.nodeType == 1 ? node.nodeName.toLowerCase() : "text()");
        var pathIndex = (index || hasFollowingSiblings) ? "[" + (index + 1) + "]" : "";
        paths.unshift(tagName + pathIndex);
    }

    return paths.length ? "/" + paths.join("/") : null;
};
Share:
39,572

Related videos on Youtube

Marc
Author by

Marc

I make things.

Updated on April 05, 2022

Comments

  • Marc
    Marc about 2 years

    Let's say I have a large HTML file with different kinds of tags, similar to the StackOverflow one you're looking at right now.

    Now let's say you click an element on the page, what would the Javascript function look like that calculates the most basic XPath that refers to that specific element?

    I know there are infinitely many ways of referring to that element in XPath, but I'm looking for something that just looks at the DOM tree, with no regard for IDs, classes, etc.

    Example:

    <html>
    <head><title>Fruit</title></head>
    <body>
    <ol>
      <li>Bananas</li>
      <li>Apples</li>
      <li>Strawberries</li>
    </ol>
    </body>
    </html>
    

    Let's say you click on Apples. The Javascript function would return the following:

    /html/body/ol/li[2]
    

    It would basically just work its way upward the DOM tree all the way to the HTML element.

    Just to clarify, the 'on-click' event-handler isn't the problem. I can make that work. I'm just not sure how to calculate the element's position within the DOM tree and represent it as an XPath.

    PS Any answer with or without the use of the JQuery library is appreciated.

    PPS I'm completely new to XPath, so I might even have made a mistake in the above example, but you'll get the idea.

    Edit at August 11, 2010: Looks like somebody else asked a similar question: generate/get the Xpath for a selected textnode

    • Matthew Flaschen
      Matthew Flaschen almost 14 years
      XPath uses 1-based indexing, so it's li[2].
    • Marc
      Marc almost 14 years
      Thanks, I've changed the code.
  • Marc
    Marc almost 14 years
    Thanks for the suggestion, but this code doesn't seem to take into account similar sibling nodes. E.g. the code returns 'BODY/OL/LI' instead of 'BODY/OL/LI[2]'.
  • Marc
    Marc almost 14 years
    Works perfectly as well! Thanks
  • Marc
    Marc almost 14 years
    I'm actually using jQuery's XPath selector as well, but needed a way to have the user generate an XPath himself. The second page you linked to has some nice examples of this. Thanks!
  • Frunsi
    Frunsi almost 14 years
    line 7: never seen that syntax, usually it would be written like this: id = id > 1 ? ('[' + id + ']') : '';
  • JCD
    JCD almost 14 years
    You're correct, I would have written it as you did, but I was just copying/pasting...I found this script some time ago and never bothered to clean it up. Either way, both ways of writing it are equivalent.
  • Marc
    Marc almost 14 years
    I'm not sure what it does, but it might come in handy at a later point. Thanks!
  • Marc
    Marc almost 14 years
    Didn't mean to be picky, but the sibling index information is crucial for the problem I'm trying to solve. Anyway, thanks for updating your code!
  • OneWorld
    OneWorld about 12 years
    I like the Firebug implementation (getElementXPath), because it favors the ID over a xpath tree if the element has any.
  • DanS
    DanS almost 12 years
    @OneWorld Yes, but it only checks the element you are getting the XPath of and not its parents. See my answer; it produces an XPath more like the one from the Google Chrome inspector.
  • Sali Hoo
    Sali Hoo over 10 years
    Does the following code return the correct XPath for the div in the following HTML?
  • wadim
    wadim almost 10 years
    Firebug's implementation has a bug in line 1365: it only adds the index, (e.g. "[3]") if there are previous siblings of same type. This is wrong because an XPath without the index will match all siblings of this type, even further ones. Example: /p/b will match all b tags under all p tags under root. Firebug just skips the index if it doesn't find previous siblings of same type.
  • wadim
    wadim almost 10 years
    This code has several problems: (1) when counting sibling index it actually checks if any of the siblings is the target of the event (instead of checking if any of the siblings is the same node type as the event target), (2) it takes into account sibling index information on the deepest level only, not further up the tree.
  • iimos
    iimos over 8 years
    BUG: On first tag of <div/><div/> it returns '/div' instead of '/div[1]'
  • Yoiku
    Yoiku over 8 years
    Which file contained this method? I need something similar, but Firebug was moved to GitHub and I can't find the method.
  • qqilihq
    qqilihq almost 8 years
    On current versions, the mentioned implementation can be found here: github.com/firebug/firebug/blob/master/extension/content/…
  • DaveRead
    DaveRead over 4 years
    Example javascript/jQuery link is no longer available
  • Nathan
    Nathan over 2 years
    This doesn't work for me at all. I'm passing in a startContainer node from a Range object and am getting null.