Python Selenium wait for several elements to load

13,349

Solution 1

Keeping in mind the comments from Mr.E. and Arran, I rewrote my list traversal to rely entirely on CSS selectors. The tricky parts were my list's own structure and markers (changing classes, etc.), as well as building the required selectors on the fly and keeping them in memory during the traversal.

I got rid of waiting for several elements by instead waiting for anything that is not in the loading state. You can also use the ":nth-child" selector, like this:

# Inside a `for i, ... in enumerate(...)` loop, where `i` is the 0-based index:
selector.append(' > li:nth-child(%i)' % (i + 1))  # identify child <li> by its order pos (i + 1 because enumerate starts at 0)

Here is my heavily commented solution as an example:

def parse_crippled_shifted_list(driver, frame, selector, level=1, parent_id=0, path=None):
    """
    Traverse an html list of special structure (you can't know if an element has a sub list unless you enter it).
    Supports starting from a remembered list element.

    Nested lists have classes "closed" and "last closed" when closed and "open" and "last open" when opened (on <li>).
    Elements themselves have classes "leaf" and "last leaf" in both cases.
    Nested lists are placed in the <li> element as a <ul> list. Each <ul> appears after clicking <a> in each <li>.
    If you click <a> of a leaf, a page in another frame will load.

    driver - WebDriver; frame - frame of the list (not used inside this function);
    selector - list of CSS selector fragments that, joined together, address the current list (<ul>);
               it is modified in place during the traversal;
    level - level of depth, used for console output formatting and passed on to the processing helpers;
    parent_id - id of parent category (in DB);
    path - remaining path of categories (ORM objects with .name and .id) to the target category to start with;
           entries are popped from the front of this list as the traversal descends.
    """

    # Add current level list elements
    # This selector matches every link except the "loading" placeholder, which is exactly what must be excluded.
    selector.append(' > li > a:not([class=loading])')

    # Wait for child list to load
    try:
        WebDriverWait(driver, WAIT_LONG_TIME).until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, ''.join(selector))))

    except TimeoutException:
        print("%s timed out" % ''.join(selector))

    else:
        # List is loaded
        del selector[-1]  # selector correction: delete last part aimed to get loaded content
        selector.append(' > li')

        children = driver.find_elements_by_css_selector(''.join(selector))  # fetch list elements

        # Walk the whole list
        for i, child in enumerate(children):

            del selector[-1]  # delete non-unique li tag selector
            if selector[-1] != ' > ul' and selector[-1] != 'ul.ltr':
                del selector[-1]

            selector.append(' > li:nth-child(%i)' % (i + 1))  # identify child <li> by its order pos
            selector.append(' > a')  # add 'li > a' reference to click

            child_link = driver.find_element_by_css_selector(''.join(selector))

            # If we parse freely further (no need to start from remembered position)
            if not path:
                # Open child
                try:
                    double_click(driver, child_link)
                except InvalidElementStateException as exc:
                    # Report the caught instance; the class itself carries no message.
                    print("\n\nERROR\n%s\n\n" % exc)
                else:
                    # Determine its type
                    del selector[-1]  # delete changed and already useless link reference
                    # If <li> is category, it would have <ul> as child now and class="open"
                    # Check by class is priority, because <li> exists for sure.
                    current_li = driver.find_element_by_css_selector(''.join(selector))
                    li_class = current_li.get_attribute('class')

                    # Category case - BRANCH
                    if li_class in ('open', 'last open'):
                        new_parent_id = process_category_case(child_link, parent_id, level)  # add category to DB
                        selector.append(' > ul')  # forward to nested list
                        # Wait for nested list to load
                        try:
                            WebDriverWait(driver, WAIT_LONG_TIME).until(
                                EC.presence_of_all_elements_located((By.CSS_SELECTOR, ''.join(selector))))

                        except TimeoutException:
                            # Both values must be passed as one tuple to the % operator.
                            print("%s%s timed out (%i secs). Failed to load nested list."
                                  % ("\t" * level, ''.join(selector), WAIT_LONG_TIME))
                        # Parse nested list
                        else:
                            parse_crippled_shifted_list(driver, frame, selector, level + 1, new_parent_id)

                    # Page case - LEAF
                    elif li_class in ('leaf', 'last leaf'):
                        process_page_case(driver, child_link, level)
                    else:
                        raise Exception('Damn! Alien class: %s' % li_class)

            # If it's required to continue from specified category
            else:
                # Check if it's required category
                if child_link.text == path[0].name:
                    # Open required category
                    try:
                        double_click(driver, child_link)

                    except InvalidElementStateException as exc:
                        print("\n\nERROR\n%s\n\n" % exc)

                    else:
                        # This element of list must be always category (have nested list)
                        del selector[-1]  # delete changed and already useless link reference
                        # If <li> is category, it would have <ul> as child now and class="open"
                        # Check by class is priority, because <li> exists for sure.
                        current_li = driver.find_element_by_css_selector(''.join(selector))
                        li_class = current_li.get_attribute('class')

                        # Category case - BRANCH
                        if li_class in ('open', 'last open'):
                            selector.append(' > ul')  # forward to nested list
                            # Wait for nested list to load
                            try:
                                WebDriverWait(driver, WAIT_LONG_TIME).until(
                                    EC.presence_of_all_elements_located((By.CSS_SELECTOR, ''.join(selector))))

                            except TimeoutException:
                                print("%s%s timed out (%i secs). Failed to load nested list."
                                      % ("\t" * level, ''.join(selector), WAIT_LONG_TIME))
                            # Process this nested list
                            else:
                                last = path.pop(0)
                                if len(path) > 0:  # If more to parse
                                    print("%sGoing deeper to: %s" % ("\t" * level, ''.join(selector)))
                                    parse_crippled_shifted_list(driver, frame, selector, level + 1,
                                                                parent_id=last.id, path=path)
                                else:  # Current is required
                                    print("%sReturning target category: %s" % ("\t" * level, ''.join(selector)))
                                    # Clearing path makes the remaining siblings of this level
                                    # go through the free-parsing branch above.
                                    path = None
                                    parse_crippled_shifted_list(driver, frame, selector, level + 1, last.id, path=None)

                        # Page case - LEAF: nothing to do while still searching for the start category
                        elif li_class == 'leaf':
                            pass
                else:
                    print("dummy")

        # Drop the trailing per-child fragments added during the walk.
        # NOTE(review): presumably restores the selector for the caller - confirm for empty lists.
        del selector[-2:]

Solution 2

First and foremost the elements are AJAX elements.

Now, as per the requirement to locate all the desired elements and create a list, the simplest approach would be to induce WebDriverWait for the visibility_of_all_elements_located() and you can use either of the following Locator Strategies:

  • Using CSS_SELECTOR:

    # Wait (20 is the WebDriverWait timeout) for all links whose id starts with "t_a_" and that have an href,
    # directly inside an <li> whose id starts with "t_b_" under ul.ltr; the result is stored in `elements`.
    elements = WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "ul.ltr li[id^='t_b_'] > a[id^='t_a_'][href]")))
    
  • Using XPATH:

    # Same wait expressed as XPath; this variant additionally requires the link text to start with "Category".
    elements = WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.XPATH, "//ul[@class='ltr']//li[starts-with(@id, 't_b_')]/a[starts-with(@id, 't_a_') and starts-with(., 'Category')]")))
    
  • Note : You have to add the following imports :

    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support import expected_conditions as EC
    

In case your use case is to wait for a certain number of elements to be loaded, e.g. 10 elements, you can use a lambda function as follows:

  • Using >:

    # The condition is "strictly more than myLength", so myLength = 9 waits for at least 10 matching links.
    myLength = 9
    WebDriverWait(driver, 20).until(lambda driver: len(driver.find_elements_by_xpath("//ul[@class='ltr']//li[starts-with(@id, 't_b_')]/a[starts-with(@id, 't_a_') and starts-with(., 'Category')]")) > int(myLength))
    
  • Using ==:

    # The condition is "exactly myLength": it is only satisfied while exactly 10 matching links are found.
    myLength = 10
    WebDriverWait(driver, 20).until(lambda driver: len(driver.find_elements_by_xpath("//ul[@class='ltr']//li[starts-with(@id, 't_b_')]/a[starts-with(@id, 't_a_') and starts-with(., 'Category')]")) == int(myLength))
    

You can find a relevant discussion in How to wait for number of elements to be loaded using Selenium and Python


References

You can find a couple of relevant detailed discussions in:

Solution 3

I created AllEc which basically piggybacks on WebDriverWait.until logic.

This will wait until the timeout occurs or when all of the elements have been found.

from typing import Callable
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import StaleElementReferenceException

class AllEc(object):
    """
    Combine several expected conditions in an AND, for use with WebDriverWait.

    Calling the instance with a driver evaluates the stored conditions in order
    and yields True only when every one of them returns a truthy value.
    A StaleElementReferenceException raised by any condition counts as "not yet".
    """

    def __init__(self, *args: Callable, description: str = None):
        # Conditions are kept in the order given; description is only stored.
        self.description = description
        self.ecs = args

    def __call__(self, driver):
        try:
            # all() stops at the first falsy condition, like an explicit loop would.
            return all(condition(driver) for condition in self.ecs)
        except StaleElementReferenceException:
            return False

# usage example:
# `driver`, `timeout` and the three locators are assumed to be defined by the caller.
wait = WebDriverWait(driver, timeout)
ec1 = EC.invisibility_of_element_located(locator1)
ec2 = EC.invisibility_of_element_located(locator2)
ec3 = EC.invisibility_of_element_located(locator3)

all_ec = AllEc(ec1, ec2, ec3, description="Required elements to show page has loaded.") 
# NOTE: AllEc.__call__ returns True/False, so on success `found_elements` is True, not a list of elements.
found_elements = wait.until(all_ec, "Could not find all expected elements")

Alternatively, I created AnyEc, which checks multiple conditions but returns as soon as the first one is satisfied.

class AnyEc(object):
    """
    Logical OR over several expected_conditions, meant to be passed to WebDriverWait.

    Example usage:

        >>> wait = WebDriverWait(driver, 30)
        >>> either = AnyEc(expectedcondition1, expectedcondition2, expectedcondition3, etc...)
        >>> found = wait.until(either, "Cannot find any of the expected conditions")
    """

    def __init__(self, *args: Callable, description: str = None):
        self.description = description
        self.ecs = args

    def __iter__(self):
        # Iterating the instance iterates the stored conditions.
        return iter(self.ecs)

    def __call__(self, driver):
        """Return the first truthy condition result, or None when no condition succeeds."""
        for condition in self.ecs:
            try:
                outcome = condition(driver)
                if outcome:
                    return outcome
            except TypeError:
                # TypeError is deliberately not swallowed; it is re-raised to the caller.
                raise
            except Exception:
                # Any other failure means "this condition is not met"; try the next one.
                continue
        return None

    def __repr__(self):
        return " ".join(repr(condition) + "," for condition in self.ecs)

    def __str__(self):
        return str(self.description)

# Reuses `wait` and the ec1..ec3 conditions from the AllEc usage example.
either = AnyEc(ec1, ec2, ec3)
# On success, `found_element` is whatever truthy value the first satisfied condition returned.
found_element = wait.until(either, "Could not find any of the expected elements")

Lastly, if it's possible to do so, you could try waiting for Ajax to be finished. This is not useful in all cases -- e.g. Ajax is always active. In the cases where Ajax runs and finishes it can work. There are also some ajax libraries that do not set the active attribute, so double check that you can rely on this.

def is_ajax_complete(driver):
    """
    Return True when the page's jQuery reports no active Ajax requests.

    driver - object exposing execute_script(script); the script's return value
             is compared against 0.
    NOTE(review): the script assumes jQuery is loaded on the page - confirm before relying on it.
    """
    active_requests = driver.execute_script("return jQuery.active")
    return active_requests == 0

# Poll is_ajax_complete through the wait object; the lambda only forwards the driver it receives.
wait.until(lambda driver: is_ajax_complete(driver), "Ajax did not finish")

Share:
13,349
Ragnar Lodbrok
Author by

Ragnar Lodbrok

Python (PyQt 4, SqlAlchemy, Camelot, etc.) PHP (Kohana, Laravel 4, Slim, hate Drupal) JS (jQuery primarily) MySql, C++ (almost forgotten)

Updated on June 14, 2022

Comments

  • Ragnar Lodbrok
    Ragnar Lodbrok almost 2 years

    I have a list which is dynamically loaded by AJAX. At first, while loading, its code looks like this:

    <ul><li class="last"><a class="loading" href="#"><ins>&nbsp;</ins>Загрузка...</a></li></ul>
    

    When the list is loaded, all of its li and a elements are changed, and there is always more than one li. Like this:

    <ul class="ltr">
    <li id="t_b_68" class="closed" rel="simple">
    <a id="t_a_68" href="javascript:void(0)">Category 1</a>
    </li>
    <li id="t_b_64" class="closed" rel="simple">
    <a id="t_a_64" href="javascript:void(0)">Category 2</a>
    </li>
    ...
    

    I need to check if list is loaded, so I check if it has several li.

    So far I tried:

    1) Custom waiting condition

    class more_than_one(object):
        """Wait condition that holds once the CSS selector matches at least two elements."""

        def __init__(self, selector):
            self.selector = selector

        def __call__(self, driver):
            matches = driver.find_elements_by_css_selector(self.selector)
            return len(matches) > 1
    

    ...

    try:
            query = WebDriverWait(driver, 30).until(more_than_one('li'))
        except:
            print "Bad crap"
        else:
            # Then load ready list
    

    2) Custom function based on find_elements_by

    def wait_for_several_elements(driver, selector, min_amount, limit=60):
        """
        Poll until more than <min_amount> elements are found by <selector>,
        giving up after about <limit> seconds of sleeping.

        driver - object exposing find_elements_by_css_selector (WebDriver or element);
        selector - CSS selector to look up on every attempt;
        min_amount - the number of matches that must be strictly exceeded;
        limit - total time budget in seconds (only the sleeps are counted).

        Returns True as soon as enough elements are found, False when the limit is reached.
        """
        step = 1  # polling interval, in seconds
        current_wait = 0
        while current_wait < limit:
            print("Waiting... " + str(current_wait))
            try:
                query = driver.find_elements_by_css_selector(selector)
            except Exception:
                # A failed lookup is treated as "nothing found yet" and retried;
                # unlike a bare except, this lets KeyboardInterrupt/SystemExit through.
                query = []
            if len(query) > min_amount:
                print("Found!")
                return True
            time.sleep(step)
            current_wait += step

        return False
    

    This doesn't work, because driver (current element passed to this function) gets lost in DOM. UL isn't changed but Selenium can't find it anymore for some reason.

    3) Explicit wait. This just sucks, because some lists are loaded instantly and some take 10+ secs to load. If I use this technique I have to wait the maximum time on every occurrence, which is very bad for my case.

    4) Also I can't wait for child element with XPATH correctly. This one just expects ul to appear.

    try:
        print "Going to nested list..."
        #time.sleep(WAIT_TIME)
        query = WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.XPATH, './/ul')))
        nested_list = child.find_element_by_css_selector('ul')
    

    Please tell me the right way to be sure that several child elements are loaded for a specified element.

    P.S. All these checks and searches should be relative to the current element.