parsing HTML table using python - HTMLparser or lxml

20,061

Solution 1

Something like this should work:

>>> from lxml.html import parse
>>> page = parse("test.html")
>>> rows = page.xpath("body/table")[0].findall("tr")
>>> data = list()
>>> for row in rows:
...     data.append([c.text for c in row.getchildren()])
... 
>>> for row in data[4:]: print(row)
... 
['2050', 'January', '0', '1', '3', '0', '4', '16', '0', '6', '2', '2', '0', '3', '0', '3', '2', '0', '26', '1', '0', '0', '7', '0', '5', '6', '0', '8', '2', '0', '0', '0', '0', '0', '0', '0', '2', '0']
['February', '1', '0', '8', '0', '2', '4', '1', '6', '1', '2', '0', '3', '0', '0', '4', '0', '25', '0', '0', '1', '2', '0', '4', '14', '1', '1', '0', '0', '0', '0', '0', '0', '1', '0', '0', '0']
['March', '0', '0', '4', '0', '4', '7', '0', '9', '2', '1', '0', '0', '0', '2', '9', '0', '45', '1', '0', '0', '7', '0', '10', '16', '0', '5', '1', '1', '0', '1', '0', '0', '0', '0', '4', '0']
['April', '1', '0', '5', '0', '3', '12', '1', '11', '0', '3', '0', '3', '0', '0', '3', '2', '34', '0', '0', '1', '2', '0', '6', '18', '1', '3', '0', '0', '0', '0', '0', '0', '0', '0', '5', '1']
['May', '7', '0', '6', '0', '8', '4', '1', '13', '0', '0', '2', '2', '0', '1', '7', '1', '30', '0', '0', '0', '7', '0', '5', '12', '0', '4', '1', '0', '0', '0', '0', '0', '0', '0', '6', '1']
['June', '0', '1', '14', '0', '7', '15', '0', '17', '1', '2', '0', '5', '0', '1', '3', '0', '24', '0', '0', '0', '5', '0', '6', '13', '1', '9', '1', '1', '0', '0', '0', '0', '0', '0', '2', '1']
['July', '0', '1', '6', '0', '8', '17', '1', '15', '2', '1', '0', '10', '0', '2', '15', '2', '53', '0', '3', '3', '6', '0', '7', '16', '0', '9', '1', '1', '0', '0', '0', '0', '1', '0', '2', '0']
['August', '2', '0', '5', '0', '8', '15', '1', '17', '0', '2', '0', '2', '0', '5', '16', '0', '33', '0', '0', '0', '11', '0', '2', '25', '4', '8', '0', '0', '0', '1', '0', '0', '0', '0', '3', '0']
['September', '2', '0', '10', '0', '16', '22', '2', '19', '4', '2', '0', '0', '0', '2', '8', '0', '27', '0', '1', '0', '8', '0', '11', '31', '1', '9', '0', '0', '0', '1', '0', '0', '0', '1', '1', '0']
['October', '3', '1', '8', '0', '4', '28', '0', '15', '2', '1', '0', '1', '0', '1', '6', '0', '15', '0', '1', '0', '3', '0', '9', '26', '1', '8', '4', '0', '0', '0', '0', '0', '0', '0', '1', '0']
['November', '0', '3', '3', '0', '6', '23', '1', '8', '1', '2', '0', '1', '0', '3', '7', '1', '20', '0', '0', '0', '8', '0', '3', '18', '3', '7', '0', '0', '0', '0', '0', '0', '0', '0', '3', '0']
['December', '1', '0', '4', '0', '4', '13', '2', '15', '1', '0', '0', '2', '0', '1', '2', '0', '29', '0', '1', '0', '7', '0', '3', '20', '1', '13', '0', '1', '0', '0', '0', '0', '0', '0', '3', '0']

You can use zip() to transpose the table (on Python 3, zip() returns an iterator, so wrap the call in list() to get the output shown below):

>>> d = data[4:]
>>> d[0] = d[0][1:]
>>> zip(*d)
[('January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'), ('0', '1', '0', '1', '7', '0', '0', '2', '2', '3', '0', '1'), ('1', '0', '0', '0', '0', '1', '1', '0', '0', '1', '3', '0'), ('3', '8', '4', '5', '6', '14', '6', '5', '10', '8', '3', '4'), ('0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0'), ('4', '2', '4', '3', '8', '7', '8', '8', '16', '4', '6', '4'), ('16', '4', '7', '12', '4', '15', '17', '15', '22', '28', '23', '13'), ('0', '1', '0', '1', '1', '0', '1', '1', '2', '0', '1', '2'), ('6', '6', '9', '11', '13', '17', '15', '17', '19', '15', '8', '15'), ('2', '1', '2', '0', '0', '1', '2', '0', '4', '2', '1', '1'), ('2', '2', '1', '3', '0', '2', '1', '2', '2', '1', '2', '0'), ('0', '0', '0', '0', '2', '0', '0', '0', '0', '0', '0', '0'), ('3', '3', '0', '3', '2', '5', '10', '2', '0', '1', '1', '2'), ('0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0'), ('3', '0', '2', '0', '1', '1', '2', '5', '2', '1', '3', '1'), ('2', '4', '9', '3', '7', '3', '15', '16', '8', '6', '7', '2'), ('0', '0', '0', '2', '1', '0', '2', '0', '0', '0', '1', '0'), ('26', '25', '45', '34', '30', '24', '53', '33', '27', '15', '20', '29'), ('1', '0', '1', '0', '0', '0', '0', '0', '0', '0', '0', '0'), ('0', '0', '0', '0', '0', '0', '3', '0', '1', '1', '0', '1'), ('0', '1', '0', '1', '0', '0', '3', '0', '0', '0', '0', '0'), ('7', '2', '7', '2', '7', '5', '6', '11', '8', '3', '8', '7'), ('0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0'), ('5', '4', '10', '6', '5', '6', '7', '2', '11', '9', '3', '3'), ('6', '14', '16', '18', '12', '13', '16', '25', '31', '26', '18', '20'), ('0', '1', '0', '1', '0', '1', '0', '4', '1', '1', '3', '1'), ('8', '1', '5', '3', '4', '9', '9', '8', '9', '8', '7', '13'), ('2', '0', '1', '0', '1', '1', '1', '0', '0', '4', '0', '0'), ('0', '0', '1', '0', '0', '1', '1', '0', '0', '0', '0', '1'), ('0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0'), ('0', '0', '1', '0', '0', 
'0', '0', '1', '1', '0', '0', '0'), ('0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0'), ('0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0'), ('0', '1', '0', '0', '0', '0', '1', '0', '0', '0', '0', '0'), ('0', '0', '0', '0', '0', '0', '0', '0', '1', '0', '0', '0'), ('2', '0', '4', '5', '6', '2', '2', '3', '1', '1', '3', '3'), ('0', '0', '0', '1', '1', '1', '0', '0', '0', '0', '0', '0')]

Solution 2

I can't add comments, but this might be helpful for someone else:

I had some bold and italic text within the table cells, so c.text returned None. I used c.text_content() instead, like this:

>>> from lxml.html import parse
>>> page = parse("test.html")
>>> rows = page.xpath("body/table")[0].findall("tr")
>>> data = list()
>>> for row in rows:
...     data.append([c.text_content() for c in row.getchildren()])
... 
Share:
20,061
self
Author by

self

Updated on July 05, 2022

Comments

  • self
    self almost 2 years

    I have an HTML page which consists of a table, and I want to fetch all the values in the td and tr elements of that table.
    I have tried working with BeautifulSoup, but now I want to work with lxml or Python's HTML parser.

    I have attached the example.
    I want to fetch the values as a list of lists of tuples, like this:

    [
    [( value of 2050 jan, value of main subject-part1-sub part1-subject1 ), ( value of 2050 feb, value of main subject-part1-sub part1-subject1 ),... ],
    [( value of 2050 jan, value of main subject-part1-sub part1-subject2 ), ( value of 2050 feb, value of main subject-part1-sub part1-subject2 )... ]
    ]
    

    and so on.

    Can anyone let me know how I can process that in a very "optimal" way using lxml or Python's HTML parser?

    example : test.html

    <HTML>
    <HEAD>
    <TITLE>Title</TITLE>
    </HEAD>
    <BODY>
    <TABLE BORDER>
    <TR ALIGN=LEFT>
    <TH COLSPAN=38>Main Subject</TH>
    </TR>
    <TR ALIGN=LEFT>
    <TH COLSPAN=2> </TH>
    
    <TH VALIGN=TOP COLSPAN=18>part1</TH>
    <TH VALIGN=TOP COLSPAN=18>part2</TH>
    </TR>
    <TR ALIGN=LEFT>
    <TH COLSPAN=2> </TH>
    <TH VALIGN=TOP COLSPAN=9>sub-part1</TH>
    <TH VALIGN=TOP COLSPAN=9>sub-part2</TH>
    <TH VALIGN=TOP COLSPAN=9>sub-part3</TH>
    <TH VALIGN=TOP COLSPAN=9>sub-part4</TH>
    </TR>
    
    <TR ALIGN=LEFT>
    <TH COLSPAN=2> </TH>
    <TH VALIGN=TOP COLSPAN=1>subject1</TH>
    <TH VALIGN=TOP COLSPAN=1>subject2</TH>
    
    <TH VALIGN=TOP COLSPAN=1>subject10</TH>
    <TH VALIGN=TOP COLSPAN=1>subject11</TH>
    <TH VALIGN=TOP COLSPAN=1>subject12</TH>
    <TH VALIGN=TOP COLSPAN=1>subject13</TH>
    <TH VALIGN=TOP COLSPAN=1>subject14</TH>
    <TH VALIGN=TOP COLSPAN=1>subject15</TH>
    <TH VALIGN=TOP COLSPAN=1>subject16</TH>
    
    <TH VALIGN=TOP COLSPAN=1>subject17</TH>
    <TH VALIGN=TOP COLSPAN=1>subject18</TH>
    <TH VALIGN=TOP COLSPAN=1>subject19</TH>
    <TH VALIGN=TOP COLSPAN=1>subject20</TH>
    <TH VALIGN=TOP COLSPAN=1>subject21</TH>
    <TH VALIGN=TOP COLSPAN=1>subject22</TH>
    <TH VALIGN=TOP COLSPAN=1>subject23</TH>
    <TH VALIGN=TOP COLSPAN=1>subject24</TH>
    <TH VALIGN=TOP COLSPAN=1>subject25</TH>
    
    <TH VALIGN=TOP COLSPAN=1>subject26</TH>
    <TH VALIGN=TOP COLSPAN=1>subject27</TH>
    <TH VALIGN=TOP COLSPAN=1>subject28</TH>
    <TH VALIGN=TOP COLSPAN=1>subject29</TH>
    <TH VALIGN=TOP COLSPAN=1>subject30</TH>
    <TH VALIGN=TOP COLSPAN=1>subject31</TH>
    <TH VALIGN=TOP COLSPAN=1>subject32</TH>
    <TH VALIGN=TOP COLSPAN=1>subject33</TH>
    <TH VALIGN=TOP COLSPAN=1>subject34</TH>
    
    <TH VALIGN=TOP COLSPAN=1>subject35</TH>
    <TH VALIGN=TOP COLSPAN=1>subject36</TH>
    </TR>
    <TR ALIGN=RIGHT>
    <TH ALIGN=LEFT VALIGN=TOP ROWSPAN=12>2050</TH>
    <TH ALIGN=LEFT>January</TH>
    <TD>0</TD>
    <TD>1</TD>
    <TD>3</TD>
    <TD>0</TD>
    
    <TD>4</TD>
    <TD>16</TD>
    <TD>0</TD>
    <TD>6</TD>
    <TD>2</TD>
    <TD>2</TD>
    <TD>0</TD>
    <TD>3</TD>
    <TD>0</TD>
    
    <TD>3</TD>
    <TD>2</TD>
    <TD>0</TD>
    <TD>26</TD>
    <TD>1</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>7</TD>
    <TD>0</TD>
    
    <TD>5</TD>
    <TD>6</TD>
    <TD>0</TD>
    <TD>8</TD>
    <TD>2</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>2</TD>
    <TD>0</TD>
    </TR>
    <TR ALIGN=RIGHT>
    <TH ALIGN=LEFT>February</TH>
    <TD>1</TD>
    <TD>0</TD>
    
    <TD>8</TD>
    <TD>0</TD>
    <TD>2</TD>
    <TD>4</TD>
    <TD>1</TD>
    <TD>6</TD>
    <TD>1</TD>
    <TD>2</TD>
    <TD>0</TD>
    
    <TD>3</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>4</TD>
    <TD>0</TD>
    <TD>25</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>1</TD>
    
    <TD>2</TD>
    <TD>0</TD>
    <TD>4</TD>
    <TD>14</TD>
    <TD>1</TD>
    <TD>1</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>1</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    </TR>
    <TR ALIGN=RIGHT>
    <TH ALIGN=LEFT>March</TH>
    
    <TD>0</TD>
    <TD>0</TD>
    <TD>4</TD>
    <TD>0</TD>
    <TD>4</TD>
    <TD>7</TD>
    <TD>0</TD>
    <TD>9</TD>
    <TD>2</TD>
    
    <TD>1</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>2</TD>
    <TD>9</TD>
    <TD>0</TD>
    <TD>45</TD>
    <TD>1</TD>
    
    <TD>0</TD>
    <TD>0</TD>
    <TD>7</TD>
    <TD>0</TD>
    <TD>10</TD>
    <TD>16</TD>
    <TD>0</TD>
    <TD>5</TD>
    <TD>1</TD>
    
    <TD>1</TD>
    <TD>0</TD>
    <TD>1</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>4</TD>
    <TD>0</TD>
    
    </TR>
    <TR ALIGN=RIGHT>
    <TH ALIGN=LEFT>April</TH>
    <TD>1</TD>
    <TD>0</TD>
    <TD>5</TD>
    <TD>0</TD>
    <TD>3</TD>
    <TD>12</TD>
    <TD>1</TD>
    
    <TD>11</TD>
    <TD>0</TD>
    <TD>3</TD>
    <TD>0</TD>
    <TD>3</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>3</TD>
    <TD>2</TD>
    
    <TD>34</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>1</TD>
    <TD>2</TD>
    <TD>0</TD>
    <TD>6</TD>
    <TD>18</TD>
    <TD>1</TD>
    
    <TD>3</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    
    <TD>5</TD>
    <TD>1</TD>
    </TR>
    <TR ALIGN=RIGHT>
    <TH ALIGN=LEFT>May</TH>
    <TD>7</TD>
    <TD>0</TD>
    <TD>6</TD>
    <TD>0</TD>
    <TD>8</TD>
    
    <TD>4</TD>
    <TD>1</TD>
    <TD>13</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>2</TD>
    <TD>2</TD>
    <TD>0</TD>
    <TD>1</TD>
    
    <TD>7</TD>
    <TD>1</TD>
    <TD>30</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>7</TD>
    <TD>0</TD>
    <TD>5</TD>
    
    <TD>12</TD>
    <TD>0</TD>
    <TD>4</TD>
    <TD>1</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    
    <TD>0</TD>
    <TD>0</TD>
    <TD>6</TD>
    <TD>1</TD>
    </TR>
    <TR ALIGN=RIGHT>
    <TH ALIGN=LEFT>June</TH>
    <TD>0</TD>
    <TD>1</TD>
    <TD>14</TD>
    
    <TD>0</TD>
    <TD>7</TD>
    <TD>15</TD>
    <TD>0</TD>
    <TD>17</TD>
    <TD>1</TD>
    <TD>2</TD>
    <TD>0</TD>
    <TD>5</TD>
    
    <TD>0</TD>
    <TD>1</TD>
    <TD>3</TD>
    <TD>0</TD>
    <TD>24</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>5</TD>
    
    <TD>0</TD>
    <TD>6</TD>
    <TD>13</TD>
    <TD>1</TD>
    <TD>9</TD>
    <TD>1</TD>
    <TD>1</TD>
    <TD>0</TD>
    <TD>0</TD>
    
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>2</TD>
    <TD>1</TD>
    </TR>
    <TR ALIGN=RIGHT>
    <TH ALIGN=LEFT>July</TH>
    <TD>0</TD>
    
    <TD>1</TD>
    <TD>6</TD>
    <TD>0</TD>
    <TD>8</TD>
    <TD>17</TD>
    <TD>1</TD>
    <TD>15</TD>
    <TD>2</TD>
    <TD>1</TD>
    
    <TD>0</TD>
    <TD>10</TD>
    <TD>0</TD>
    <TD>2</TD>
    <TD>15</TD>
    <TD>2</TD>
    <TD>53</TD>
    <TD>0</TD>
    <TD>3</TD>
    
    <TD>3</TD>
    <TD>6</TD>
    <TD>0</TD>
    <TD>7</TD>
    <TD>16</TD>
    <TD>0</TD>
    <TD>9</TD>
    <TD>1</TD>
    <TD>1</TD>
    
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>1</TD>
    <TD>0</TD>
    <TD>2</TD>
    <TD>0</TD>
    </TR>
    
    <TR ALIGN=RIGHT>
    <TH ALIGN=LEFT>August</TH>
    <TD>2</TD>
    <TD>0</TD>
    <TD>5</TD>
    <TD>0</TD>
    <TD>8</TD>
    <TD>15</TD>
    <TD>1</TD>
    
    <TD>17</TD>
    <TD>0</TD>
    <TD>2</TD>
    <TD>0</TD>
    <TD>2</TD>
    <TD>0</TD>
    <TD>5</TD>
    <TD>16</TD>
    <TD>0</TD>
    
    <TD>33</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>11</TD>
    <TD>0</TD>
    <TD>2</TD>
    <TD>25</TD>
    <TD>4</TD>
    
    <TD>8</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>1</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    
    <TD>3</TD>
    <TD>0</TD>
    </TR>
    <TR ALIGN=RIGHT>
    <TH ALIGN=LEFT>September</TH>
    <TD>2</TD>
    <TD>0</TD>
    <TD>10</TD>
    <TD>0</TD>
    <TD>16</TD>
    
    <TD>22</TD>
    <TD>2</TD>
    <TD>19</TD>
    <TD>4</TD>
    <TD>2</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>2</TD>
    
    <TD>8</TD>
    <TD>0</TD>
    <TD>27</TD>
    <TD>0</TD>
    <TD>1</TD>
    <TD>0</TD>
    <TD>8</TD>
    <TD>0</TD>
    <TD>11</TD>
    
    <TD>31</TD>
    <TD>1</TD>
    <TD>9</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>1</TD>
    <TD>0</TD>
    <TD>0</TD>
    
    <TD>0</TD>
    <TD>1</TD>
    <TD>1</TD>
    <TD>0</TD>
    </TR>
    <TR ALIGN=RIGHT>
    <TH ALIGN=LEFT>October</TH>
    <TD>3</TD>
    <TD>1</TD>
    <TD>8</TD>
    
    <TD>0</TD>
    <TD>4</TD>
    <TD>28</TD>
    <TD>0</TD>
    <TD>15</TD>
    <TD>2</TD>
    <TD>1</TD>
    <TD>0</TD>
    <TD>1</TD>
    
    <TD>0</TD>
    <TD>1</TD>
    <TD>6</TD>
    <TD>0</TD>
    <TD>15</TD>
    <TD>0</TD>
    <TD>1</TD>
    <TD>0</TD>
    <TD>3</TD>
    
    <TD>0</TD>
    <TD>9</TD>
    <TD>26</TD>
    <TD>1</TD>
    <TD>8</TD>
    <TD>4</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>1</TD>
    <TD>0</TD>
    </TR>
    <TR ALIGN=RIGHT>
    <TH ALIGN=LEFT>November</TH>
    <TD>0</TD>
    
    <TD>3</TD>
    <TD>3</TD>
    <TD>0</TD>
    <TD>6</TD>
    <TD>23</TD>
    <TD>1</TD>
    <TD>8</TD>
    <TD>1</TD>
    <TD>2</TD>
    
    <TD>0</TD>
    <TD>1</TD>
    <TD>0</TD>
    <TD>3</TD>
    <TD>7</TD>
    <TD>1</TD>
    <TD>20</TD>
    <TD>0</TD>
    <TD>0</TD>
    
    <TD>0</TD>
    <TD>8</TD>
    <TD>0</TD>
    <TD>3</TD>
    <TD>18</TD>
    <TD>3</TD>
    <TD>7</TD>
    <TD>0</TD>
    <TD>0</TD>
    
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>3</TD>
    <TD>0</TD>
    </TR>
    
    <TR ALIGN=RIGHT>
    <TH ALIGN=LEFT>December</TH>
    <TD>1</TD>
    <TD>0</TD>
    <TD>4</TD>
    <TD>0</TD>
    <TD>4</TD>
    <TD>13</TD>
    <TD>2</TD>
    
    <TD>15</TD>
    <TD>1</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>2</TD>
    <TD>0</TD>
    <TD>1</TD>
    <TD>2</TD>
    <TD>0</TD>
    
    <TD>29</TD>
    <TD>0</TD>
    <TD>1</TD>
    <TD>0</TD>
    <TD>7</TD>
    <TD>0</TD>
    <TD>3</TD>
    <TD>20</TD>
    <TD>1</TD>
    
    <TD>13</TD>
    <TD>0</TD>
    <TD>1</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    <TD>0</TD>
    
    <TD>3</TD>
    <TD>0</TD>
    </TR>
    </TABLE>
    </BODY>
    </HTML>
    
  • self
    self about 12 years
    Thanks, but if I want the rows in a vertical way, as I mentioned, then what should I do?
  • matt wilkie
    matt wilkie over 8 years
    this helped me grab the text from <a href ..> tags inside td's. Thank you.