Merging elements inside a xml.etree.ElementTree

I have a huge test data set like the one provided below (and yes, I have no control over this data). Each line consists of 6 parts, and I need to generate an XML file based on this data.

Nav;Basic;Dest;Smoke;No;Yes;
Nav;Dest;Recent;Regg;No;Yes;
Nav;Dest;Favourites;Regg;No;Yes;
...
Nav;Dest using on board;By POI;Smoke;No;Yes;
Nav;Dest using on board;Other;Regg;No;Yes;

The first 3 elements on each line denote nested "test suite" XML elements, and the last 3 elements should create a "test case" XML element.

I have successfully converted it into an XML document using the following code:

# testsuite (root): <testsuite name="Tests">, initially without children
testsuite = ET.Element('testsuite', {"name": "Tests"})

def _create_testcase_tag(elem):
    """Append a fresh suite/suite/suite/testcase chain for one input row.

    elem --> iterable of exactly six items: the three nested suite names
             followed by test type, automated flag and available flag.

    The chain is attached to the module-level ``testsuite`` root. A new
    chain is created on every call; suites that already exist with the
    same name are not reused.
    """
    name1, name2, name3, test_type, automated, available = elem

    # Walk down from the root, creating one nested <testsuite> per level.
    parent = testsuite
    for suite_name in (name1, name2, name3):
        parent = ET.SubElement(parent, "testsuite", name=suite_name)

    # The innermost suite receives the test case and its summary text.
    case = ET.SubElement(parent, "testcase", name="TBD")
    description = ET.SubElement(case, "summary")
    description.text = "Test Type= %s, Automated= %s, Available=%s" % (
        test_type, automated, available)

# Parse the ';'-separated input. Fields 1-3 name the nested suites and
# fields 4-6 describe the test case. Lines with fewer than six fields are
# skipped; anything after the sixth field is ignored.
with open(input_file) as in_file:
    for a_line in in_file:
        parameters = a_line.split(';')
        if len(parameters) >= 6:
            lines_as_list.append(
                tuple(field.strip() for field in parameters[:6]))

# Tuples sort field by field, so rows with the same suite names become adjacent.
lines_as_list.sort()
for elem in lines_as_list:
    _create_testcase_tag(elem)

output_xml = ET.ElementTree(testsuite)
output_xml.write(output_file, xml_declaration=True, encoding="UTF-8")

The above code generates an XML like this:

<testsuite name="Tests">
    <testsuite name="Nav">
        <testsuite name="Basic navigation">
            <testsuite name="Set destination">
                <testcase name="TBD">
                    <summary>Test Type= Smoke test Automated= No, Available=Yes</summary>
                </testcase>
            </testsuite>
        </testsuite>
    </testsuite>

    <testsuite name="Nav">
        <testsuite name="Set destination">
            <testsuite name="Recent">
                <testcase name="TBD">
                    <summary>
                    Test Type= Reggression test Automated= No, Available=Yes
                    </summary>
                </testcase>
            </testsuite>
        </testsuite>
    </testsuite>
</testsuite>
...

This is all correct, but as you can see I have created a whole tree for each line and that is not what I need. I need to combine e.g. all testsuite with the same name into one testsuite and also perform that recursively. So the XML looks like this instead:

<testsuite name="Tests">
    <testsuite name="Nav">
        <testsuite name="Basic navigation">
            <testsuite name="Set destination">
                <testcase name="TBD">
                    <summary>Test Type= Smoke test Automated= No, Available=Yes</summary>
                </testcase>
            </testsuite>
            <testsuite name="Recent">
                <testcase name="TBD">
                    <summary>
                    Test Type= Reggression test Automated= No, Available=Yes
                    </summary>
                </testcase>
            </testsuite>
        </testsuite>
    </testsuite>
</testsuite>

I hope you can understand what I mean, but level1, level2 and level3 should be unique with testcases inside.

How should I do this? Please do not suggest the use of any external libraries! I cannot install new libraries at the customer site. xml.etree.ElementTree is all I have.

Thanks

Answers


Sort your data on the first three fields, then use itertools.groupby() to organize the data the way you want prior to adding it to the XML. This assumes the level of nesting shown in your example:

from operator import itemgetter
from itertools import groupby

# Accessors for rows of the form
# [level1, level2, level3, test type, automated, available].
names = itemgetter(slice(3))             # the three suite levels (sort key)
test_items = itemgetter(slice(3, None))  # the three test-case fields
level1 = itemgetter(0)
level2 = itemgetter(1)
level3 = itemgetter(2)


s = """Nav;Basic;Dest;Smoke;No;Yes;
Nav;Dest;Recent;Regg;No;Yes;
Nav;Dest;Favourites;Regg;No;Yes;
Nav;Dest using on board;By POI;Smoke;No;Yes;
Nav;Dest;Recent;Regg;Yes;Yes;
Nav;Dest using on board;Other;Regg;No;Yes;
Nav;Basic;Dest;Smoke;Yes;Yes;
Nav;Basic;Dest;Smoke;Yes;No;
"""
# Every line ends with ';' - strip it so split() yields exactly six fields.
data = [line.rstrip(';').split(';') for line in s.splitlines()]
# groupby() only merges adjacent rows with equal keys, so sort on the suite
# levels first.
data.sort(key=names)

testsuite = ET.Element('testsuite')
testsuite.set("name", "Tests")
# The progress output uses single-argument print(...) calls so the script
# runs, with identical output, on both Python 2 and Python 3.
for key1, group1 in groupby(data, level1):
    print('*** ' + key1)
    testsuite_level1 = ET.SubElement(testsuite, "testsuite")
    testsuite_level1.set("name", key1)
    for key2, group2 in groupby(group1, level2):
        print('****** ' + key2)
        testsuite_level2 = ET.SubElement(testsuite_level1, "testsuite")
        testsuite_level2.set("name", key2)
        for key3, group3 in groupby(group2, level3):
            print('********* ' + key3)
            testsuite_level3 = ET.SubElement(testsuite_level2, "testsuite")
            testsuite_level3.set("name", key3)
            # One <testcase> per level-3 suite; each row in the group adds
            # its own <summary> child to that test case.
            testcase = ET.SubElement(testsuite_level3, "testcase")
            testcase.set("name", "TBD")
            for element in group3:
                print('          ' + str(test_items(element)))
                summary = ET.SubElement(testcase, "summary")
                summary.text = "Test Type= {}, Automated= {}, Available={}".format(*test_items(element))

>>>
*** Nav
****** Basic
********* Dest
          ['Smoke', 'No', 'Yes']
          ['Smoke', 'Yes', 'Yes']
          ['Smoke', 'Yes', 'No']
****** Dest
********* Favourites
          ['Regg', 'No', 'Yes']
********* Recent
          ['Regg', 'No', 'Yes']
          ['Regg', 'Yes', 'Yes']
****** Dest using on board
********* By POI
          ['Smoke', 'No', 'Yes']
********* Other
          ['Regg', 'No', 'Yes']
>>>

In response to the comment asking for a proper XML solution, I came up with this - perhaps it is what you had in mind. I had to make unique tags instead of every tag being testsuite. This has the advantage of creating the tree on-the-fly:

def _create_testcase_tag(testsuite, elem):
    """Add the test case described by elem to testsuite.

    testsuite --> xml.etree.ElementTree.Element (root; modified in place)
    elem --> sequence of six strings:
             level1, level2, level3, test type, automated, available

    Spaces in the three level names are replaced by underscores and the
    result is used as the tag of the nested suite elements. A child with
    that tag is reused when it already exists and created otherwise, so
    rows sharing the same levels end up under the same elements.

    return the same testsuite element that was passed in
    """

    level1, level2, level3, elem4, elem5, elem6 = elem

    # Descend one level at a time, reusing existing suite elements.
    parent = testsuite
    for level in (level1, level2, level3):
        tag = level.replace(' ', '_')
        child = parent.find(tag)
        # Compare against None explicitly: an Element without children is
        # falsy, so ``if not child`` would duplicate an existing empty suite.
        if child is None:
            child = ET.SubElement(parent, tag)
        parent = child

    # -- testcase (one per input row, inside the level3 suite)
    testcase = ET.SubElement(parent, "testcase")
    testcase.set("name", "TBD")
    summary = ET.SubElement(testcase, "summary")
    summary.text = "Test Type= {}, Automated= {}, Available={}".format(elem4, elem5, elem6)

    return testsuite

# testsuite (root): every input line is merged into this tree
testsuite = ET.Element('testsuite')
testsuite.set("name", "Tests")

with open(input_file) as in_file:
    for line in in_file:
        line = line.strip()
        if not line:
            # Blank lines (e.g. an empty last line) carry no fields; passing
            # them on would fail when the six fields are unpacked.
            continue
        # Drop the trailing ';' so split() does not yield an empty last item.
        fields = line.rstrip(';').split(';')
        testsuite = _create_testcase_tag(testsuite, fields)

Need Your Help

Contours in OpenCV?

c++ image video opencv

I have drawn a circle on my image, and I want to find any points where the circle and a specific contour intersect.

session variable keeps disappearing following selection

php session

I have a value that I receive from another URL via $_SESSION. It arrives glued together with a second value I need, as {$is:$user} or {2:bob}. I split them with explode and 'attempt' to ...

Difference between “on-heap” and “off-heap”

java memory heap ehcache

Ehcache talks about on-heap and off-heap memory. What is the difference? What JVM args are used to configure them?

About UNIX Resources Network

Original, collect and organize Developers related documents, information and materials, contains jQuery, Html, CSS, MySQL, .NET, ASP.NET, SQL, objective-c, iPhone, Ruby on Rails, C, SQL Server, Ruby, Arrays, Regex, ASP.NET MVC, WPF, XML, Ajax, DataBase, and so on.