Parsing and sorting keys in Python dictionary

11,385

Solution 1

The following code should work, if I understood the problem correctly:

from collections import defaultdict

# Group the coded texts by individual code: every '; '-separated part of a
# key becomes its own entry in `out`, collecting the values of all keys that
# mention that code. `code_dictionary` is the mapping from the question.
out = defaultdict(list)
# items() exists on both Python 2 and Python 3; viewitems() is Python 2.7
# only and raises AttributeError on Python 3.
for k, v in code_dictionary.items():
    for item in k.split('; '):
        out[item].append(v)

Solution 2

# Sample mapping: each key is a '; '-separated list of codes, each value is
# the text that was tagged with those codes.
output = {
    u'News; comment; negative': u'contradictory about news',
    u'News; comment': u'something about news',
}

negatives = []
comments = []
news = []

# Pair every lower-case code with the list that collects its texts.
buckets = (
    ('negative', negatives),
    ('news', news),
    ('comment', comments),
)

for codes, text in output.items():
    # Lower-case the parts so that 'News' in a key matches the 'news' tag.
    lowered = [code.lower() for code in codes.split('; ')]
    for tag, bucket in buckets:
        if tag in lowered:
            bucket.append(text)
Share:
11,385
user1552159
Author by

user1552159

Updated on June 04, 2022

Comments

  • user1552159
    user1552159 almost 2 years

    I created the following dictionary:

    # Sample input: each key is a '; '-separated list of codes, each value is the coded text.
    code_dictionary = {u'News; comment; negative': u'contradictory about news', u'News; comment': u'something about news'}
    

    I now want to write some Python code that goes through the dictionary's keys, splits each key into its individual codes, and collects the values that belong to each code. For the dictionary above, I want to end up with:

    News: 'contradictory about news', 'something about news'
    comment: 'contradictory about news', 'something about news'
    negative: 'contradictory about news'
    

    The end result can be a dictionary, list, or tab or comma-separated text.

    You can see my attempt to do this here:

    from bs4 import BeautifulSoup as Soup
    # Open the transcript inside a context manager so the file handle is
    # closed as soon as the document has been parsed (the original never
    # closed it).
    with open('transcript.xml','r') as f:
        # Parse the whole file into a BeautifulSoup tree.
        soup = Soup(f)
    #print soup.prettify()
    
    
    # Build `textdict`, mapping a comment id (as str) to the transcript text
    # found next to each <w:commentrangestart> tag in the parsed document.
    textdict = {}
    for i in soup.find_all('w:commentrangestart'):
            # The id is read from the 'w:id' attribute of the second child
            # (contents[1]) of the tag's parent -- presumably the range-start
            # tag itself; verify against the actual XML layout.
            key = i.parent.contents[1].attrs['w:id']
            key = str(key)
            # The value is every text node under the tag's next sibling,
            # joined into one string.
            value= ''.join(i.nextSibling.findAll(text=True))
            # A repeated id overwrites the earlier entry.
            textdict[key]=value
    print "Transcript Text = " , textdict
    
    # Build `codedict`, mapping a comment id (as str) to the text of the
    # corresponding <w:comment> tag, i.e. the code string attached to that id.
    codedict = {}
    for i in soup.find_all('w:comment'):
            # The id comes from the tag's own 'w:id' attribute.
            key = i.attrs['w:id']
            key = str(key)
            # All text nodes inside the comment, joined into one string.
            value= ''.join(i.findAll(text=True))
            codedict[key]=value
    print "Codes = ", codedict
    
    # Build `output`, mapping each code string (from `codedict`) to the
    # transcript text (from `textdict`) that shares the same id.
    output = {}
    # NOTE(review): this iterates the *union* of both key sets but then indexes
    # both dictionaries, so an id present in only one of them raises KeyError.
    for key in set(textdict.keys()).union(codedict.keys()):
            print "key= ", key
            txt = textdict[key]
            print "txt = ", txt
            ct = codedict[key]
            print "ct= ", ct
            # Keyed by the code string, not by the id; two ids carrying the
            # same code string overwrite each other here.
            output[ct] = txt
            #print "output = ", output
    print "All code dictionary = ", output
    
    #codelist={}
    #for key in output:
    #   codelist =key.split(";")
    #print "codelist= " , codelist
    
    
    # Attempt to collect the texts coded 'negative' and 'News' into two
    # separate dictionaries, keyed by the full code string.
    code_negative = {}
    code_news = {}
    print output.keys()
    for i in output:
        # NOTE(review): this tests whether some key of `output` is exactly
        # 'negative'; it does not look inside the current key `i`.
        if 'negative' in output.keys():
            print 'yay'
            # NOTE(review): `i` is a code string (a key of `output`), whereas
            # `textdict` is keyed by comment id, so this lookup raises
            # KeyError unless a code string happens to equal an id.
            code_negative[i]=textdict[i]
            print 'text coded negative: ' , code_negative
        # Substring test on the current key.
        if 'News' in i:
            # NOTE(review): same id-versus-code key mismatch as above.
            code_news[i]=textdict[i]
            print 'text coded News: ' ,code_news
    

    For some reason, though, I keep getting a KeyError when I run the last block of code:

    # Reduced version of the failing snippet: tries to collect the texts coded
    # 'negative' and 'News' into two dictionaries keyed by the code string.
    code_negative = {}
    code_news = {}
    for i in output:
        # NOTE(review): checks for a key that is exactly 'negative', not for
        # 'negative' inside the current key `i`.
        if 'negative' in output.keys():
            # NOTE(review): `i` is a key of `output` (a code string), while
            # `textdict` is keyed by comment id -- a likely KeyError source.
            code_negative[i]=textdict[i]
        print 'text coded negative: ' , code_negative
    # NOTE(review): as indented here, this check sits outside the loop, so it
    # only sees the last `i` (and `i` is undefined if `output` is empty).
    if 'News' in i:
        code_news[i]=textdict[i]
        print 'text coded News: ' ,code_news
    

    Any ideas? Thanks!