Post unicode string to web service using Python Requests library

37,142

It is not clear what content type json-tagger.herokuapp.com expects (the examples are contradictory). You could try to post the data as text:

#!/usr/bin/env python
import requests  # pip install requests

r = requests.post(url,
                  data=text.encode('utf-8'),
                  headers={'Content-type': 'text/plain; charset=utf-8'})
print(r.json())

Or you could try to send it as application/x-www-form-urlencoded:

#!/usr/bin/env python
import requests  # pip install requests

r = requests.post(url, data=dict(data=text))
print(r.json())

The server may reject both, accept both, accept one but not the other, or expect some other format entirely (e.g., application/json).

Share:
37,142
mattiasostmar
Author by

mattiasostmar

Computational Media Analyst

Updated on July 22, 2022

Comments

  • mattiasostmar
    mattiasostmar almost 2 years

    I'm trying to post a snippet of text containing fancy unicode symbols to a web service using the requests library. I'm using Python 3.5.

    text = "Två dagar kvar🎉🎉"
    r = requests.post("http://json-tagger.herokuapp.com/tag", data=text)
    print(r.json())
    

    I get a UnicodeEncodeError, but I can't figure out what I'm doing wrong on my side. From what I can see, the docs for requests only talk about Unicode in GET requests.

        UnicodeEncodeError                        Traceback (most recent call last)
    <ipython-input-125-3ebcae3d7918> in <module>()
         19         print("cleaned : " + line)
         20 
    ---> 21         r = requests.post("http://json-tagger.herokuapp.com/tag", data=line)
         22         sentences = r.json()['sentences']
         23         for sentence in sentences:
    
    //anaconda/lib/python3.4/site-packages/requests/api.py in post(url, data, json, **kwargs)
        105     """
        106 
    --> 107     return request('post', url, data=data, json=json, **kwargs)
        108 
        109 
    
    //anaconda/lib/python3.4/site-packages/requests/api.py in request(method, url, **kwargs)
         51     # cases, and look like a memory leak in others.
         52     with sessions.Session() as session:
    ---> 53         return session.request(method=method, url=url, **kwargs)
         54 
         55 
    
    //anaconda/lib/python3.4/site-packages/requests/sessions.py in request(self, method, url, params, data, headers, cookies, files, auth,     timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
        466         }
        467         send_kwargs.update(settings)
    --> 468         resp = self.send(prep, **send_kwargs)
        469 
        470         return resp
    
    //anaconda/lib/python3.4/site-packages/requests/sessions.py in send(self, request, **kwargs)
        574 
        575         # Send the request
    --> 576         r = adapter.send(request, **kwargs)
        577 
        578         # Total elapsed time of the request (approximately)
    
    //anaconda/lib/python3.4/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
        374                     decode_content=False,
        375                     retries=self.max_retries,
    --> 376                     timeout=timeout
        377                 )
        378 
    
    //anaconda/lib/python3.4/site-packages/requests/packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries,     redirect, assert_same_host, timeout, pool_timeout, release_conn, **response_kw)
        557             httplib_response = self._make_request(conn, method, url,
        558                                                   timeout=timeout_obj,
    --> 559                                                   body=body, headers=headers)
        560 
        561             # If we're going to release the connection in ``finally:``, then
    
    //anaconda/lib/python3.4/site-packages/requests/packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout,     **httplib_request_kw)
        351         # conn.request() calls httplib.*.request, not the method in
        352         # urllib3.request. It also calls makefile (recv) on the socket.
    --> 353         conn.request(method, url, **httplib_request_kw)
        354 
        355         # Reset the timeout for the recv() on the socket
    
    //anaconda/lib/python3.4/http/client.py in request(self, method, url, body, headers)
       1086     def request(self, method, url, body=None, headers={}):
       1087         """Send a complete request to the server."""
    -> 1088         self._send_request(method, url, body, headers)
       1089 
       1090     def _set_content_length(self, body):
    
    //anaconda/lib/python3.4/http/client.py in _send_request(self, method, url, body, headers)
       1123             # RFC 2616 Section 3.7.1 says that text default has a
       1124             # default charset of iso-8859-1.
    -> 1125             body = body.encode('iso-8859-1')
       1126         self.endheaders(body)
       1127 
    
    UnicodeEncodeError: 'latin-1' codec can't encode characters in position 14-15: ordinal not in range(256)
    

    WORKAROUND: I remove from the text all Unicode characters in the "Emoticons" block (U+1F600 - U+1F64F) and the "Symbols And Pictographs" block (U+1F300 - U+1F5FF), according to this answer, with the following code, since I don't need emoticons and pictures for the analysis:

    text = re.sub(r'[\U0001F300-\U0001F5FF\U0001F600-\U0001F64F]', "", text)
    

    UPDATE: The creator of the web service has now fixed this and updated the documentation. All you have to do is send an encoded string. In Python 3:

    "Två dagar kvar🎉🎉".encode("utf-8")