Menu

#271 python2.x: cannot dump to `io.StringIO()`

closed
nobody
None
minor
bug
2020-01-23
2020-01-23
No

testcase

import io
import sys

import pytest
from ruamel.yaml import YAML


@pytest.mark.parametrize('io_inst', (io.BytesIO(), io.StringIO()))
def test(io_inst):
    y = YAML()
    y.dump('x', io_inst)

python3.x

$ ./venv/bin/pytest  t.py 
============================= test session starts ==============================
platform linux -- Python 3.6.7, pytest-4.0.2, py-1.7.0, pluggy-0.8.0
rootdir: /home/asottile/workspace/pre-commit, inifile: tox.ini
plugins: env-0.6.2
collected 2 items                                                              

t.py ..                                                                  [100%]

=========================== 2 passed in 0.02 seconds ===========================

python2.x

$ ./venv2/bin/pytest  t.py 
============================= test session starts ==============================
platform linux2 -- Python 2.7.15rc1, pytest-4.1.0, py-1.7.0, pluggy-0.8.0
rootdir: /home/asottile/workspace/pre-commit, inifile: tox.ini
collected 2 items                                                              

t.py .F                                                                  [100%]

=================================== FAILURES ===================================
________________________________ test[io_inst1] ________________________________

io_inst = <_io.StringIO object at 0x7fbd72501a50>

    @pytest.mark.parametrize('io_inst', (io.BytesIO(), io.StringIO()))
    def test(io_inst):
        y = YAML()
>       y.dump('x', io_inst)

t.py:11: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
venv2/local/lib/python2.7/site-packages/ruamel/yaml/main.py:439: in dump
    return self.dump_all([data], stream, _kw, transform=transform)
venv2/local/lib/python2.7/site-packages/ruamel/yaml/main.py:453: in dump_all
    self._context_manager.dump(data)
venv2/local/lib/python2.7/site-packages/ruamel/yaml/main.py:801: in dump
    self._yaml.representer.represent(data)
venv2/local/lib/python2.7/site-packages/ruamel/yaml/representer.py:85: in represent
    self.serializer.serialize(node)
venv2/local/lib/python2.7/site-packages/ruamel/yaml/serializer.py:117: in serialize
    self.serialize_node(node, None, None)
venv2/local/lib/python2.7/site-packages/ruamel/yaml/serializer.py:180: in serialize_node
    comment=node.comment,
venv2/local/lib/python2.7/site-packages/ruamel/yaml/emitter.py:252: in emit
    self.state()
venv2/local/lib/python2.7/site-packages/ruamel/yaml/emitter.py:392: in expect_document_root
    self.expect_node(root=True)
venv2/local/lib/python2.7/site-packages/ruamel/yaml/emitter.py:414: in expect_node
    self.expect_scalar()
venv2/local/lib/python2.7/site-packages/ruamel/yaml/emitter.py:463: in expect_scalar
    self.process_scalar()
venv2/local/lib/python2.7/site-packages/ruamel/yaml/emitter.py:872: in process_scalar
    self.write_plain(self.analysis.scalar, split)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <ruamel.yaml.emitter.Emitter object at 0x7fbd71d98b90>, text = 'x'
split = True

    def write_plain(self, text, split=True):
        # type: (Any, Any) -> None
        if self.root_context:
            if self.requested_indent is not None:
                self.write_line_break()
                if self.requested_indent != 0:
                    self.write_indent()
            else:
                self.open_ended = True
        if not text:
            return
        if not self.whitespace:
            data = u' '
            self.column += len(data)
            if self.encoding:
                data = data.encode(self.encoding)
            self.stream.write(data)
        self.whitespace = False
        self.indention = False
        spaces = False
        breaks = False
        start = end = 0
        while end <= len(text):
            ch = None
            if end < len(text):
                ch = text[end]
            if spaces:
                if ch != u' ':
                    if start + 1 == end and self.column > self.best_width and split:
                        self.write_indent()
                        self.whitespace = False
                        self.indention = False
                    else:
                        data = text[start:end]
                        self.column += len(data)
                        if self.encoding:
                            data = data.encode(self.encoding)
                        self.stream.write(data)
                    start = end
            elif breaks:
                if ch not in u'\n\x85\u2028\u2029':
                    if text[start] == u'\n':
                        self.write_line_break()
                    for br in text[start:end]:
                        if br == u'\n':
                            self.write_line_break()
                        else:
                            self.write_line_break(br)
                    self.write_indent()
                    self.whitespace = False
                    self.indention = False
                    start = end
            else:
                if ch is None or ch in u' \n\x85\u2028\u2029':
                    data = text[start:end]
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    try:
>                       self.stream.write(data)
E                       TypeError: unicode argument expected, got 'str'

venv2/local/lib/python2.7/site-packages/ruamel/yaml/emitter.py:1598: TypeError
----------------------------- Captured stdout call -----------------------------
'x'
====================== 1 failed, 1 passed in 0.18 seconds ======================

(originally posted on 2019-01-07 at 14:26:10 by Anthony Sottile <asottile@bitbucket>)

Discussion

  • Anthon van der Neut

    • status set to wontfix

    Get the BytesIO and StringIO from ruamel.yaml.compat:

    import io
    import sys
    
    import pytest
    from ruamel.yaml import YAML
    from ruamel.yaml.compat import BytesIO, StringIO
    
    
    @pytest.mark.parametrize('io_inst', (BytesIO(), StringIO()))
    def test(io_inst):
        y = YAML()
        y.dump('x', io_inst)
    

    and then:

    $ pytest2 t.py 
    ==================================================== test session starts ====================================================
    platform linux2 -- Python 2.7.13, pytest-3.1.3, py-1.4.34, pluggy-0.4.0
    rootdir: /home/avdndata/hg/ruamel.eu/src/issue/20190107_yaml_271, inifile:
    collected 2 items 
    
    t.py ..
    
    ================================================= 2 passed in 0.02 seconds ==================================================
    $ pytest t.py 
    ==================================================== test session starts ====================================================
    platform linux -- Python 3.6.0, pytest-3.1.3, py-1.4.34, pluggy-0.4.0
    rootdir: /home/avdndata/hg/ruamel.eu/src/issue/20190107_yaml_271, inifile:
    collected 2 items 
    
    t.py ..
    

    (originally posted on 2019-01-07 at 15:21:55)

     
  • Anthon van der Neut

    the compat BytesIO and StringIO do not have the correct semantics:

    import pytest
    from ruamel.yaml.compat import BytesIO
    from ruamel.yaml.compat import StringIO
    
    
    
    def test_bytesio():
        bio = BytesIO()
        with pytest.raises(TypeError):
            bio.write(u'hi')
    
    
    def test_stringio():
        sio = StringIO()
        with pytest.raises(TypeError):
            sio.write(b'hi')
    
    $ ./venv2/bin/pytest t.py
    ============================= test session starts ==============================
    platform linux2 -- Python 2.7.15rc1, pytest-4.1.0, py-1.7.0, pluggy-0.8.0
    rootdir: /home/asottile/workspace/pre-commit, inifile: tox.ini
    collected 2 items                                                              
    
    t.py FF                                                                  [100%]
    
    =================================== FAILURES ===================================
    _________________________________ test_bytesio _________________________________
    
        def test_bytesio():
            bio = BytesIO()
            with pytest.raises(TypeError):
    >           bio.write(u'hi')
    E           Failed: DID NOT RAISE <type 'exceptions.TypeError'>
    
    t.py:10: Failed
    ________________________________ test_stringio _________________________________
    
        def test_stringio():
            sio = StringIO()
            with pytest.raises(TypeError):
    >           sio.write(b'hi')
    E           Failed: DID NOT RAISE <type 'exceptions.TypeError'>
    
    t.py:16: Failed
    =========================== 2 failed in 0.05 seconds ===========================
    

    (originally posted on 2019-01-07 at 15:25:55 by Anthony Sottile <asottile@bitbucket>)

     
  • Anthon van der Neut

    @ruamel -- not sure if you saw this but I can't use ruamel.yaml.compat.StringIO / BytesIO because they don't have proper unicode-safe semantics

    (originally posted on 2019-01-08 at 07:42:30 by Anthony Sottile <asottile@bitbucket>)

     
  • Anthon van der Neut

    Yes, I saw this, but I don't see an acceptable change to the code base. I could maybe test some property of the output stream and decode the generated output.

    You'll have to make sure your stream can handle bytes, because YAML() always sets the encoding to utf-8 (and allow_unicode = True). So you need a binary stream, or a stream smart enough to do the conversion. I also want cStringIO (on Python 2), because YAML is slow enough as it is. (Or, conversely, set the encoding differently.)

    (originally posted on 2019-01-08 at 08:46:05)

     
  • Anthon van der Neut

    The io module is implemented in C in 2.7+, IIRC.

    (originally posted on 2019-01-08 at 15:38:25 by Anthony Sottile <asottile@bitbucket>)

     
  • Anthon van der Neut

    You are right, it was 2.6's io.py that had the text:

    # This is a prototype; hopefully eventually some of this will be
    # reimplemented in C.
    

    But I no longer support 2.6 in the library.

    I'll see how much gets broken if I change the Python2 code in compat.py

    (originally posted on 2019-01-08 at 15:55:14)

     
  • Anthon van der Neut

    • status set to open

    (originally posted on 2019-01-08 at 15:55:29)

     
  • Anthon van der Neut

    • status set to closed

    Switching to io doesn't gain anything. If you want to write to a StringIO in 2.7, you'll have to disable the utf-8 encoding:

    def test(io_inst):
        y = YAML()
        y.encoding = None
        y.dump('x', io_inst)
    

    if you want to work with unicode output.

    (originally posted on 2019-01-08 at 17:27:14)

     

Log in to post a comment.