Equivalent in C# of Python's "struct.pack/unpack"?

10,782

Solution 1

I ended up writing my own class to handle this. It's pretty complex, but it does seem to work. It's also incomplete, but it works for what I need at this point. Feel free to use it, and if there are any good improvements, please let me know.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Diagnostics;

// This is a crude implementation of a format string based struct converter for C#.
// This is probably not the best implementation, the fastest implementation, the most bug-proof implementation, or even the most functional implementation.
// It's provided as-is for free. Enjoy.

/// <summary>
/// Packs primitive integer values into byte arrays and unpacks them again, driven by a
/// subset of Python's "struct" format strings.
/// </summary>
/// <remarks>
/// Supported format characters: q/Q (64-bit), i/I and l/L (32-bit), h/H (16-bit),
/// b/B (8-bit) and x (one pad byte that is skipped when unpacking).
/// A leading '&lt;' selects little endian and a leading '&gt;' selects big endian;
/// with no marker the machine's native byte order is used. No alignment padding is applied.
/// </remarks>
public class StructConverter
{
    /// <summary>
    /// Returns the native-byte-order representation of a boxed integer value.
    /// </summary>
    /// <exception cref="ArgumentException">The boxed value is not one of the supported integer types.</exception>
    private static byte[] TypeAgnosticGetBytes(object o)
    {
        if (o is int) return BitConverter.GetBytes((int)o);
        if (o is uint) return BitConverter.GetBytes((uint)o);
        if (o is long) return BitConverter.GetBytes((long)o);
        if (o is ulong) return BitConverter.GetBytes((ulong)o);
        if (o is short) return BitConverter.GetBytes((short)o);
        if (o is ushort) return BitConverter.GetBytes((ushort)o);
        if (o is byte) return new byte[] { (byte)o };
        // A boxed sbyte can only be unboxed as sbyte; reinterpret the bit pattern afterwards.
        if (o is sbyte) return new byte[] { unchecked((byte)(sbyte)o) };
        throw new ArgumentException("Unsupported object type found");
    }

    /// <summary>
    /// Returns the format character that <see cref="Unpack"/> needs to read back a value of the given boxed type.
    /// </summary>
    /// <exception cref="ArgumentException">The boxed value is not one of the supported integer types.</exception>
    private static string GetFormatSpecifierFor(object o)
    {
        if (o is int) return "i";
        if (o is uint) return "I";
        if (o is long) return "q";
        if (o is ulong) return "Q";
        if (o is short) return "h";
        if (o is ushort) return "H";
        if (o is byte) return "B";
        if (o is sbyte) return "b";
        throw new ArgumentException("Unsupported object type found");
    }

    /// <summary>
    /// Returns the number of bytes occupied by one field of the given format character.
    /// </summary>
    /// <exception cref="ArgumentException">The character is not a supported format character.</exception>
    private static int GetFieldSize(char c)
    {
        switch (c)
        {
            case 'q':
            case 'Q':
                return 8;
            case 'i':
            case 'I':
            case 'l':
            case 'L':
                return 4;
            case 'h':
            case 'H':
                return 2;
            case 'b':
            case 'B':
            case 'x':
                return 1;
            default:
                throw new ArgumentException("Invalid character found in format string.");
        }
    }

    /// <summary>
    /// Decodes a single field whose bytes are already in native byte order.
    /// </summary>
    /// <param name="c">The format character describing the field.</param>
    /// <param name="field">Exactly the bytes of this field, in native byte order.</param>
    private static object DecodeField(char c, byte[] field)
    {
        switch (c)
        {
            case 'q':
                return BitConverter.ToInt64(field, 0);
            case 'Q':
                return BitConverter.ToUInt64(field, 0);
            case 'i':
            case 'l':
                return BitConverter.ToInt32(field, 0);
            case 'I':
            case 'L':
                return BitConverter.ToUInt32(field, 0);
            case 'h':
                return BitConverter.ToInt16(field, 0);
            case 'H':
                return BitConverter.ToUInt16(field, 0);
            case 'b':
                // Reinterpret the bit pattern; must not throw even in a checked build.
                return unchecked((sbyte)field[0]);
            case 'B':
                return field[0];
            default:
                throw new ArgumentException("Invalid character found in format string.");
        }
    }

    /// <summary>
    /// Convert a byte array into an array of objects based on Python's "struct.unpack" protocol.
    /// </summary>
    /// <param name="fmt">A "struct.pack"-compatible format string</param>
    /// <param name="bytes">An array of bytes to convert to objects</param>
    /// <returns>Array of objects, one per non-padding format character, in format order.</returns>
    /// <remarks>You are responsible for casting the objects in the array back to their proper types.</remarks>
    /// <exception cref="ArgumentNullException"><paramref name="fmt"/> or <paramref name="bytes"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// The format string is empty, contains an unsupported character, or describes a
    /// different number of bytes than <paramref name="bytes"/> contains.
    /// </exception>
    public static object[] Unpack(string fmt, byte[] bytes)
    {
        if (fmt == null) throw new ArgumentNullException("fmt");
        if (bytes == null) throw new ArgumentNullException("bytes");

        Debug.WriteLine("Format string is length {0}, {1} bytes provided.", fmt.Length, bytes.Length);

        // First we parse the format string to make sure it's proper.
        if (fmt.Length < 1) throw new ArgumentException("Format string cannot be empty.");

        // BitConverter always works in native byte order, so a flip is needed
        // whenever the requested order differs from the machine's.
        bool endianFlip = false;
        if (fmt[0] == '<')
        {
            Debug.WriteLine("  Endian marker found: little endian");
            endianFlip = !BitConverter.IsLittleEndian;
            fmt = fmt.Substring(1);
        }
        else if (fmt[0] == '>')
        {
            Debug.WriteLine("  Endian marker found: big endian");
            endianFlip = BitConverter.IsLittleEndian;
            fmt = fmt.Substring(1);
        }

        // Now, we find out how long the byte array needs to be.
        // This pass also rejects any unsupported format character up front.
        int totalByteLength = 0;
        foreach (char c in fmt)
        {
            Debug.WriteLine("  Format character found: {0}", c);
            totalByteLength += GetFieldSize(c);
        }

        Debug.WriteLine("Endianness will {0}be flipped.", (object)(endianFlip ? "" : "NOT "));
        Debug.WriteLine("The byte array is expected to be {0} bytes long.", totalByteLength);

        // Test the byte array length to see if it contains as many bytes as is needed for the string.
        if (bytes.Length != totalByteLength) throw new ArgumentException("The number of bytes provided does not match the total length of the format string.");

        // Ok, we can go ahead and start parsing bytes!
        int byteArrayPosition = 0;
        List<object> outputList = new List<object>();

        Debug.WriteLine("Processing byte array...");
        foreach (char c in fmt)
        {
            int size = GetFieldSize(c);
            if (c == 'x')
            {
                Debug.WriteLine("  Ignoring a byte");
            }
            else
            {
                // Work on a private copy so the caller's array is never modified by the flip.
                byte[] field = new byte[size];
                Array.Copy(bytes, byteArrayPosition, field, 0, size);
                if (endianFlip) Array.Reverse(field);
                outputList.Add(DecodeField(c, field));
                Debug.WriteLine("  Added field of type '{0}'.", c);
            }
            byteArrayPosition += size;
        }
        return outputList.ToArray();
    }

    /// <summary>
    /// Convert an array of objects to a byte array, along with a string that can be used with Unpack.
    /// </summary>
    /// <param name="items">An object array of items to convert</param>
    /// <param name="LittleEndian">Set to False if you want to use big endian output.</param>
    /// <param name="NeededFormatStringToRecover">Variable to place an 'Unpack'-compatible format string into.</param>
    /// <returns>A Byte array containing the objects provided in binary format.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="items"/> is null.</exception>
    /// <exception cref="ArgumentException">An item is null or not a supported integer type.</exception>
    public static byte[] Pack(object[] items, bool LittleEndian, out string NeededFormatStringToRecover)
    {
        if (items == null) throw new ArgumentNullException("items");

        // make a byte list to hold the bytes of output
        List<byte> outputBytes = new List<byte>();

        // should we be flipping bytes for proper endianness?
        bool endianFlip = (LittleEndian != BitConverter.IsLittleEndian);

        // start working on the output string
        string outString = (LittleEndian ? "<" : ">");

        // convert each item in the objects to the representative bytes
        foreach (object o in items)
        {
            byte[] theseBytes = TypeAgnosticGetBytes(o);
            // GetBytes returns a fresh array, so reversing it in place is safe.
            if (endianFlip) Array.Reverse(theseBytes);
            outString += GetFormatSpecifierFor(o);
            outputBytes.AddRange(theseBytes);
        }

        NeededFormatStringToRecover = outString;

        return outputBytes.ToArray();
    }

    /// <summary>
    /// Convert an array of objects to a little endian byte array, discarding the format string.
    /// </summary>
    /// <param name="items">An object array of items to convert</param>
    /// <returns>A Byte array containing the objects provided in binary format.</returns>
    public static byte[] Pack(object[] items)
    {
        string dummy;
        return Pack(items, true, out dummy);
    }
}

Solution 2

BinaryWriter and BinaryReader will send arbitrary items to a byte array or read arbitrary items from a byte array

// Back the writer with an in-memory stream so that everything written can be
// retrieved afterwards as a single byte array.
var str = new MemoryStream();
var bw = new BinaryWriter(str);
// The Write overload is chosen from the argument's static type: an int literal here...
bw.Write(42);
// ...and a string here (per the BinaryWriter documentation, strings are written length-prefixed).
bw.Write("hello");
// Placeholder in the original answer for any further Write calls.
...
// Copy the stream's contents out as a byte[].
// NOTE(review): neither bw nor str is flushed or disposed in this fragment - confirm before reuse.
var bytes = str.ToArray();

Solution 3

.NET (and thus, C#) has the Marshal.StructureToPtr and Marshal.PtrToStructure methods.

You can abuse these to cast raw memory to a struct like you would in C, not that I'd recommend doing it this way (as it isn't exactly portable). You also need to get your Byte[] array buffer into the native heap in order to perform the operation on it:

// Reinterprets the leading bytes of a managed buffer as a blittable structure of type T.
//
// buffer:  source bytes; must contain at least Marshal.SizeOf(typeof(T)) bytes.
// returns: a T populated from the first Marshal.SizeOf(typeof(T)) bytes of buffer.
// throws:  ArgumentNullException when buffer is null;
//          ArgumentOutOfRangeException when buffer is shorter than the structure.
T FromBuffer<T>(Byte[] buffer) where T : struct {

    if (buffer == null) throw new ArgumentNullException("buffer");

    int size = Marshal.SizeOf(typeof(T));

    // Validate before allocating so a short buffer cannot leak unmanaged memory.
    if (buffer.Length < size) throw new ArgumentOutOfRangeException("buffer", "Buffer is too small to hold the requested structure.");

    IntPtr ptr = Marshal.AllocHGlobal(size);
    try {
        Marshal.Copy(buffer, 0, ptr, size);
        return (T)Marshal.PtrToStructure(ptr, typeof(T));
    }
    finally {
        // Always release the unmanaged block, even if marshalling throws.
        Marshal.FreeHGlobal(ptr);
    }
}
Share:
10,782

Related videos on Youtube

fdmillion
Author by

fdmillion

Currently an active Python 2 developer; also fluent with Python 3, C# .NET, PHP, Visual Basic 6/.NET and Perl.

Updated on June 30, 2022

Comments

  • fdmillion
    fdmillion about 2 years

    I am a seasoned Python developer and have come to love a lot of its conveniences. I have actually known C# for some time but recently have gotten into some more advanced coding.

    What I'm wondering is if there's a way to "parse" a byte array in C# into a set of (differently sized) items.

    Imagine we have this:

    Python:

    import struct
    # 10 bytes laid out as uint32, uint16, uint32, all little-endian ("<LHL").
    # The b prefix keeps this a byte string on both Python 2.6+ and Python 3.
    byteArray = b"\xFF\xFF\x00\x00\x00\xFF\x01\x00\x00\x00"
    numbers = struct.unpack("<LHL",byteArray)
    print(numbers[0]) # 65535
    print(numbers[1]) # 65280 (bytes 00 FF read little-endian are 0xFF00)
    print(numbers[2]) # 1

    newNumbers = [0, 255, 1023]
    # struct.pack takes the values as separate positional arguments, so the list is unpacked with *.
    byteArray = struct.pack("<HHL",*newNumbers)
    print(byteArray) # '\x00\x00\xFF\x00\xFF\x03\x00\x00'
    

    I want to achieve the same effect in C#, without resorting to huge, messy amounts of code like this:

    C#:

    byte[] byteArray = new byte[] { 255, 255, 0, 0, 0, 255, 1, 0, 0, 0 };
    byte[] temp;

    int[] values = new int[3];

    // Field 1: 32-bit value in bytes 0-3.
    temp = new byte[4];
    Array.Copy(byteArray, 0, temp, 0, 4);
    values[0] = BitConverter.ToInt32(temp, 0);

    // Field 2: 16-bit value in bytes 4-5, read as unsigned so values above 32767 are not sign-extended.
    temp = new byte[2];
    Array.Copy(byteArray, 4, temp, 0, 2);
    values[1] = BitConverter.ToUInt16(temp, 0);

    // Field 3: 32-bit value in bytes 6-9 (offset 4 + 2); offset 8 would run past the 10-byte array.
    temp = new byte[4];
    Array.Copy(byteArray, 6, temp, 0, 4);
    values[2] = BitConverter.ToInt32(temp, 0);

    // Now values contains an array of integer values.
    // It would be OK to assume a common maximum (e.g. Int64) and just cast up to that,
    // but we still have to consider the size of the source bytes.

    // Now the other way.
    // Reuse the existing variable; declaring values a second time in the same scope does not compile.
    values = new int[] { 0, 255, 1023 };
    byteArray = new byte[8];

    // BitConverter.GetBytes uses the machine's byte order. On a little-endian machine
    // (assumed throughout this example) the low 16 bits are the FIRST two bytes, so copy from offset 0.
    temp = BitConverter.GetBytes(values[0]);
    Array.Copy(temp,0,byteArray,0,2);

    temp = BitConverter.GetBytes(values[1]);
    Array.Copy(temp,0,byteArray,2,2);

    temp = BitConverter.GetBytes(values[2]);
    Array.Copy(temp,0,byteArray,4,4);
    

    Obviously the C# code I have is very specific and not in any way truly reusable.

    Advice?

  • Dai
    Dai over 9 years
    BinaryReader|Writer does not support writing/reading structs, only primitives, strings and byte arrays.
  • pm100
    pm100 over 9 years
    indeed - but it does exactly what he wants and is the C# logical equivalent of pack/unpack
  • Bryida
    Bryida over 7 years
    Awesome translation. Just a point to consider the cases i and I in the first loop should be part of the cases l and L in the second loop
  • aevitas
    aevitas almost 6 years
    If you pin the buffer using fixed (byte* b = buffer), you can avoid the additional heap allocation and can simply return the marshalled object directly (and not have to worry about cleanup either).
  • Jimi
    Jimi almost 5 years
    Can you provide an example how to use this class...? I'm asking because Python Pack is declared as: struct.pack(fmt, v1, v2, ...) Return a string containing the values v1, v2, ... packed according to the given format. The arguments must match the values required by the format exactly. if I call your Pack as StructConverter.Pack(new object[] { "<l", 34 }) which is the equivalent of struct.pack('<l', 34) of course I get: throw new ArgumentException("Unsupported object type found");
  • Velizar Hristov
    Velizar Hristov about 4 years
    BinaryWriter is little endian only, so it can't be used as-is for network (big-endian) byte order.