Generic CSV Writer

What follows is a generic class for generating CSV files.

Usage

    // Describe each column: its header text and how to render the cell for an item.
    var writer = new CsvWriter<MyObject> { Header = true };
    writer.AddColumn("Id", item => item.Id.ToString());
    writer.AddColumn("Name", item => item.Name);
    writer.AddColumn("Price", item => item.Price.ToString());
    writer.AddColumn("Guid", item => item.Guid);
    writer.AddColumn("Description", item => item.Description);
    writer.AddColumn("Url", item => item.Url);

    // Render the objects to CSV text in memory, then save that text to disk.
    IEnumerable<MyObject> objects = Load();
    string csv = writer.WriteToString(objects);
    File.WriteAllText("objects.csv", csv);

Classes

    /// <summary>
    /// Writes a sequence of <typeparamref name="T"/> items as CSV text, one row per
    /// item, using the columns registered through <see cref="AddColumn"/>.
    /// </summary>
    /// <remarks>
    /// Fields are separated by ',' and every row (including the header) ends with '\n'.
    /// A value is wrapped in double quotes, with embedded quotes doubled, when it
    /// contains a quote, comma, line feed or carriage return, or whenever
    /// <see cref="QuoteAll"/> is set. A null value is written as an empty field and is
    /// never quoted.
    /// </remarks>
    /// <typeparam name="T">Type of the items written as rows.</typeparam>
    class CsvWriter<T>
    {
        // Characters that force a value to be quoted. Kept in a shared array so that
        // WriteValue does not allocate a new params array for every single field.
        private static readonly char[] SpecialCharacters = { '\"', ',', '\x0A', '\x0D' };

        private readonly List<CsvColumn<T>> _columns = new List<CsvColumn<T>>();

        /// <summary>When true, a row containing the column names is written before the items.</summary>
        public bool Header { get; set; }

        /// <summary>When true, every non-null value is quoted, not only the ones that require it.</summary>
        public bool QuoteAll { get; set; }

        /// <summary>The registered columns, in the order they were added.</summary>
        public IEnumerable<CsvColumn<T>> Columns { get { return _columns; } }

        /// <summary>Appends a column to the end of the column list.</summary>
        /// <param name="name">Header text of the column; must be unique within this writer.</param>
        /// <param name="value">Delegate that produces the cell text for an item.</param>
        /// <exception cref="ArgumentNullException"><paramref name="name"/> or <paramref name="value"/> is null.</exception>
        /// <exception cref="ArgumentException">A column with the same name was already added.</exception>
        public void AddColumn(string name, Func<T, string> value)
        {
            if (name == null) throw new ArgumentNullException("name");
            if (value == null) throw new ArgumentNullException("value");

            foreach (CsvColumn<T> column in _columns)
            {
                // ArgumentException instead of the bare Exception type: callers that
                // catch Exception still work, and the message text is unchanged.
                if (column.Name == name)
                    throw new ArgumentException("Column with same name already added.");
            }

            _columns.Add(new CsvColumn<T>(name, value));
        }

        /// <summary>Renders the items to CSV and returns the result as a string.</summary>
        /// <param name="items">Items to write; null entries are skipped.</param>
        /// <returns>The complete CSV text.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="items"/> is null.</exception>
        public string WriteToString(IEnumerable<T> items)
        {
            if (items == null) throw new ArgumentNullException("items");

            using (StringWriter writer = new StringWriter())
            {
                WriteToStream(writer, items);
                return writer.ToString();
            }
        }

        /// <summary>Writes the optional header row followed by one row per item.</summary>
        /// <param name="stream">Destination writer; it is neither flushed nor closed here.</param>
        /// <param name="items">Items to write; null entries are skipped.</param>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> or <paramref name="items"/> is null.</exception>
        public void WriteToStream(TextWriter stream, IEnumerable<T> items)
        {
            if (stream == null) throw new ArgumentNullException("stream");
            if (items == null) throw new ArgumentNullException("items");

            if (Header)
                WriteHeader(stream);

            foreach (T item in items)
                WriteRow(stream, item);
        }

        // Writes the column names as one comma-separated row.
        private void WriteHeader(TextWriter stream)
        {
            bool first = true;
            foreach (CsvColumn<T> column in _columns)
            {
                if (first)
                    first = false;
                else
                    stream.Write(',');

                WriteValue(stream, column.Name);
            }
            stream.Write('\n');
        }

        // Writes one item as a comma-separated row. A null item produces no output at all.
        private void WriteRow(TextWriter stream, T item)
        {
            if (item == null) return;

            bool first = true;
            foreach (CsvColumn<T> column in _columns)
            {
                if (first)
                    first = false;
                else
                    stream.Write(',');

                WriteValue(stream, column.Value(item));
            }
            stream.Write('\n');
        }

        // Writes a single field, quoting it when required. Null becomes an empty field.
        private void WriteValue(TextWriter stream, string value)
        {
            if (value == null) return;

            if (QuoteAll || ContainsAny(value, SpecialCharacters))
            {
                stream.Write("\"");
                // A quote inside a quoted field is escaped by doubling it.
                stream.Write(value.Replace("\"", "\"\""));
                stream.Write("\"");
            }
            else
            {
                stream.Write(value);
            }
        }

        // True when value contains at least one of the given characters.
        private static bool ContainsAny(string value, params char[] characters)
        {
            return value.IndexOfAny(characters) != -1;
        }
    }

    /// <summary>
    /// One CSV column: its header name and the delegate that renders an item's cell text.
    /// </summary>
    /// <typeparam name="T">Type of the items the column reads from.</typeparam>
    class CsvColumn<T>
    {
        private readonly string _name;
        private readonly Func<T, string> _value;

        /// <summary>Creates a column from a header name and a cell-text delegate.</summary>
        /// <exception cref="ArgumentNullException"><paramref name="name"/> or <paramref name="value"/> is null.</exception>
        public CsvColumn(string name, Func<T, string> value)
        {
            if (name == null)
            {
                throw new ArgumentNullException("name");
            }

            if (value == null)
            {
                throw new ArgumentNullException("value");
            }

            _name = name;
            _value = value;
        }

        /// <summary>Header text of the column.</summary>
        public string Name { get { return _name; } }

        /// <summary>Delegate that produces the cell text for an item.</summary>
        public Func<T, string> Value { get { return _value; } }
    }

 

Auto ToString()

I wanted to print the contents of some objects for debugging purposes. One option was to override each class’s ToString method and simply call Console.WriteLine(myObject). But once I do this on a few classes, I suddenly have to maintain all those ToString methods whenever the classes’ properties or fields change.

All that extra work, just for some debug statements? I think not – I’d rather the computer do the work for me. After all, it always knows what properties my objects have. So I wrote a method for automatically generating a “pretty” snapshot of an object’s state. C# code follows.

// A type with no public instance properties or fields is rendered on one line.
Console.WriteLine(AutoString(DBNull.Value));
// DBNull { }

// Passing true for includeNonPublic also lists non-public members.
Console.WriteLine(AutoString("A String", includeNonPublic: true));
// String
// {
//   FirstChar:      A
//   Length:         8
//   m_firstChar:    A
//   m_stringLength: 8
// }

// Each member is printed through its own ToString(); nested objects are not expanded.
var customer = new Customer
{
    Id = 12,
    Name = "Smith",
    Address = new Address
    {
        City = "Springfield"
    }
};
Console.WriteLine(AutoString(customer));
// Customer
// {
//   Address: TestApp.Address
//   Id:      12
//   Name:    Smith
//   Region:
// }

/// <summary>
/// Builds a human-readable snapshot of an object's state: the type name followed by
/// its instance properties and fields, sorted by name, one "Name: value" line each
/// with the values aligned in a column.
/// </summary>
/// <param name="obj">Object to describe; may be null.</param>
/// <param name="includeNonPublic">
/// When true, non-public instance properties and fields are listed as well.
/// </param>
/// <returns>
/// "null" for a null reference, "TypeName { }" when the type has no matching members,
/// otherwise a multi-line block. Member values that are null are printed as empty text.
/// </returns>
/// <remarks>
/// Indexers and write-only properties are skipped because they cannot be read without
/// arguments or a getter. Exceptions thrown while reading a member are not caught here.
/// </remarks>
public static string AutoString(object obj, bool includeNonPublic = false)
{
    if (obj == null) return "null";

    Type type = obj.GetType();

    BindingFlags flags = BindingFlags.Instance | BindingFlags.Public;
    if (includeNonPublic)
        flags |= BindingFlags.NonPublic;

    // CanRead filters out write-only properties: PropertyInfo.GetValue throws an
    // ArgumentException for a property that has no get accessor.
    List<MemberInfo> members = type.GetProperties(flags)
                                    .Where(property => property.CanRead
                                                       && property.GetIndexParameters().Length == 0)
                                    .Cast<MemberInfo>()
                                    .Concat(type.GetFields(flags))
                                    .OrderBy(member => member.Name)
                                    .ToList();

    StringBuilder sb = new StringBuilder();

    if (members.Count > 0)
    {
        sb.AppendLine(type.Name);
        sb.AppendLine("{");

        // Pad every name to the longest one so the values line up.
        int longest = members.Max(m => m.Name.Length);
        foreach (MemberInfo member in members)
        {
            sb.Append("  ");
            sb.Append(member.Name);
            sb.Append(":");
            sb.Append(' ', longest - member.Name.Length + 1);

            // Every entry is either a property or a field; read it with the matching helper.
            PropertyInfo property = member as PropertyInfo;
            string text = property != null
                ? GetValue(property, obj)
                : GetValue(member as FieldInfo, obj);
            sb.Append(text);
            sb.AppendLine();
        }

        sb.Append("}");
    }
    else
    {
        sb.Append(type.Name);
        sb.Append(" { }");
    }

    return sb.ToString();
}

/// <summary>Reads a property from an instance and returns the value as text.</summary>
/// <param name="property">Property to read; may be null.</param>
/// <param name="instance">Object the property is read from.</param>
/// <returns>
/// ToString() of the property value, or null when <paramref name="property"/> is null
/// or the value itself is null.
/// </returns>
private static string GetValue(PropertyInfo property, object instance)
{
    object current = property != null ? property.GetValue(instance, null) : null;
    return current != null ? current.ToString() : null;
}

/// <summary>Reads a field from an instance and returns the value as text.</summary>
/// <param name="field">Field to read; may be null.</param>
/// <param name="instance">Object the field is read from.</param>
/// <returns>
/// ToString() of the field value, or null when <paramref name="field"/> is null
/// or the value itself is null.
/// </returns>
private static string GetValue(FieldInfo field, object instance)
{
    object current = field != null ? field.GetValue(instance) : null;
    return current != null ? current.ToString() : null;
}

Sequentially Accessing a Rectangular Array

We know that .NET performs optimizations when accessing rectangular arrays, but for sequential access should the inner loop be on the first or second index? Is there even a difference?

The Code

using System;

/// <summary>
/// Micro-benchmark comparing the two loop orders for walking a rectangular array.
/// </summary>
class Program
{
    /// <summary>
    /// Sums a 512x512 int array 1000 times with the second index innermost, then again
    /// with the first index innermost, and prints the elapsed milliseconds of each run.
    /// </summary>
    static void Main(string[] args)
    {
        int size = 512;
        int count = 1000;
        int[,] array = new int[size, size];
        int total = 0;

        // Pass 1: the inner loop advances the second index.
        var timer = System.Diagnostics.Stopwatch.StartNew();
        for (int pass = 0; pass < count; pass++)
        {
            for (int x = 0; x < size; x++)
            {
                for (int y = 0; y < size; y++)
                {
                    total += array[x, y];
                }
            }
        }
        timer.Stop();
        Console.WriteLine("Sequential access by [x,y]: {0}ms", timer.ElapsedMilliseconds);

        // Pass 2: same loops, but the inner loop now advances the first index.
        timer = System.Diagnostics.Stopwatch.StartNew();
        for (int pass = 0; pass < count; pass++)
        {
            for (int x = 0; x < size; x++)
            {
                for (int y = 0; y < size; y++)
                {
                    total += array[y, x];
                }
            }
        }
        timer.Stop();
        Console.WriteLine("Sequential access by [y,x]: {0}ms", timer.ElapsedMilliseconds);

        // Printing the sum keeps the result of the loops in use.
        Console.WriteLine(total);
        Console.Read();
    }
}

 

The Output

Sequential access by [x,y]: 825ms
Sequential access by [y,x]: 2414ms
0

 

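What a difference! Incrementing the first index in the inner loop takes almost 3 times longer, probably because there are so many more cache misses. Rectangular arrays in .NET are stored in row-major order, so elements that differ only in the last index sit next to each other in memory, while stepping the first index jumps a whole row (here 512 ints) on every access.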

 

The Verdict

For best performance, process your rectangular arrays by incrementing the first index in an outer loop, and the second index in an inner loop.

Concurrency Encapsulation

One of my projects is a perfect candidate for concurrency. There is a collection of entities that each need to be processed, and each can be processed independently. For the sake of Separation of Concerns, I developed a WorkManager class dedicated to performing an action on the elements of an enumeration. Before I spill the code, here is how it is used:

// Fetch the entities that need processing.
Entity[] myCollection = LoadEntities();

// Hand the collection to a WorkManager together with the action to run on each entity.
var manager = new WorkManager();
manager.ForEach(myCollection, entity => entity.Process());

Note the lambda expression above. It means that for each Entity in myCollection, the code will execute its Process method. The program will create a separate thread for each processor, and evenly distribute the workload across the threads.

And without further delay, the WorkManager class:

Continue reading Concurrency Encapsulation

Cleanup WSUS – Remove Computers No Longer in the Domain

One thing I love about WSUS is the ability to monitor the presence of clients. It gives me a good approximation of the last time a computer was on the network. I often use this information to help me clean missing computers out of Active Directory.

But what about when a computer is removed from the domain before it is removed from WSUS? Rather than manually checking, I wrote an IronPython script that compares the list of computers in Active Directory with the computers on WSUS. When I run this script, it lists computers that should be removed from WSUS, and deletes them for me (after prompting).

Continue reading Cleanup WSUS – Remove Computers No Longer in the Domain

Embedding IronPython

In my current programming project, I’ve embedded IronPython in a C# program. I thought I would share the basics of embedding a scripting engine. I imagine the process would be the same for any language that uses the DLR (Dynamic Language Runtime), like IronRuby. Here is a sample using IronPython 2.0 Beta 5.

using System;
using IronPython.Hosting;
using Microsoft.Scripting;
using Microsoft.Scripting.Hosting;

/// <summary>
/// Sample host that defines a Python function through IronPython and then calls it
/// from C# in three different ways.
/// </summary>
public class Program
{
    // Delegate with the same shape as the Python factorial function
    delegate int FactorialDelegate(int n);

    static void Main(string[] args)
    {
        // Python source of the factorial function, one array element per line
        string[] pythonLines =
        {
            "def factorial(n):",
            "  for i in range(1, n):",
            "    n = n * i",
            "  return n"
        };
        string pythonSource = String.Join("\r", pythonLines);

        // Create the IronPython engine and a scope/module for the code to live in
        ScriptEngine engine = Python.CreateEngine();
        ScriptScope scope = engine.CreateScope();

        // Wrap the text as a script, compile it, and run it in the scope.
        // After this the factorial function exists in the IronPython environment.
        engine.CreateScriptSourceFromString(pythonSource, SourceCodeKind.Statements)
              .Compile()
              .Execute(scope);

        // 1) Run a Python statement that uses a variable supplied from C#
        scope.SetVariable("x", 5);
        engine.CreateScriptSourceFromString("print factorial(x)", SourceCodeKind.SingleStatement)
              .Execute(scope);                    //outputs 120

        // 2) Evaluate a Python expression and bring the result back into C#
        int evaluated = scope.Execute<int>("factorial(6)");
        Console.WriteLine(evaluated);             //outputs 720

        // 3) Fetch the Python function as a delegate and call it directly from C#
        FactorialDelegate factorial = scope.GetVariable<FactorialDelegate>("factorial");
        int invoked = factorial(7);
        Console.WriteLine(invoked);               //outputs 5040

        Console.Read();
    }
}

Math.BigMul Exposed

Today a friend and I were reflecting through System.Math (courtesy of IronPython) and we noticed the BigMul method:

Math.BigMul(Int32, Int32) : Int64

Why have a method just for multiplication? It seems too trivial an operation to justify adding a method to the .NET Framework. After all, multiplication with casting does the same thing:

(long)a * (long)b

Being optimistic, I suggested that perhaps Microsoft’s BigMul implements a faster, more efficient multiplication algorithm. Maybe there is a clever way to multiply two 32-bit numbers without explicitly casting them to 64-bit. Naturally, I wrote a simple speed test.

/// <summary>
/// Speed test: performs one billion inline (long)a * (long)b multiplications and one
/// billion Math.BigMul(a, b) calls, printing the last product and the elapsed time of
/// each loop.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    const int iterations = 1000000000;

    int a = 40993;
    int b = 69872;
    long c = 0;

    // Timed with Stopwatch rather than DateTime.Now: the wall clock has coarse
    // resolution and can be adjusted while the loop is running.
    System.Diagnostics.Stopwatch watch;

    Console.WriteLine("Inline multiplication");
    watch = System.Diagnostics.Stopwatch.StartNew();
    for (int i = 0; i < iterations; i++)
        c = (long)a * (long)b;
    watch.Stop();
    Console.WriteLine(c);
    Console.WriteLine(watch.Elapsed.ToString());
    Console.WriteLine();

    Console.WriteLine("Math.BigMul");
    watch = System.Diagnostics.Stopwatch.StartNew();
    for (int i = 0; i < iterations; i++)
        c = Math.BigMul(a, b);
    watch.Stop();
    Console.WriteLine(c);
    Console.WriteLine(watch.Elapsed.ToString());
    Console.WriteLine();

    // Keep the console window open until a key is pressed.
    Console.Read();
}

The results were not encouraging.

Continue reading Math.BigMul Exposed