Decode Yahoo Messenger archive in c#

Filed Under (c#) by The Chef on 01-04-2010

Tagged Under : , ,

One of the better features of yahoo messenger is the archive viewer. Unlike msn messenger, yahoo stores the archive in a structured format. One drawback of the archive viewer is that you have to be online and logged into yahoo to view your archives.

The messages in the archive file are not encrypted; Yahoo uses a simple XOR algorithm to encode the messages (so much for security!). Every .dat file begins with a timestamp, and every message is also preceded by a timestamp. Every archived message has a 16-byte header at the beginning. Take a look at a typical header and message:

0000:0000  B6 5B 7B 40 00 00 00 00 01 00 00 00 00 00 00 00
0000:0010  00 00 00 00 B6 5B 7B 40 06 00 00 00 01 00 00 00
0000:0020  03 00 00 00 1E 0C 32 00 00 00 00 D7 66 7B 40 06

The header starts from the second B6 5B 7B 40 (at offset 0x14 in the dump above) and ends at 03 00 00 00.
The first 5 bytes of the header are the timestamp representing the time at which the message was sent or received (the code below decodes the first 4 of them as a 32-bit count of seconds since 1 January 1970).
This is followed by 3 reserved bytes always set to 00
The 9th byte indicates whether the message was received by a user or sent by the user to the buddy. If the value is 01 it indicates the message was received and if it is 00 it indicates that the message was sent by the user to the buddy.
This is again followed by 3 reserved bytes always set to 00
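The 13th byte indicates the length of the message that is to follow (the code below also reads the 14th byte as the high-order byte of the length, so messages longer than 255 bytes are handled)
This is again followed by reserved bytes always set to 00 (3 of them if the length is a single byte; the code below skips 2 after its two-byte length)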
The message is encoded using the XOR algorithm with the user id being one of the keys.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Text.RegularExpressions;

namespace YMDecoder
{
    /// <summary>
    /// Reads a Yahoo! Messenger archive (.dat) file into a list of messages and
    /// renders the conversation as an HTML fragment.
    /// </summary>
    /// <remarks>
    /// Each record is a 16-byte header, the XOR-encoded message body and a 4-byte
    /// trailer. The XOR key is the user id taken from the archive file name.
    /// </remarks>
    class ymarchive
    {
        // Record layout, as consumed by the constructor.
        private const int HeaderLength = 16;
        private const int DirectionOffset = 8;
        private const int LengthOffset = 12;
        private const int TrailerLength = 4;

        /// <summary>Messages in the order they appear in the archive file.</summary>
        public List<msg> Conversation = new List<msg>();

        /// <summary>User id extracted from the archive file name; also used as the XOR key.</summary>
        public String Username;

        /// <summary>Name shown for the other party, as supplied to the constructor.</summary>
        public String Buddy;

        /// <summary>
        /// Renders the conversation as HTML: an inline style block followed by one
        /// line per message (timestamp, speaker, text).
        /// </summary>
        /// <returns>The HTML fragment; only the style block when there are no messages.</returns>
        public override String ToString()
        {
            // NOTE(review): names and message text are inserted without HTML encoding.
            // Output is kept as-is because escaping would change what callers render;
            // confirm whether the archive content can be trusted before showing it in a browser.
            StringBuilder html = new StringBuilder("<style>.msg{	font-family:arial,serif;font-size: 12px; } .nick{	font-size: 12px;color: #0000aa;	font-weight: bold;} .me{font-size: 12px;color: red;	font-weight: bold;}</style>");
            foreach (msg entry in Conversation)
            {
                bool sentByUser = entry.Direction == direction.sent;
                html.AppendFormat(
                    "<span class=\"time\">[{0}]</span> <span class=\"{3}\">{1}:</span> <span class=\"msg\">{2}</span><br>",
                    entry.timestamp,
                    sentByUser ? Username : Buddy,
                    entry.message,
                    sentByUser ? "me" : "nick");
            }
            return html.ToString();
        }

        /// <summary>
        /// Parses the given archive file and fills <see cref="Conversation"/>.
        /// </summary>
        /// <param name="ArchiveFile">
        /// Path of the archive. The file name must match <c>digits-userid.dat</c>;
        /// the user id part becomes <see cref="Username"/> and the decoding key.
        /// </param>
        /// <param name="buddy">Name to show for the other party in <see cref="ToString"/>.</param>
        /// <exception cref="ArgumentException">
        /// The file name does not contain a user id, so no decoding key is available.
        /// </exception>
        public ymarchive(String ArchiveFile, String buddy)
        {
            this.Buddy = buddy;

            // The using block releases the file handle even when parsing throws.
            using (FileStream archive = File.OpenRead(ArchiveFile))
            {
                FileInfo fi = new FileInfo(ArchiveFile);
                this.Username = Regex.Match(fi.Name, @"\d+-(.+?)\.dat").Groups[1].Value;
                if (this.Username.Length == 0)
                {
                    // Without a user id the XOR key would be empty and decoding
                    // would fail later with a DivideByZeroException.
                    throw new ArgumentException(
                        "The archive file name must have the form '<digits>-<userid>.dat'.",
                        "ArchiveFile");
                }

                byte[] key = Encoding.ASCII.GetBytes(this.Username);
                byte[] header = new byte[HeaderLength];
                byte[] trailer = new byte[TrailerLength];

                while (archive.Position < archive.Length)
                {
                    // A record cut short by the end of the file is dropped rather
                    // than turned into a message made of stale or zero bytes.
                    if (!TryReadExact(archive, header, header.Length))
                    {
                        break;
                    }

                    // Two-byte little-endian body length; the next two bytes are skipped.
                    int bodyLength = header[LengthOffset] + 256 * header[LengthOffset + 1];
                    byte[] body = new byte[bodyLength];
                    if (!TryReadExact(archive, body, body.Length))
                    {
                        break;
                    }

                    msg entry = new msg();
                    entry.timestamp = ByteArrayToDateTime(header);
                    entry.Direction = header[DirectionOffset] == 0 ? direction.sent : direction.received;
                    entry.message = Encoding.ASCII.GetString(decode(body, key));
                    this.Conversation.Add(entry);

                    // Skip the trailer; a missing trailer after the last record is
                    // tolerated because the loop condition ends parsing anyway.
                    TryReadExact(archive, trailer, trailer.Length);
                }
            }
        }

        /// <summary>
        /// Fills <paramref name="buffer"/> with exactly <paramref name="count"/> bytes,
        /// looping because a single Stream.Read call may return fewer than requested.
        /// </summary>
        /// <returns><c>false</c> if the stream ended before all bytes were read.</returns>
        private static bool TryReadExact(Stream source, byte[] buffer, int count)
        {
            int filled = 0;
            while (filled < count)
            {
                int got = source.Read(buffer, filled, count - filled);
                if (got == 0)
                {
                    return false;
                }
                filled += got;
            }
            return true;
        }

        /// <summary>
        /// XORs <paramref name="crypt"/> with the repeating <paramref name="key"/> and
        /// replaces control bytes (below 32) with a space.
        /// </summary>
        /// <param name="crypt">Encoded bytes; not modified.</param>
        /// <param name="key">Repeating XOR key; must not be empty.</param>
        /// <returns>A new array of the same length holding the decoded bytes.</returns>
        /// <exception cref="ArgumentException"><paramref name="key"/> is empty.</exception>
        private static byte[] decode(byte[] crypt, byte[] key)
        {
            if (key.Length == 0)
            {
                throw new ArgumentException("The XOR key must not be empty.", "key");
            }

            byte[] res = new byte[crypt.Length];
            for (int i = 0; i < crypt.Length; i++)
            {
                byte chr = (byte)(crypt[i] ^ key[i % key.Length]);
                res[i] = chr > 31 ? chr : (byte)32;
            }
            return res;
        }

        /// <summary>
        /// Converts the first four bytes of <paramref name="arr"/>, read as a
        /// little-endian unsigned integer, into a date by adding that many seconds
        /// to 1 January 1970.
        /// </summary>
        /// <param name="arr">Buffer holding at least four bytes.</param>
        /// <returns>The resulting date; its Kind is left unspecified.</returns>
        private static DateTime ByteArrayToDateTime(byte[] arr)
        {
            // Assembled by hand so the result does not depend on the host's byte
            // order, unlike BitConverter.ToUInt32.
            uint ts = (uint)arr[0]
                | ((uint)arr[1] << 8)
                | ((uint)arr[2] << 16)
                | ((uint)arr[3] << 24);
            DateTime origin = new DateTime(1970, 1, 1, 0, 0, 0, 0);
            return origin.AddSeconds(ts);
        }
    }

    /// <summary>
    /// A single chat message: when it happened, which way it travelled and its text.
    /// </summary>
    public class msg
    {
        /// <summary>Time attached to the message.</summary>
        public System.DateTime timestamp;

        /// <summary>Whether the message was sent or received.</summary>
        public direction Direction;

        /// <summary>Text of the message.</summary>
        public string message;
    }

    /// <summary>
    /// Which way a message travelled. <c>received</c> is the zero value and
    /// therefore the default.
    /// </summary>
    public enum direction
    {
        /// <summary>The message came in from the other party.</summary>
        received = 0,

        /// <summary>The message went out from the user.</summary>
        sent = 1
    }
}