You had the right idea. I came up with this:
Code:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using Microsoft.Office.Interop.Word;
namespace WordReader
{
    /// <summary>
    /// Console program that opens <c>Albums.docx</c> through Word automation, extracts
    /// album entries from its paragraphs and prints them to the console.
    /// </summary>
    class Program
    {
        // Expected paragraph layout: "<artist> (<title>) - <year>,<genre>".
        // Built once instead of on every run of the parsing loop.
        private static readonly Regex AlbumPattern =
            new Regex(@"^(?<artist>.+) \((?<title>.+)\) - (?<year>.+),(?<genre>.+)");

        /// <summary>
        /// Entry point: reads every paragraph of the document, collects the ones that
        /// match the album layout, shuts Word down, then prints the collected albums.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string path = Environment.CurrentDirectory + @"\..\Albums.docx";
            List<Album> albums = new List<Album>();

            Application app = new Application();
            try
            {
                Document doc = app.Documents.Open(path, ReadOnly: true);
                foreach (Paragraph paragraph in doc.Paragraphs)
                {
                    Album album;
                    if (TryParseAlbum(paragraph.Range.Text, out album))
                    {
                        albums.Add(album);
                    }
                }
            }
            finally
            {
                // Always shut Word down, even when opening or parsing fails; otherwise
                // the automation-started Word process keeps running in the background.
                try
                {
                    // The document was opened read-only, so never ask to save changes.
                    app.Documents.Close(SaveChanges: WdSaveOptions.wdDoNotSaveChanges);
                }
                finally
                {
                    // The cast selects the Quit method over the Quit event of the same name.
                    ((_Application)app).Quit();
                }
            }

            foreach (Album album in albums)
            {
                Console.WriteLine("Artist: {0}", album.Artist);
                Console.WriteLine("Title: {0}", album.Title);
                Console.WriteLine("Year: {0}", album.Year);
                Console.WriteLine("Genre: {0}", album.Genre);
                Console.WriteLine();
            }
        }

        /// <summary>
        /// Tries to read one album entry from the text of a single paragraph.
        /// </summary>
        /// <param name="text">Raw paragraph text; may be null or empty.</param>
        /// <param name="album">The parsed album when the method returns true; otherwise an empty album.</param>
        /// <returns>True when <paramref name="text"/> matches the album layout.</returns>
        private static bool TryParseAlbum(string text, out Album album)
        {
            album = new Album();
            if (string.IsNullOrEmpty(text))
            {
                return false;
            }

            Match match = AlbumPattern.Match(text);
            if (!match.Success)
            {
                return false;
            }

            // A paragraph's range text ends with the paragraph mark ('\r'), which '.'
            // happily captures, and a space after the comma ends up in the genre group.
            // Trim so the stored values hold only the visible text.
            album.Artist = match.Groups["artist"].Value.Trim();
            album.Title = match.Groups["title"].Value.Trim();
            album.Year = match.Groups["year"].Value.Trim();
            album.Genre = match.Groups["genre"].Value.Trim();
            return true;
        }

        /// <summary>
        /// One album entry as read from the document; every value is kept as text.
        /// </summary>
        struct Album
        {
            public string Artist;
            public string Title;
            public string Year;
            public string Genre;
        }
    }
}
Output:
Code:
Artist: Metallica
Title: Ride the Lightning
Year: 1985
Genre: Metal

Artist: Metallica
Title: Master of Puppets
Year: 1986
Genre: Metal