Strange thing (and uh I suck at debugging):
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using YTParser;
using CSVLib;
using System.IO;
using System.Windows.Forms;
using System.Data;
namespace TrendingVideos
{
public class Crawler
{
// Class instances
private Core youTubeCore;
private CsvFile csvFile;
// Main form control instances
private Label actionInstance;
private ListBox logInstance;
// Status
private int parsedItems;
private bool bTaskRunning = false;
// List of result lists
private List<List<YoutubeItem>> youTubeItems = new List<List<YoutubeItem>>();
private DataTable dataTable = new DataTable();
public Crawler(string path)
{
youTubeCore = new Core();
csvFile = new CsvFile(path, FileAccess.ReadWrite,
FileMode.OpenOrCreate);
}
public void Update(Label action, Label itemsParsed, ListBox log)
{
// Set main form instances
actionInstance = action;
logInstance = log;
// Update instances
if (bTaskRunning) action.Text = "Working"; else action.Text = "Idle";
itemsParsed.Text = parsedItems.ToString();
}
public void Dispose()
{
csvFile.Dispose();
}
public void RunTask(List<string> categories)
{
bTaskRunning = true;
foreach (string category in categories)
{
parsedItems += 1;
dataTable.Columns.Add(category).Caption = category;
try
{
List<YoutubeItem> items = youTubeCore.GetMostViewedItems(category);
if (items.Count > 0)
AddItems(items);
else
throw new Exception("No items have been found");
}
catch (Exception e)
{
MessageBox.Show(e.Message, "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
}
}
SaveDatabase();
bTaskRunning = false;
}
private void AddItems(List<YoutubeItem> items)
{
foreach (YoutubeItem item in items)
{
string value = "Title: " + item.Title +
"\nDescription: " + item.Desc +
"\nUploader: " + item.Uploader +
"\nURL: " + item.Url +
"\nViews: " + item.Views +
"\nKeywords: " + item.Keywords;
dataTable.Rows.Add(value);
}
}
private void SaveDatabase()
{
csvFile.Write(dataTable);
}
}
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.IO;
using System.Net;
namespace YTParser
{
public class Core
{
private string FixCategory(string category)
{
switch(category)
{
case "Autos & Vehicles":
return "vehicles";
break;
case "Comedy":
return "comedy";
break;
case "Education":
return "education";
break;
case "Entertainment":
return "entertainment";
break;
case "Film & Animation":
return "film";
break;
//ToDO
}
return "";
}
/// <summary>
/// Gets recent most viewed items on YouTube.
/// </summary>
/// <returns>List of YoutubeItem classes</returns>
public List<YoutubeItem> GetMostViewedItems(string category)
{
// Link preparation
string link = "";
if (category == "all")
link = "http://youtube.com/charts/videos_views?t=t";
else
link = "http://youtube.com/charts/videos_views/" + category + "?t=t";
// Expression
Regex exp = new Regex(LoadRegex("Resources\\MostViewedItem.regex"));
// Fetch data
string data = GetData(link);
// Parse data
MatchCollection matches = exp.Matches(data);
// Prepare list
List<YoutubeItem> items = new List<YoutubeItem>();
// Create items
if (matches.Count > 0)
{
foreach (Match match in matches)
{
items.Add(CreateItem(match));
}
}
else
throw new Exception("No items found");
// Return things!
return items;
}
/// <summary>
/// Gets recent most viewed items on YouTube with keyword.
/// </summary>
/// <returns>List of YoutubeItem classes</returns>
public List<YoutubeItem> GetMostViewedItemsWithKeyword(string keyword)
{
// Link preparation
string link = "http://www.youtube.com/results?search_type=videos&search_query="+keyword+"&search_sort=video_view_count";
// Expression
Regex exp = new Regex(LoadRegex("Resources\\MostViewedItemWithKeyword.regex"));
// Fetch data
string data = GetData(link);
// Parse data
MatchCollection matches = exp.Matches(data);
// Prepare list
List<YoutubeItem> items = new List<YoutubeItem>();
// Create items
if (matches.Count > 0)
{
foreach (Match match in matches)
{
items.Add(CreateItem(match));
}
}
else
throw new Exception("No items found");
// Return things!
return items;
}
private string LoadRegex(string path)
{
// Stream
FileStream fs = new FileStream(path, FileMode.Open, FileAccess.Read);
StreamReader sr = new StreamReader(fs);
// Load regex, flush streams and return expression
string regex = sr.ReadToEnd().Trim();
sr.Dispose(); fs.Dispose();
return regex;
}
private string GetData(string url)
{
// Client
WebClient webClient = new WebClient();
// Begin fetching
string data = new UTF8Encoding().GetString(webClient.DownloadData(url));
webClient.DownloadFile(url, "document.tmp");
// Flush
webClient.Dispose();
// Return the data
return data;
}
private YoutubeItem CreateItem(Match match)
{
// Spawn new item class
YoutubeItem item = new YoutubeItem();
// Set values
if (match.Captures.Count > 0)
{
item.Title = match.Captures[0].Value;
item.Desc = match.Captures[1].Value;
item.Uploader = match.Captures[2].Value;
item.Views = match.Captures[3].Value;
}
return item;
}
}
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Data;
namespace CSVLib
{
public class CsvFile
{
private FileStream fileStream;
private StreamWriter writer;
private StreamReader reader;
public CsvFile(string path, FileAccess access, FileMode mode)
{
fileStream = new FileStream(path, mode, access);
writer = new StreamWriter(fileStream);
reader = new StreamReader(fileStream);
}
public void Write(DataTable data)
{
CsvWriter.WriteToStream(writer, data, false, false);
}
public DataTable Read()
{
return CsvParser.Parse(reader.ReadToEnd());
}
public void Dispose()
{
reader.Dispose();
writer.Dispose();
fileStream.Flush();
fileStream.Dispose();
}
}
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Data;
using System.IO;
namespace CSVLib
{
public class CsvWriter
{
public static string WriteToString(DataTable table, bool header, bool quoteall)
{
StringWriter writer = new StringWriter();
WriteToStream(writer, table, header, quoteall);
return writer.ToString();
}
public static void WriteToStream(TextWriter stream, DataTable table, bool header, bool quoteall)
{
if (header)
{
for (int i = 0; i < table.Columns.Count; i++)
{
WriteItem(stream, table.Columns[i].Caption, quoteall);
if (i < table.Columns.Count - 1)
stream.Write(',');
else
stream.Write('\n');
}
}
foreach (DataRow row in table.Rows)
{
for (int i = 0; i < table.Columns.Count; i++)
{
WriteItem(stream, row[i], quoteall);
if (i < table.Columns.Count - 1)
stream.Write(',');
else
stream.Write('\n');
}
}
}
private static void WriteItem(TextWriter stream, object item, bool quoteall)
{
if (item == null)
return;
string s = item.ToString();
if (quoteall || s.IndexOfAny("\",\x0A\x0D".ToCharArray()) > -1)
stream.Write("\"" + s.Replace("\"", "\"\"") + "\"");
else
stream.Write(s);
}
}
}