If you have an HTML string in C# and want to read the value of a particular attribute,
you can use the function below, which uses a regular expression (Regex) to extract it.
In the program below,
I extract the "src" attribute value of an HTML "img" tag.
Input string : <img alt="" src="/MediaCenter/PublishImages/DSC_0134.jpg"
width="150" style="border:0px solid" />
Output : /MediaCenter/PublishImages/DSC_0134.jpg
Code:
using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;
namespace Rextester
{
    /// <summary>
    /// Demonstrates extracting <c>src</c> attribute values from an HTML string
    /// with a regular expression.
    /// </summary>
    public class Program
    {
        // Matches the text between src=" and the next double quote.
        // - The inner negative lookbehind rejects "src" when it is only the tail of a
        //   longer attribute name (e.g. data-src, xsrc), which the plain lookbehind
        //   would otherwise accept.
        // - CultureInvariant keeps the case-insensitive comparison independent of the
        //   current thread culture.
        // Built once and reused instead of being re-created on every call.
        private static readonly Regex SrcValueRegex = new Regex(
            @"(?<=(?<![\w:.\-])src="").*?(?="")",
            RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

        /// <summary>
        /// Entry point: extracts the <c>src</c> value from a sample <c>img</c> tag
        /// and writes it to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        public static void Main(string[] args)
        {
            // A regular string literal cannot span physical lines, so the sample
            // markup is assembled by concatenation.
            string s = "<img alt=\"\" " +
                       "src=\"/MediaCenter/PublishImages/DSC_0134.jpg\" " +
                       "width=\"150\" style=\"border:0px solid\" />";
            var srcs = GetSrcInHTMLImgString(s);
            Console.WriteLine(srcs[0]); // Output: /MediaCenter/PublishImages/DSC_0134.jpg
        }

        /// <summary>
        /// Returns the value of every double-quoted <c>src</c> attribute found in
        /// <paramref name="htmlString"/>, in document order.
        /// </summary>
        /// <remarks>
        /// The pattern is not restricted to <c>img</c> tags: any <c>src="..."</c>
        /// occurrence is returned. Only values written as <c>src="value"</c>
        /// (double quotes, no whitespace around <c>=</c>, value on one line) are
        /// recognized.
        /// </remarks>
        /// <param name="htmlString">The HTML text to scan. May be null or empty.</param>
        /// <returns>
        /// A list of attribute values; empty when the input is null, empty, or
        /// contains no match.
        /// </returns>
        public static List<string> GetSrcInHTMLImgString(string htmlString)
        {
            List<string> srcs = new List<string>();

            // Regex.Matches throws on null input; treat "nothing to scan" as "no results".
            if (string.IsNullOrEmpty(htmlString))
            {
                return srcs;
            }

            foreach (Match match in SrcValueRegex.Matches(htmlString))
            {
                srcs.Add(match.Value);
            }

            return srcs;
        }
    }
}
We can make
this function more generic by passing in the attribute name and getting back the
matching values:
/// <summary>
/// Returns the value of every double-quoted occurrence of the attribute
/// <paramref name="attributeName"/> found in <paramref name="htmlString"/>,
/// in document order. The attribute name is matched case-insensitively.
/// </summary>
/// <remarks>
/// Only values written as <c>name="value"</c> (double quotes, no whitespace around
/// <c>=</c>, value on one line) are recognized.
/// </remarks>
/// <param name="htmlString">The HTML text to scan. May be null or empty.</param>
/// <param name="attributeName">The attribute whose values are wanted, e.g. "width".</param>
/// <returns>
/// A list of attribute values; empty when either argument is null or empty, or
/// when nothing matches.
/// </returns>
public static List<string> GetAttributeNameInHTMLString(string htmlString, string attributeName)
{
    List<string> attributeValues = new List<string>();

    // No text to scan, or no attribute to look for: there can be no values.
    // (An empty name would otherwise build a pattern that matches any ="..." text.)
    if (string.IsNullOrEmpty(htmlString) || string.IsNullOrEmpty(attributeName))
    {
        return attributeValues;
    }

    // Regex.Escape makes the name match literally, so "a.b" no longer matches "aXb".
    // The inner negative lookbehind rejects the name when it is only the tail of a
    // longer attribute name (e.g. "width" inside "data-width").
    string pattern = string.Format(
        @"(?<=(?<![\w:.\-]){0}="").*?(?="")",
        Regex.Escape(attributeName));

    // CultureInvariant keeps the case-insensitive comparison independent of the
    // current thread culture.
    Regex rgx = new Regex(pattern, RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

    foreach (Match match in rgx.Matches(htmlString))
    {
        attributeValues.Add(match.Value);
    }

    return attributeValues;
}
Call
this function as follows:
// Example: read the width and style attribute values from a sample <img> tag.
// A regular string literal cannot span physical lines, so the markup is
// assembled by concatenation.
string s = "<img alt=\"\" " +
           "src=\"/MediaCenter/PublishingImages/DSC_0134.jpg\" width=\"150\" " +
           "style=\"border:0px solid\" />";
var widths = GetAttributeNameInHTMLString(s, "width");
Console.WriteLine(widths[0]); // Output: 150
var styles = GetAttributeNameInHTMLString(s, "style");
Console.WriteLine(styles[0]); // Output: border:0px solid
It's a small
function, but very handy.
To test it,
you can use the online C# compiler ".NET
Fiddle" instead of creating a console application in Visual Studio. I often
use online tools like this for quick testing.
Let me know
if you have a better idea for getting an attribute value from an HTML string.
No comments:
Post a Comment