Is there a way to programmatically determine if a font file has a specific Unicode Glyph?
I'm working on a project that generates PDFs that can contain fairly complex math and science formulas. The text is rendered in Times New Roman, which has pretty good Unicode coverage, but it is not complete. We have a system in place to swap in a more Unicode-complete font for code points that don't have a glyph in TNR (like most of the "stranger" math symbols), but I can't seem to find a way to query the *.ttf file to see if a given glyph is present. So far, I've just hard-coded a lookup table of which code points are present, but I'd much prefer an automatic solution.
I'm using VB.Net in a web system under ASP.net, but solutions in any programming language/environment would be appreciated.
Edit: The win32 solution looks excellent, but the specific case I'm trying to solve is in an ASP.Net web system. Is there a way to do this without including the windows API DLLs into my web site?
Solution 1:
Here's a pass at it using C# and the Windows API.
/// <summary>
/// P/Invoke binding for <c>GetFontUnicodeRanges</c> in gdi32.dll.
/// </summary>
/// <param name="hdc">Device context handle.</param>
/// <param name="lpgs">Pointer to the buffer that receives the glyph set, or <see cref="IntPtr.Zero"/>.</param>
/// <returns>The unsigned 32-bit value returned by the native function.</returns>
[DllImport("gdi32.dll")]
public static extern uint GetFontUnicodeRanges(
    IntPtr hdc,
    IntPtr lpgs);

/// <summary>
/// P/Invoke binding for <c>SelectObject</c> in gdi32.dll.
/// </summary>
/// <param name="hDC">Device context handle.</param>
/// <param name="hObject">Handle of the GDI object to select.</param>
/// <returns>The handle returned by the native function.</returns>
[DllImport("gdi32.dll")]
public static extern IntPtr SelectObject(
    IntPtr hDC,
    IntPtr hObject);
/// <summary>
/// An inclusive range of 16-bit character codes, from <see cref="Low"/> to <see cref="High"/>.
/// </summary>
public struct FontRange
{
    /// <summary>First character code in the range.</summary>
    public ushort Low;

    /// <summary>Last character code in the range.</summary>
    public ushort High;
}
[DllImport("gdi32.dll")]
private static extern bool DeleteObject(IntPtr hObject);

/// <summary>
/// Returns the character-code ranges that GDI reports for <paramref name="font"/>.
/// </summary>
/// <param name="font">The font to inspect.</param>
/// <returns>
/// One <see cref="FontRange"/> per range reported by <c>GetFontUnicodeRanges</c>;
/// an empty list when the call reports no data.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="font"/> is null.</exception>
public List<FontRange> GetUnicodeRangesForFont(Font font)
{
    // Byte offsets into the native GLYPHSET structure:
    // cbThis (0), flAccel (4), cGlyphsSupported (8), cRanges (12), ranges[] (16).
    const int RangeCountOffset = 12;
    const int FirstRangeOffset = 16;
    // Each WCRANGE is a 16-bit wcLow followed by a 16-bit cGlyphs.
    const int RangeSize = 4;

    if (font == null)
    {
        throw new ArgumentNullException("font");
    }

    List<FontRange> fontRanges = new List<FontRange>();
    IntPtr hFont = IntPtr.Zero;
    IntPtr glyphSet = IntPtr.Zero;

    using (Graphics g = Graphics.FromHwnd(IntPtr.Zero))
    {
        IntPtr hdc = g.GetHdc();
        try
        {
            hFont = font.ToHfont();
            IntPtr old = SelectObject(hdc, hFont);
            try
            {
                // First call with a null buffer asks for the required size in bytes.
                uint size = GetFontUnicodeRanges(hdc, IntPtr.Zero);
                if (size == 0)
                {
                    return fontRanges;
                }

                glyphSet = Marshal.AllocHGlobal((int)size);
                if (GetFontUnicodeRanges(hdc, glyphSet) == 0)
                {
                    return fontRanges;
                }

                int count = Marshal.ReadInt32(glyphSet, RangeCountOffset);
                for (int i = 0; i < count; i++)
                {
                    int offset = FirstRangeOffset + i * RangeSize;

                    // Both fields are unsigned 16-bit values; reinterpret the signed reads
                    // explicitly so the result does not depend on the project's checked setting.
                    ushort low = unchecked((ushort)Marshal.ReadInt16(glyphSet, offset));
                    ushort glyphCount = unchecked((ushort)Marshal.ReadInt16(glyphSet, offset + 2));

                    FontRange range = new FontRange();
                    range.Low = low;
                    range.High = unchecked((ushort)(low + glyphCount - 1));
                    fontRanges.Add(range);
                }
            }
            finally
            {
                // Deselect the font before it is deleted below.
                SelectObject(hdc, old);
            }
        }
        finally
        {
            if (glyphSet != IntPtr.Zero)
            {
                Marshal.FreeHGlobal(glyphSet);
            }

            // Font.ToHfont() hands ownership of the HFONT to the caller; without
            // DeleteObject every call leaks a GDI handle.
            if (hFont != IntPtr.Zero)
            {
                DeleteObject(hFont);
            }

            g.ReleaseHdc(hdc);
        }
    }

    return fontRanges;
}
/// <summary>
/// Tells whether <paramref name="character"/> falls inside any of the ranges
/// returned by <c>GetUnicodeRangesForFont</c> for <paramref name="font"/>.
/// </summary>
/// <param name="character">The character to look up.</param>
/// <param name="font">The font whose ranges are searched.</param>
/// <returns><c>true</c> if some range contains the character; otherwise <c>false</c>.</returns>
public bool CheckIfCharInFont(char character, Font font)
{
    ushort codeUnit = Convert.ToUInt16(character);

    foreach (FontRange candidate in GetUnicodeRangesForFont(font))
    {
        bool outside = codeUnit < candidate.Low || codeUnit > candidate.High;
        if (!outside)
        {
            return true;
        }
    }

    return false;
}
Then, given a char toCheck that you want to check and a Font theFont to test it against...
// CheckIfCharInFont returns false when no range of theFont contains toCheck.
if (!CheckIfCharInFont(toCheck, theFont))
{
    // not present
}
Same code using VB.Net
''' <summary>
''' P/Invoke declaration bound to gdi32.dll; the empty body is required by the DllImport syntax.
''' </summary>
''' <param name="hds">Device context handle.</param>
''' <param name="lpgs">Pointer to the receiving buffer, or IntPtr.Zero.</param>
<DllImport("gdi32.dll")> _
Public Shared Function GetFontUnicodeRanges(ByVal hds As IntPtr, ByVal lpgs As IntPtr) As UInteger
End Function
''' <summary>
''' P/Invoke declaration bound to gdi32.dll; the empty body is required by the DllImport syntax.
''' </summary>
''' <param name="hDc">Device context handle.</param>
''' <param name="hObject">Handle of the GDI object to select.</param>
<DllImport("gdi32.dll")> _
Public Shared Function SelectObject(ByVal hDc As IntPtr, ByVal hObject As IntPtr) As IntPtr
End Function
''' <summary>
''' An inclusive range of 16-bit character codes, from Low to High.
''' </summary>
Public Structure FontRange
    ''' <summary>First character code in the range.</summary>
    Public Low As UShort

    ''' <summary>Last character code in the range.</summary>
    Public High As UShort
End Structure
<DllImport("gdi32.dll")> _
Private Shared Function DeleteObject(ByVal hObject As IntPtr) As Boolean
End Function

''' <summary>
''' Returns the character-code ranges that GDI reports for the given font.
''' </summary>
''' <param name="font">The font to inspect.</param>
''' <returns>
''' One FontRange per range reported by GetFontUnicodeRanges; an empty list when
''' the call reports no data.
''' </returns>
''' <exception cref="ArgumentNullException">font is Nothing.</exception>
Public Function GetUnicodeRangesForFont(ByVal font As Font) As List(Of FontRange)
    ' Byte offsets into the native GLYPHSET structure:
    ' cbThis (0), flAccel (4), cGlyphsSupported (8), cRanges (12), ranges() (16).
    Const RangeCountOffset As Integer = 12
    Const FirstRangeOffset As Integer = 16
    ' Each WCRANGE is a 16-bit wcLow followed by a 16-bit cGlyphs.
    Const RangeSize As Integer = 4

    If font Is Nothing Then
        Throw New ArgumentNullException("font")
    End If

    Dim fontRanges As New List(Of FontRange)
    Dim hFont As IntPtr = IntPtr.Zero
    Dim glyphSet As IntPtr = IntPtr.Zero

    Using g As Graphics = Graphics.FromHwnd(IntPtr.Zero)
        Dim hdc As IntPtr = g.GetHdc()
        Try
            hFont = font.ToHfont()
            Dim old As IntPtr = SelectObject(hdc, hFont)
            Try
                ' First call with a null buffer asks for the required size in bytes.
                Dim size As UInteger = GetFontUnicodeRanges(hdc, IntPtr.Zero)
                If size = 0 Then
                    Return fontRanges
                End If

                glyphSet = Marshal.AllocHGlobal(CInt(size))
                If GetFontUnicodeRanges(hdc, glyphSet) = 0 Then
                    Return fontRanges
                End If

                Dim count As Integer = Marshal.ReadInt32(glyphSet, RangeCountOffset)
                For i As Integer = 0 To count - 1
                    Dim offset As Integer = FirstRangeOffset + (i * RangeSize)

                    ' ReadInt16 is signed, but both fields are unsigned 16-bit values.
                    ' Masking to 0..65535 avoids the OverflowException that a direct
                    ' Short-to-UShort conversion raises for values of &H8000 and above.
                    Dim low As Integer = Marshal.ReadInt16(glyphSet, offset) And &HFFFF
                    Dim glyphCount As Integer = Marshal.ReadInt16(glyphSet, offset + 2) And &HFFFF

                    Dim range As New FontRange
                    range.Low = CUShort(low)
                    range.High = CUShort((low + glyphCount - 1) And &HFFFF)
                    fontRanges.Add(range)
                Next
            Finally
                ' Deselect the font before it is deleted below.
                SelectObject(hdc, old)
            End Try
        Finally
            If glyphSet <> IntPtr.Zero Then
                Marshal.FreeHGlobal(glyphSet)
            End If

            ' Font.ToHfont() hands ownership of the HFONT to the caller; without
            ' DeleteObject every call leaks a GDI handle.
            If hFont <> IntPtr.Zero Then
                DeleteObject(hFont)
            End If

            g.ReleaseHdc(hdc)
        End Try
    End Using

    Return fontRanges
End Function
''' <summary>
''' Tells whether the character falls inside any of the ranges returned by
''' GetUnicodeRangesForFont for the given font.
''' </summary>
''' <param name="character">The character to look up.</param>
''' <param name="font">The font whose ranges are searched.</param>
''' <returns>True if some range contains the character; otherwise False.</returns>
Public Function CheckIfCharInFont(ByVal character As Char, ByVal font As Font) As Boolean
    Dim codeUnit As UInt16 = Convert.ToUInt16(character)

    For Each candidate As FontRange In GetUnicodeRangesForFont(font)
        If codeUnit >= candidate.Low AndAlso codeUnit <= candidate.High Then
            Return True
        End If
    Next

    Return False
End Function
Solution 2:
Scott's answer is good. Here is another approach that is probably faster if you are checking just a couple of strings per font (in our case, one string per font), but probably slower if you are using one font to check a ton of text.
/// <summary>P/Invoke binding for the <c>CreateDC</c> entry point in gdi32.dll.</summary>
[DllImport("gdi32.dll", EntryPoint = "CreateDC", CharSet = CharSet.Auto, SetLastError = true)]
private static extern IntPtr CreateDC(
    string lpszDriver,
    string lpszDeviceName,
    string lpszOutput,
    IntPtr devMode);

/// <summary>P/Invoke binding for <c>DeleteDC</c> in gdi32.dll.</summary>
[DllImport("gdi32.dll", ExactSpelling = true, SetLastError = true)]
private static extern bool DeleteDC(IntPtr hdc);

/// <summary>P/Invoke binding for <c>SelectObject</c> in Gdi32.dll.</summary>
[DllImport("Gdi32.dll")]
private static extern IntPtr SelectObject(IntPtr hdc, IntPtr hgdiobj);

/// <summary>
/// P/Invoke binding for <c>GetGlyphIndices</c> in Gdi32.dll; the string is
/// marshalled as a wide-character string.
/// </summary>
[DllImport("Gdi32.dll", CharSet = CharSet.Unicode)]
private static extern int GetGlyphIndices(
    IntPtr hdc,
    [MarshalAs(UnmanagedType.LPWStr)] string lpstr,
    int c,
    short[] pgi,
    int fl);
[DllImport("gdi32.dll", ExactSpelling = true)]
private static extern bool DeleteObject(IntPtr hObject);

/// <summary>
/// Returns true if the passed in string can be displayed using the passed in fontname. It checks the font to
/// see if it has glyphs for all the chars in the string.
/// </summary>
/// <param name="fontName">The name of the font to check.</param>
/// <param name="text">The text to check for glyphs of.</param>
/// <returns>
/// <c>false</c> if any character of <paramref name="text"/> has no glyph in the font;
/// <c>true</c> otherwise, including when the device context cannot be created or an
/// exception is caught (best effort).
/// </returns>
public static bool CanDisplayString(string fontName, string text)
{
    // GGI_MARK_NONEXISTING_GLYPHS: ask GDI to mark unsupported characters with 0xFFFF.
    // 0xFFFF is the marker value, not the flag, so it must not be passed as the flags argument.
    const int GgiMarkNonexistingGlyphs = 0x0001;
    // 0xFFFF as seen through the Int16[] used by the GetGlyphIndices declaration.
    const Int16 MissingGlyphMarker = -1;

    try
    {
        IntPtr hdc = CreateDC("DISPLAY", null, null, IntPtr.Zero);
        if (hdc != IntPtr.Zero)
        {
            try
            {
                using (FontFamily family = new FontFamily(fontName))
                using (Font font = new Font(family, 12, FontStyle.Regular, GraphicsUnit.Point))
                {
                    int count = text.Length;
                    Int16[] rtcode = new Int16[count];

                    // ToHfont() hands ownership of the HFONT to the caller.
                    IntPtr hFont = font.ToHfont();
                    try
                    {
                        IntPtr previous = SelectObject(hdc, hFont);
                        GetGlyphIndices(hdc, text, count, rtcode, GgiMarkNonexistingGlyphs);
                        // Deselect the font so it can be deleted cleanly.
                        SelectObject(hdc, previous);
                    }
                    finally
                    {
                        DeleteObject(hFont);
                    }

                    foreach (Int16 code in rtcode)
                    {
                        // Index 0 is the font's "missing glyph" slot and is also what the
                        // array holds if the call wrote nothing; the marker is the
                        // documented signal for an unsupported character.
                        if (code == 0 || code == MissingGlyphMarker)
                        {
                            return false;
                        }
                    }
                }
            }
            finally
            {
                // Release the DC on every path, including exceptions and early returns.
                DeleteDC(hdc);
            }
        }
    }
    catch (Exception)
    {
        // nada - return true
        Trap.trap();
    }
    return true;
}
Solution 3:
FreeType is a library that can read TrueType font files (among others) and can be used to query the font for a specific glyph. However, FreeType is designed for rendering, so using it might cause you to pull in more code than you need for this solution.
Unfortunately, there's not really a clear solution even within the world of OpenType / TrueType fonts; the character-to-glyph mapping has about a dozen different definitions depending on the type of font and what platform it was originally designed for. You might try to look at the cmap table definition in Microsoft's copy of the OpenType spec, but it's not exactly easy reading.