List of tables used in an SQL Query
Is there a way how to get a list of tables used within an SQL query? Example : I have something like :
SELECT * FROM Table t JOIN OtherTable ON t.id=OtherTable.t_id
and I would expect to get
Table, OtherTable
Thanks
One solution using C# is to import Microsoft.SqlServer.TransactSql.ScriptDom
(I found the dll at C:\Program Files (x86)\Microsoft SQL Server\120\SDK\Assemblies\Microsoft.SqlServer.TransactSql.ScriptDom.dll
) then do the following:
private List<string> GetTableNamesFromQueryString(string query)
{
IList<ParseError> errors = new List<ParseError>();
IList<TSqlParserToken> queryTokens;
List<string> output = new List<string>(16);
StringBuilder sb = new StringBuilder(128);
TSql120Parser parser = new TSql120Parser(true);
TSqlTokenType[] fromTokenTypes = new TSqlTokenType[2]
{
TSqlTokenType.From,
TSqlTokenType.Join
};
TSqlTokenType[] identifierTokenTypes = new TSqlTokenType[2]
{
TSqlTokenType.Identifier,
TSqlTokenType.QuotedIdentifier
};
using (System.IO.TextReader tReader = new System.IO.StringReader(query))
{
queryTokens = parser.GetTokenStream(tReader, out errors);
if (errors.Count > 0) { return errors.Select(e=>"Error: " + e.Number + " Line: " + e.Line + " Column: " + e.Column + " Offset: " + e.Offset + " Message: " + e.Message).ToList(); }
for (int i = 0; i < queryTokens.Count; i++)
{
if(fromTokenTypes.Contains(queryTokens[i].TokenType))
{
for (int j = i + 1; j < queryTokens.Count; j++)
{
if (queryTokens[j].TokenType == TSqlTokenType.WhiteSpace) { continue; }
else if (identifierTokenTypes.Contains(queryTokens[j].TokenType))
{
sb.Clear();
GetQuotedIdentifier(queryTokens[j], sb); //Change Identifiers to QuotedIdentifier (text only)
while (j + 2 < queryTokens.Count && queryTokens[j + 1].TokenType == TSqlTokenType.Dot && identifierTokenTypes.Contains(queryTokens[j + 2].TokenType))
{
sb.Append(queryTokens[j + 1].Text);
GetQuotedIdentifier(queryTokens[j + 2], sb); //Change Identifiers to QuotedIdentifier (text only)
j += 2;
}
output.Add(sb.ToString());
break; //exit the loop
}
else { break; } //exit the loop if token is not a FROM, a JOIN, or white space.
}
}
}
return output.Distinct().OrderBy(tableName => tableName).ToList();
}
}
private void GetQuotedIdentifier(TSqlParserToken token, StringBuilder sb)
{
switch(token.TokenType)
{
case TSqlTokenType.Identifier: sb.Append('[').Append(token.Text).Append(']'); return;
case TSqlTokenType.QuotedIdentifier: sb.Append(token.Text); return;
default: throw new ArgumentException("Error: expected TokenType of token should be TSqlTokenType.Identifier or TSqlTokenType.QuotedIdentifier");
}
}
I came up with this after trying to get this answer to work.
you can use this sql script right after your query. It will return a list of tables used in the last executed query:
SELECT Field1, Field2
FROM Table t JOIN OtherTable ON t.id=OtherTable.t_id
;WITH vwQueryStats AS(
SELECT
COALESCE(OBJECT_NAME(s2.objectid),'Ad-Hoc') AS ProcName
,execution_count
,s2.objectid
,(
SELECT TOP 1
SUBSTRING(s2.TEXT,statement_start_offset / 2+1
,( ( CASE WHEN statement_end_offset = -1
THEN (LEN(CONVERT(NVARCHAR(MAX),s2.TEXT)) * 2)
ELSE statement_end_offset END)- statement_start_offset) / 2+1)) AS sql_statement
,last_execution_time
FROM sys.dm_exec_query_stats AS s1
CROSS APPLY sys.dm_exec_sql_text(sql_handle) AS s2
)
SELECT TOP 1 *
INTO #lastQueryStats
FROM vwQueryStats x
WHERE sql_statement NOT like 'WITH vwQueryStats AS%'
ORDER BY last_execution_time DESC
SELECT
TABLE_NAME
FROM #lastQueryStats, INFORMATION_SCHEMA.TABLES tab
WHERE CHARINDEX( tab.TABLE_NAME, sql_statement) > 0
DROP TABLE #lastQueryStats
I've taken the query that retrieves the last executed query from this post and I modified it a bit to match with your example.
The output will be as you requested:
Table
OtherTable
Then if you want to have them comma separated you can do:
DECLARE @tableNames VARCHAR(MAX)
SELECT @tableNames = COALESCE(@tableNames + ', ', '') + TABLE_NAME
FROM #lastQueryStats, INFORMATION_SCHEMA.TABLES tab
WHERE CHARINDEX( tab.TABLE_NAME, sql_statement) > 0
SELECT @tableNames
However you should be wary that in a 'usual' production or QA environment with thousands of query executed concurrently this mightn't work as another query could be executed in between your first query and the query that extracts info from db stats.
Hope it helps
Code below is based on Trisped's answer, but modified to work with fully qualified table names that omit a schema name, and a few cleanups/optimizations:
public class Parser
{
public static List<string> GetTableNamesFromQueryString(string query)
{
var output = new List<string>();
var sb = new StringBuilder();
var parser = new TSql120Parser(true);
var fromTokenTypes = new[]
{
TSqlTokenType.From,
TSqlTokenType.Join
};
var identifierTokenTypes = new[]
{
TSqlTokenType.Identifier,
TSqlTokenType.QuotedIdentifier
};
using (System.IO.TextReader tReader = new System.IO.StringReader(query))
{
IList<ParseError> errors;
var queryTokens = parser.GetTokenStream(tReader, out errors);
if (errors.Any())
{
return errors
.Select(e => string.Format("Error: {0}; Line: {1}; Column: {2}; Offset: {3}; Message: {4};", e.Number, e.Line, e.Column, e.Offset, e.Message))
.ToList();
}
for (var i = 0; i < queryTokens.Count; i++)
{
if (fromTokenTypes.Contains(queryTokens[i].TokenType))
{
for (var j = i + 1; j < queryTokens.Count; j++)
{
if (queryTokens[j].TokenType == TSqlTokenType.WhiteSpace)
{
continue;
}
if (identifierTokenTypes.Contains(queryTokens[j].TokenType))
{
sb.Clear();
GetQuotedIdentifier(queryTokens[j], sb);
while (j + 2 < queryTokens.Count
&& queryTokens[j + 1].TokenType == TSqlTokenType.Dot
&& (queryTokens[j + 2].TokenType == TSqlTokenType.Dot || identifierTokenTypes.Contains(queryTokens[j + 2].TokenType)))
{
sb.Append(queryTokens[j + 1].Text);
if (queryTokens[j + 2].TokenType == TSqlTokenType.Dot)
{
if (queryTokens[j - 1].TokenType == TSqlTokenType.Dot)
GetQuotedIdentifier(queryTokens[j + 1], sb);
j++;
}
else
{
GetQuotedIdentifier(queryTokens[j + 2], sb);
j += 2;
}
}
output.Add(sb.ToString());
}
break;
}
}
}
return output.Distinct().OrderBy(tableName => tableName).ToList();
}
}
private static void GetQuotedIdentifier(TSqlParserToken token, StringBuilder sb)
{
switch (token.TokenType)
{
case TSqlTokenType.Identifier:
sb.Append('[').Append(token.Text).Append(']');
break;
case TSqlTokenType.QuotedIdentifier:
case TSqlTokenType.Dot:
sb.Append(token.Text);
break;
default: throw new ArgumentException("Error: expected TokenType of token should be TSqlTokenType.Dot, TSqlTokenType.Identifier, or TSqlTokenType.QuotedIdentifier");
}
}
}
One hackish way you could accomplish this would be to explicitly name the fields in your query and prefix them with the table name, e.g.
SELECT Field1 As "OtherTable.Field1",
Field2 As "Table.Field2"
FROM Table t JOIN OtherTable ON t.id=OtherTable.t_id
Essentially, you're providing your own metadata in the query results. After your query returns, look at the column names and implement custom logic to split out the table names.