<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: GetScanResult endpoint does not return "datasourceInstances" list in Data Engineering</title>
    <link>https://community.fabric.microsoft.com/t5/Data-Engineering/GetScanResult-endpoint-does-not-return-quot-datasourceInstances/m-p/5178262#M16126</link>
    <description>&lt;P&gt;Hello&amp;nbsp;&lt;a href="https://community.fabric.microsoft.com/t5/user/viewprofilepage/user-id/1275237"&gt;@v-tejrama&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thank you for this detailed explanation.&lt;BR /&gt;&lt;BR /&gt;I want to add that dataRetrievalState column has values&lt;/P&gt;&lt;PRE&gt;UpstreamLineageErrors; DatasetSchemaDisabledByAdmin; DatasetExpressionsDisabledByAdmin&lt;/PRE&gt;&lt;P&gt;and&lt;/P&gt;&lt;PRE&gt;DatasetSchemaDisabledByAdmin; DatasetExpressionsDisabledByAdmin&lt;/PRE&gt;&lt;P&gt;which is obviously because of the situation you mentioned.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Still, I want to ask does it make sense to go to PBI admin/app settings to check what metadata is allowed to collect or something like that (if it is possible at all. Unfortunately, I am not a PBI admin and have no clue what is inside PBI admin panel). If so, could you please suggest which settings should I ask to check.&lt;/P&gt;</description>
    <pubDate>Tue, 05 May 2026 17:10:00 GMT</pubDate>
    <dc:creator>1ng4lipt</dc:creator>
    <dc:date>2026-05-05T17:10:00Z</dc:date>
    <item>
      <title>GetScanResult endpoint does not return "datasourceInstances" list</title>
      <link>https://community.fabric.microsoft.com/t5/Data-Engineering/GetScanResult-endpoint-does-not-return-quot-datasourceInstances/m-p/5177647#M16116</link>
      <description>&lt;P&gt;Hello!&lt;/P&gt;&lt;P&gt;I'm collecting company's PBI metadata via PBI API (Scan Jobs to be precise).&lt;BR /&gt;Following documentation, I call&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;A href="https://learn.microsoft.com/en-us/rest/api/power-bi/admin/workspace-info-post-workspace-info" target="_blank" rel="nofollow noopener ugc"&gt;PostWorkspaceInfo&lt;/A&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;endpoint with additional params&lt;/P&gt;&lt;DIV class=""&gt;&amp;nbsp;&lt;/DIV&gt;&lt;LI-CODE lang="markup"&gt;getArtifactUsers=True&amp;amp;lineage=True&amp;amp;datasourceDetails=True&amp;amp;datasetSchema=True&amp;amp;datasetExpressions=True&lt;/LI-CODE&gt;&lt;DIV class=""&gt;&lt;SPAN class=""&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/DIV&gt;&lt;LI-CODE lang="python"&gt;SCAN_GETINFO_PARAMS = {
    "getArtifactUsers": "true",
    "lineage": "true",
    "datasourceDetails": "true",
    "datasetSchema": "true",
    "datasetExpressions": "true",
}


PBI_SCOPE = "https://analysis.windows.net/powerbi/api/.default"


class PowerBIOAuth2ClientCredentialsAuth(AuthBase):
    """HttpHook auth adapter around dlt OAuth2 client credentials."""

    def __init__(self, login: str, password: str) -&amp;gt; None:
        tenant_id = "..."
        client_id = "..."
        if not password:
            raise AirflowException("Power BI client secret is empty")
        self._auth = OAuth2ClientCredentials(
            access_token_url=f"https://login.microsoftonline.com/{tenant_id}/oauth2/v2.0/token",
            client_id=client_id,
            client_secret=password,
            access_token_request_data={"scope": PBI_SCOPE},
        )

    def __call__(self, request: PreparedRequest) -&amp;gt; PreparedRequest:
        return self._auth(request)


def create_scan(
    batch_index: int,
    ch_conn_id: str,
    select_ids_sql: str,
    powerbi_conn_id: str,
    batch_size: int = 100,
) -&amp;gt; str:


"""Create a Power BI scan for one mapped workspace batch.

Args:
    batch_index: Batch number produced by dynamic task mapping.
    ch_conn_id: Airflow ClickHouse connection id used to fetch workspace ids.
    select_ids_sql: SQL query that returns workspace ids and supports
        `limit` and `offset` parameters.
    powerbi_conn_id: Airflow HTTP connection id for Power BI API access.
    batch_size: Number of workspace ids requested from ClickHouse per batch.

Returns:
    The created Power BI `scan_id`.

Raises:
    AirflowException: If the batch is empty, API response is non-202, or the
        response payload contains an API error or misses the `id` field.
"""

...


http = HttpHook(
    method="POST",
    http_conn_id=powerbi_conn_id,
    auth_type=PowerBIOAuth2ClientCredentialsAuth,
)

response = http.run(
    endpoint=f"v1.0/myorg/admin/workspaces/getInfo?{urlencode(SCAN_GETINFO_PARAMS)}",
    data=json.dumps({"workspaces": chunk}),
    headers={"Content-Type": "application/json", "Accept": "application/json"},
    extra_options={"check_response": False},
)


body: dict[str, Any] = response.json()
if body.get("error"):
    raise AirflowException(f"getInfo API error for batch {batch_index}: {body['error']}")

scan_id = body.get("id")
if not scan_id:
    raise AirflowException(f"getInfo response missing `id`: {body}")

logging.info("Created scan %s for batch %s (%s workspaces)", scan_id, batch_index, len(chunk))
return scan_id	&lt;/LI-CODE&gt;&lt;P&gt;POST request is on screenshot below (tried both 'true' and "True" - no changes)&lt;/P&gt;&lt;DIV class=""&gt;&amp;nbsp;&lt;/DIV&gt;&lt;LI-CODE lang="markup"&gt;INFO - Calling Power BI getInfo endpoint for batch 0: v1.0/myorg/admin/workspaces/getInfo?getArtifactUsers=True&amp;amp;lineage=True&amp;amp;datasourceDetails=True&amp;amp;datasetSchema=True&amp;amp;datasetExpressions=True&lt;/LI-CODE&gt;&lt;P&gt;In the response to GET&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;A href="https://learn.microsoft.com/en-us/rest/api/power-bi/admin/workspace-info-get-scan-result" target="_blank" rel="nofollow noopener ugc"&gt;GetScanResult&lt;/A&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;I expected to get the "datasourceInstances" list and use its data later instead of using&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;A href="https://learn.microsoft.com/en-us/rest/api/power-bi/admin/datasets-get-datasources-as-admin" target="_blank" rel="nofollow noopener ugc"&gt;Datasets GetDatasourcesAsAdmin&lt;/A&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;to get exactly the same info, but I didn't get this list. The only related data I have is "datasourceInstanceId" in the "datasourceUsages" list inside the "datasets" list.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;"datasets": [
            {
                "id": "",
                "name": "",
                "tables": [],
                "configuredBy": "",
                "configuredById": "",
                "isEffectiveIdentityRequired": false,
                "isEffectiveIdentityRolesRequired": false,
                "refreshSchedule": {
                    "days": [
                        "Sunday",
                        "Monday",
                        "Tuesday",
                        "Wednesday",
                        "Thursday",
                        "Friday",
                        "Saturday"
                    ],
                    "times": [
                        "05:30"
                    ],
                    "enabled": true,
                    "localTimeZoneId": "UTC",
                    "notifyOption": "MailOnFailure"
                },
                "targetStorageMode": "Abf",
                "createdDate": "",
                "contentProviderType": "",
                "datasourceUsages": [
                    {
                        "datasourceInstanceId": "005f54d6-6e55-496c-93da-e2a96356b72a"
                    }
                ],
                "tags": []
            },&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I've read that datasourceDetails=True might be an issue, but I pass this param in the request URL.&lt;BR /&gt;Could you please help me figure out why the 'datasourceInstances' list is not in the GetScanResult response?&lt;/P&gt;</description>
      <pubDate>Mon, 04 May 2026 19:27:21 GMT</pubDate>
      <guid>https://community.fabric.microsoft.com/t5/Data-Engineering/GetScanResult-endpoint-does-not-return-quot-datasourceInstances/m-p/5177647#M16116</guid>
      <dc:creator>1ng4lipt</dc:creator>
      <dc:date>2026-05-04T19:27:21Z</dc:date>
    </item>
    <item>
      <title>Re: GetScanResult endpoint does not return "datasourceInstances" list</title>
      <link>https://community.fabric.microsoft.com/t5/Data-Engineering/GetScanResult-endpoint-does-not-return-quot-datasourceInstances/m-p/5178107#M16123</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.fabric.microsoft.com/t5/user/viewprofilepage/user-id/1589380"&gt;@1ng4lipt&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Thank you for providing the detailed request flow. I have reviewed your implementation, and your PostWorkspaceInfo call appears to be correct, so the parameters are set appropriately. The use of true versus True will not affect the response in this context.&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;
&lt;P&gt;Notably, your scan result returns datasourceUsages with a valid datasourceInstanceId, indicating that the scan is successfully identifying the relationship between the dataset and its datasource. If there were an issue with the request, this information would typically not be present.&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;
&lt;P&gt;The absence of the datasourceInstances section generally occurs when the scan API cannot fully resolve the underlying datasource metadata. As outlined in the Microsoft documentation for GetScanResult, the API may only return certain properties depending on the metadata available in the Power BI service. As a result, the response can vary based on datasource type, connection configuration, gateway setup, or how the dataset was created.&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;
&lt;P&gt;This behavior is common with specific cloud connections, custom connectors, parameterized connections, dataflows, and certain Fabric related sources where the relationship is detected, but complete datasource details are not provided in the scan response.&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;
&lt;P&gt;Since your request is properly configured and the scan completes successfully, this appears to be related to the API’s metadata return for that datasource, rather than an issue with your code.&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;
&lt;P&gt;If you require comprehensive datasource connection details, I recommend using the Get Datasources As Admin endpoint, which is generally more reliable when datasourceInstances is missing.&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;
&lt;P&gt;For further reference, please see the following Microsoft documentation:&lt;/P&gt;
&lt;P&gt;GetScanResult API documentation&lt;BR /&gt;&lt;A title="https://learn.microsoft.com/rest/api/power-bi/admin/workspace-info-get-scan-result" href="https://learn.microsoft.com/rest/api/power-bi/admin/workspace-info-get-scan-result" rel="noreferrer noopener" target="_blank"&gt;https://learn.microsoft.com/rest/api/power-bi/admin/workspace-info-get-scan-result&lt;/A&gt;&lt;/P&gt;
&lt;P&gt;PostWorkspaceInfo API documentation&lt;BR /&gt;&lt;A title="https://learn.microsoft.com/rest/api/power-bi/admin/workspace-info-post-workspace-info" href="https://learn.microsoft.com/rest/api/power-bi/admin/workspace-info-post-workspace-info" rel="noreferrer noopener" target="_blank"&gt;https://learn.microsoft.com/rest/api/power-bi/admin/workspace-info-post-workspace-info&lt;/A&gt;&lt;/P&gt;
&lt;P&gt;Get Datasources As Admin documentation&lt;BR /&gt;&lt;A title="https://learn.microsoft.com/rest/api/power-bi/admin/datasets-get-datasources-as-admin" href="https://learn.microsoft.com/rest/api/power-bi/admin/datasets-get-datasources-as-admin" rel="noreferrer noopener" target="_blank"&gt;https://learn.microsoft.com/rest/api/power-bi/admin/datasets-get-datasources-as-admin&lt;/A&gt;&lt;BR /&gt;&lt;BR /&gt;Thank you.&lt;/P&gt;</description>
      <pubDate>Tue, 05 May 2026 13:40:42 GMT</pubDate>
      <guid>https://community.fabric.microsoft.com/t5/Data-Engineering/GetScanResult-endpoint-does-not-return-quot-datasourceInstances/m-p/5178107#M16123</guid>
      <dc:creator>v-tejrama</dc:creator>
      <dc:date>2026-05-05T13:40:42Z</dc:date>
    </item>
    <item>
      <title>Re: GetScanResult endpoint does not return "datasourceInstances" list</title>
      <link>https://community.fabric.microsoft.com/t5/Data-Engineering/GetScanResult-endpoint-does-not-return-quot-datasourceInstances/m-p/5178262#M16126</link>
      <description>&lt;P&gt;Hello&amp;nbsp;&lt;a href="https://community.fabric.microsoft.com/t5/user/viewprofilepage/user-id/1275237"&gt;@v-tejrama&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thank you for this detailed explanation.&lt;BR /&gt;&lt;BR /&gt;I want to add that dataRetrievalState column has values&lt;/P&gt;&lt;PRE&gt;UpstreamLineageErrors; DatasetSchemaDisabledByAdmin; DatasetExpressionsDisabledByAdmin&lt;/PRE&gt;&lt;P&gt;and&lt;/P&gt;&lt;PRE&gt;DatasetSchemaDisabledByAdmin; DatasetExpressionsDisabledByAdmin&lt;/PRE&gt;&lt;P&gt;which is obviously because of the situation you mentioned.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Still, I want to ask does it make sense to go to PBI admin/app settings to check what metadata is allowed to collect or something like that (if it is possible at all. Unfortunately, I am not a PBI admin and have no clue what is inside PBI admin panel). If so, could you please suggest which settings should I ask to check.&lt;/P&gt;</description>
      <pubDate>Tue, 05 May 2026 17:10:00 GMT</pubDate>
      <guid>https://community.fabric.microsoft.com/t5/Data-Engineering/GetScanResult-endpoint-does-not-return-quot-datasourceInstances/m-p/5178262#M16126</guid>
      <dc:creator>1ng4lipt</dc:creator>
      <dc:date>2026-05-05T17:10:00Z</dc:date>
    </item>
    <item>
      <title>Re: GetScanResult endpoint does not return "datasourceInstances" list</title>
      <link>https://community.fabric.microsoft.com/t5/Data-Engineering/GetScanResult-endpoint-does-not-return-quot-datasourceInstances/m-p/5178276#M16127</link>
      <description>&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="lbendlin_1-1778003052828.png" style="width: 400px;"&gt;&lt;img src="https://community.fabric.microsoft.com/t5/image/serverpage/image-id/1347493iFB5F87058540D95D/image-size/medium?v=v2&amp;amp;px=400" role="button" title="lbendlin_1-1778003052828.png" alt="lbendlin_1-1778003052828.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 05 May 2026 17:44:19 GMT</pubDate>
      <guid>https://community.fabric.microsoft.com/t5/Data-Engineering/GetScanResult-endpoint-does-not-return-quot-datasourceInstances/m-p/5178276#M16127</guid>
      <dc:creator>lbendlin</dc:creator>
      <dc:date>2026-05-05T17:44:19Z</dc:date>
    </item>
    <item>
      <title>Re: GetScanResult endpoint does not return "datasourceInstances" list</title>
      <link>https://community.fabric.microsoft.com/t5/Data-Engineering/GetScanResult-endpoint-does-not-return-quot-datasourceInstances/m-p/5178277#M16128</link>
      <description>&lt;P&gt;Hello&amp;nbsp;&lt;a href="https://community.fabric.microsoft.com/t5/user/viewprofilepage/user-id/100342"&gt;@lbendlin&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;&lt;P&gt;Thanks for the screenshot. I'll ask our admin to check these settings and come back to you with more questions if you don't mind &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 05 May 2026 17:46:59 GMT</pubDate>
      <guid>https://community.fabric.microsoft.com/t5/Data-Engineering/GetScanResult-endpoint-does-not-return-quot-datasourceInstances/m-p/5178277#M16128</guid>
      <dc:creator>1ng4lipt</dc:creator>
      <dc:date>2026-05-05T17:46:59Z</dc:date>
    </item>
  </channel>
</rss>

