<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Seeking Best Practices for End-to-End Data Pipeline Observability in Data Engineering</title>
    <link>https://community.fabric.microsoft.com/t5/Data-Engineering/Seeking-Best-Practices-for-End-to-End-Data-Pipeline/m-p/4800513#M11803</link>
    <description>&lt;P&gt;Hi&amp;nbsp;@Anonymous&amp;nbsp;,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;I&amp;nbsp;hope the information provided is helpful. Feel free to reach out if you have any further questions.&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;Thanks!!&lt;/SPAN&gt;&lt;/P&gt;</description>
    <pubDate>Tue, 19 Aug 2025 06:11:46 GMT</pubDate>
    <dc:creator>v-sathmakuri</dc:creator>
    <dc:date>2025-08-19T06:11:46Z</dc:date>
    <item>
      <title>Seeking Best Practices for End-to-End Data Pipeline Observability</title>
      <link>https://community.fabric.microsoft.com/t5/Data-Engineering/Seeking-Best-Practices-for-End-to-End-Data-Pipeline/m-p/4797176#M11750</link>
      <description>&lt;P class=""&gt;Hi Fabric community,&lt;/P&gt;&lt;P class=""&gt;I've been exploring ways to enhance observability across our data pipelines in Fabric (especially Dataflows Gen2 and Lakehouses). With complex transformations and multiple data sources, tracking data quality issues upstream feels like finding needles in haystacks!&lt;/P&gt;&lt;P class=""&gt;One approach I'm testing involves implementing&amp;nbsp;data contracts&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;between ingestion and transformation layers. This helps catch schema drifts early, but I still struggle with:&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;&lt;P class=""&gt;Column-level lineage visibility&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P class=""&gt;Automated anomaly detection&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P class=""&gt;Proactive alert thresholds&lt;/P&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;P class=""&gt;Has anyone built a comprehensive monitoring solution within Fabric? I'd love to hear how you:&lt;/P&gt;&lt;OL&gt;&lt;LI&gt;&lt;P class=""&gt;Track pipeline health beyond just activity logs&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P class=""&gt;Monitor sensitive data columns (PII/PHI)&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P class=""&gt;Maintain data quality SLAs&lt;/P&gt;&lt;/LI&gt;&lt;/OL&gt;&lt;P class=""&gt;For inspiration, I've been researching solutions like&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;A href="https://www.siffletdata.com/" target="_self"&gt;Sifflet's data observability platform&lt;/A&gt;. Their approach to metadata-driven monitoring seems promising for Fabric environments. Curious if others have tried similar tools or built custom solutions using:&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;&lt;P class=""&gt;Fabric REST APIs&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P class=""&gt;Purview integration&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P class=""&gt;Power BI anomaly detection&lt;/P&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;P class=""&gt;Would appreciate any war stories or architecture diagrams!&lt;/P&gt;</description>
      <pubDate>Thu, 14 Aug 2025 13:53:52 GMT</pubDate>
      <guid>https://community.fabric.microsoft.com/t5/Data-Engineering/Seeking-Best-Practices-for-End-to-End-Data-Pipeline/m-p/4797176#M11750</guid>
      <dc:creator>Anonymous</dc:creator>
      <dc:date>2025-08-14T13:53:52Z</dc:date>
    </item>
    <item>
      <title>Re: Seeking Best Practices for End-to-End Data Pipeline Observability</title>
      <link>https://community.fabric.microsoft.com/t5/Data-Engineering/Seeking-Best-Practices-for-End-to-End-Data-Pipeline/m-p/4797855#M11761</link>
      <description>&lt;P&gt;Hi&amp;nbsp;@Anonymous&amp;nbsp;,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Thank you for reaching out to Microsoft Fabric Community.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;The documentation below might help in understanding the required concepts:&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;UL&gt;
&lt;LI&gt;Column-level lineage visibility&lt;/LI&gt;
&lt;/UL&gt;
&lt;DIV class="lia-indent-padding-left-60px"&gt;&lt;SPAN&gt;To gain visibility into how data flows at the column level, you can use the Fabric Lineage Extractor. This tool helps trace data transformations across pipelines making it easier to audit and debug.&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV class="lia-indent-padding-left-90px"&gt;&lt;SPAN&gt;&lt;A href="https://github.com/sdetoni-prj/Fabric_LineageExtractor?tab=readme-ov-file" target="_blank"&gt; https://github.com/sdetoni-prj/Fabric_LineageExtractor?tab=readme-ov-file&lt;/A&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;UL&gt;
&lt;LI&gt;Automated Anomaly Detection&lt;/LI&gt;
&lt;/UL&gt;
&lt;DIV class="lia-indent-padding-left-60px"&gt;&lt;SPAN&gt; Built in Anomaly Detection adds "Find Anomalies" in line charts. It highlights outliers and provides natural language explanations. You can customize sensitivity and analysis fields&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV class="lia-indent-padding-left-90px"&gt;&lt;SPAN&gt;&lt;A href="https://tip2.blog.fabric.microsoft.com/en-us/blog/anomaly-detection-preview?ft=Announcements:category" target="_blank"&gt; https://tip2.blog.fabric.microsoft.com/en-us/blog/anomaly-detection-preview?ft=Announcements:category&amp;nbsp;&lt;/A&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV class="lia-indent-padding-left-60px"&gt;&lt;SPAN&gt; Multivariate Anomaly Detection trains models using Spark notebooks and applies them in real time via Eventhouse and KQL queries. This is ideal for detecting joint anomalies across correlated metrics.&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV class="lia-indent-padding-left-90px"&gt;&lt;SPAN&gt;&lt;A href="https://learn.microsoft.com/en-us/fabric/real-time-intelligence/multivariate-anomaly-detection" target="_blank"&gt; https://learn.microsoft.com/en-us/fabric/real-time-intelligence/multivariate-anomaly-detection&amp;nbsp;&lt;/A&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;UL&gt;
&lt;LI&gt;Proactive alert thresholds&lt;/LI&gt;
&lt;/UL&gt;
&lt;DIV class="lia-indent-padding-left-60px"&gt;&lt;SPAN&gt; Use Data Activator to trigger alerts based on conditions in streaming or batch data.&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV class="lia-indent-padding-left-60px"&gt;&lt;SPAN&gt; Use Purview DLP policies to set up alerts for sensitive data access or schema changes.&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV class="lia-indent-padding-left-90px"&gt;&lt;SPAN&gt;&lt;A href="https://learn.microsoft.com/en-us/fabric/governance/use-microsoft-purview-hub?tabs=overview" target="_blank"&gt; https://learn.microsoft.com/en-us/fabric/governance/use-microsoft-purview-hub?tabs=overview&amp;nbsp;&lt;/A&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;UL&gt;
&lt;LI&gt;Track pipeline health beyond just activity logs&lt;/LI&gt;
&lt;/UL&gt;
&lt;DIV class="lia-indent-padding-left-60px"&gt;&lt;SPAN&gt; Use Dataflow Gen2 Optimizations for Fast Copy, query folding and staging Lakehouse/Warehouse to improve performance.&amp;nbsp;&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV class="lia-indent-padding-left-90px"&gt;&lt;SPAN&gt;&lt;A href="https://learn.microsoft.com/en-us/fabric/data-factory/dataflow-gen2-performance-best-practices" target="_blank"&gt; https://learn.microsoft.com/en-us/fabric/data-factory/dataflow-gen2-performance-best-practices&amp;nbsp;&lt;/A&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV class="lia-indent-padding-left-60px"&gt;&lt;SPAN&gt; Fabric’s Eventhouse based monitoring collects logs and metrics across items. You can query this using KQL or SQL for performance insights&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV class="lia-indent-padding-left-90px"&gt;&lt;SPAN&gt;&lt;A href="https://learn.microsoft.com/en-us/fabric/fundamentals/workspace-monitoring-overview" target="_blank"&gt; https://learn.microsoft.com/en-us/fabric/fundamentals/workspace-monitoring-overview&amp;nbsp;&lt;/A&gt;&amp;nbsp;&amp;nbsp;&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;UL&gt;
&lt;LI&gt;Monitor sensitive data columns (PII/PHI)&lt;/LI&gt;
&lt;/UL&gt;
&lt;DIV class="lia-indent-padding-left-60px"&gt;&lt;SPAN&gt; Purview Hub Centralized dashboard helps to monitor sensitivity labels, data loss prevention (DLP) and audit logs.&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV class="lia-indent-padding-left-90px"&gt;&lt;SPAN&gt;&lt;A href="https://learn.microsoft.com/en-us/fabric/governance/use-microsoft-purview-hub?tabs=overview" target="_blank"&gt; https://learn.microsoft.com/en-us/fabric/governance/use-microsoft-purview-hub?tabs=overview&amp;nbsp;&lt;/A&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV class="lia-indent-padding-left-60px"&gt;&lt;SPAN&gt; Use ai.extract and ai.generate_response to detect PII directly in pipelines. This is a native, LLM-powered alternative to external libraries like Presidio&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV class="lia-indent-padding-left-90px"&gt;&lt;SPAN&gt;&lt;A href="https://community.fabric.microsoft.com/t5/Data-Engineering-Community-Blog/PII-Detection-and-Redaction-with-Fabric-AI-Functions/ba-p/4731952" target="_blank"&gt; https://community.fabric.microsoft.com/t5/Data-Engineering-Community-Blog/PII-Detection-and-Redaction-with-Fabric-AI-Functions/ba-p/4731952&amp;nbsp;&lt;/A&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;UL&gt;
&lt;LI&gt;Maintain data quality SLAs&lt;/LI&gt;
&lt;/UL&gt;
&lt;DIV class="lia-indent-padding-left-60px"&gt;&lt;SPAN&gt; To maintain data quality SLAs, you can schedule regular data profiling jobs in Fabric notebooks or Dataflows. Store results (row counts, null ratios, etc.) in a "data quality metrics" table in the Lakehouse/Warehouse. Power BI can then visualize SLAs, and alerts can notify you when a metric breaches its threshold.&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV class="lia-indent-padding-left-60px"&gt;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV&gt;Thank you !!&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;</description>
      <pubDate>Fri, 15 Aug 2025 10:34:20 GMT</pubDate>
      <guid>https://community.fabric.microsoft.com/t5/Data-Engineering/Seeking-Best-Practices-for-End-to-End-Data-Pipeline/m-p/4797855#M11761</guid>
      <dc:creator>v-sathmakuri</dc:creator>
      <dc:date>2025-08-15T10:34:20Z</dc:date>
    </item>
    <item>
      <title>Re: Seeking Best Practices for End-to-End Data Pipeline Observability</title>
      <link>https://community.fabric.microsoft.com/t5/Data-Engineering/Seeking-Best-Practices-for-End-to-End-Data-Pipeline/m-p/4800513#M11803</link>
      <description>&lt;P&gt;Hi&amp;nbsp;@Anonymous&amp;nbsp;,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;I&amp;nbsp;hope the information provided is helpful. Feel free to reach out if you have any further questions.&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;Thanks!!&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 19 Aug 2025 06:11:46 GMT</pubDate>
      <guid>https://community.fabric.microsoft.com/t5/Data-Engineering/Seeking-Best-Practices-for-End-to-End-Data-Pipeline/m-p/4800513#M11803</guid>
      <dc:creator>v-sathmakuri</dc:creator>
      <dc:date>2025-08-19T06:11:46Z</dc:date>
    </item>
    <item>
      <title>Re: Seeking Best Practices for End-to-End Data Pipeline Observability</title>
      <link>https://community.fabric.microsoft.com/t5/Data-Engineering/Seeking-Best-Practices-for-End-to-End-Data-Pipeline/m-p/4806615#M11925</link>
      <description>&lt;P&gt;Hi&amp;nbsp;@Anonymous&amp;nbsp;,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Could you please let us know whether the provided response helped in resolving the issue.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Thank you!!&lt;/P&gt;</description>
      <pubDate>Mon, 25 Aug 2025 09:56:55 GMT</pubDate>
      <guid>https://community.fabric.microsoft.com/t5/Data-Engineering/Seeking-Best-Practices-for-End-to-End-Data-Pipeline/m-p/4806615#M11925</guid>
      <dc:creator>v-sathmakuri</dc:creator>
      <dc:date>2025-08-25T09:56:55Z</dc:date>
    </item>
    <item>
      <title>Re: Seeking Best Practices for End-to-End Data Pipeline Observability</title>
      <link>https://community.fabric.microsoft.com/t5/Data-Engineering/Seeking-Best-Practices-for-End-to-End-Data-Pipeline/m-p/4810403#M12021</link>
      <description>&lt;P&gt;Hi&amp;nbsp;@Anonymous&amp;nbsp;,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;Could you let us know if the response provided was helpful in resolving your issue?&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;Thank you!!&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 28 Aug 2025 12:49:09 GMT</pubDate>
      <guid>https://community.fabric.microsoft.com/t5/Data-Engineering/Seeking-Best-Practices-for-End-to-End-Data-Pipeline/m-p/4810403#M12021</guid>
      <dc:creator>v-sathmakuri</dc:creator>
      <dc:date>2025-08-28T12:49:09Z</dc:date>
    </item>
    <item>
      <title>Re: Seeking Best Practices for End-to-End Data Pipeline Observability</title>
      <link>https://community.fabric.microsoft.com/t5/Data-Engineering/Seeking-Best-Practices-for-End-to-End-Data-Pipeline/m-p/4814183#M12105</link>
      <description>&lt;P&gt;Hi&amp;nbsp;@Anonymous&amp;nbsp;,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;Please let us know if you need any assistance from our end.&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;Thank you!!&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 02 Sep 2025 07:55:03 GMT</pubDate>
      <guid>https://community.fabric.microsoft.com/t5/Data-Engineering/Seeking-Best-Practices-for-End-to-End-Data-Pipeline/m-p/4814183#M12105</guid>
      <dc:creator>v-sathmakuri</dc:creator>
      <dc:date>2025-09-02T07:55:03Z</dc:date>
    </item>
  </channel>
</rss>

