<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: PySpark Notebook Using Structured Streaming with Delta Table Sink - Unsupported Operation Exception in Data Engineering</title>
    <link>https://community.fabric.microsoft.com/t5/Data-Engineering/PySpark-Notebook-Using-Structured-Streaming-with-Delta-Table/m-p/3379538#M943</link>
    <description>&lt;P&gt;Thanks,&amp;nbsp;&lt;a href="https://community.fabric.microsoft.com/t5/user/viewprofilepage/user-id/511406"&gt;@puneetvijwani&lt;/a&gt;.&amp;nbsp; It worked when I used the ABFS path but not the relative path or full URL.&lt;/P&gt;</description>
    <pubDate>Mon, 14 Aug 2023 18:41:03 GMT</pubDate>
    <dc:creator>dt3288</dc:creator>
    <dc:date>2023-08-14T18:41:03Z</dc:date>
    <item>
      <title>PySpark Notebook Using Structured Streaming with Delta Table Sink - Unsupported Operation Exception</title>
      <link>https://community.fabric.microsoft.com/t5/Data-Engineering/PySpark-Notebook-Using-Structured-Streaming-with-Delta-Table/m-p/3379079#M941</link>
      <description>&lt;P&gt;I'm encountering difficulty reproducing the PySpark notebook example using a delta table as a streaming sink in the following training module:&amp;nbsp;&lt;A href="https://learn.microsoft.com/en-us/training/modules/work-delta-lake-tables-fabric/5-use-delta-lake-streaming-data" target="_blank" rel="noopener"&gt;https://learn.microsoft.com/en-us/training/modules/work-delta-lake-tables-fabric/5-use-delta-lake-streaming-data&lt;/A&gt;.&amp;nbsp; The following error occurs when I run the notebook:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN class=""&gt;Py4JJavaError&lt;/SPAN&gt;&lt;SPAN&gt;: An error occurred while calling o4291.load. : java.lang.UnsupportedOperationException at org.apache.hadoop.fs.http.AbstractHttpFileSystem.listStatus(AbstractHttpFileSystem.java:95) at org.apache.hadoop.fs.http.HttpsFileSystem.listStatus(HttpsFileSystem.java:23) at org.apache.spark.util.HadoopFSUtils$.listLeafFiles(HadoopFSUtils.scala:225) at org.apache.spark.util.HadoopFSUtils$.$anonfun$parallelListLeafFilesInternal$1(HadoopFSUtils.scala:95) at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286) at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62) at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49) at scala.collection.TraversableLike.map(TraversableLike.scala:286) at scala.collection.TraversableLike.map$(TraversableLike.scala:279) at scala.collection.AbstractTraversable.map(Traversable.scala:108) at org.apache.spark.util.HadoopFSUtils$.parallelListLeafFilesInternal(HadoopFSUtils.scala:85) at org.apache.spark.util.HadoopFSUtils$.parallelListLeafFiles(HadoopFSUtils.scala:69) at org.apache.spark.sql.execution.datasources.InMemoryFileIndex$.bulkListLeafFiles(InMemoryFileIndex.scala:158) at org.apache.spark.sql.execution.datasources.InMemoryFileIndex.listLeafFiles(InMemoryFileIndex.scala:131) at 
org.apache.spark.sql.execution.datasources.InMemoryFileIndex.refresh0(InMemoryFileIndex.scala:94) at org.apache.spark.sql.execution.datasources.InMemoryFileIndex.&amp;lt;init&amp;gt;(InMemoryFileIndex.scala:66) at org.apache.spark.sql.execution.datasources.DataSource.createInMemoryFileIndex(DataSource.scala:567) at org.apache.spark.sql.execution.datasources.DataSource.$anonfun$sourceSchema$2(DataSource.scala:268) at org.apache.spark.sql.execution.datasources.DataSource.tempFileIndex$lzycompute$1(DataSource.scala:164) at org.apache.spark.sql.execution.datasources.DataSource.tempFileIndex$1(DataSource.scala:164) at org.apache.spark.sql.execution.datasources.DataSource.getOrInferFileFormatSchema(DataSource.scala:169) at org.apache.spark.sql.execution.datasources.DataSource.sourceSchema(DataSource.scala:262) at org.apache.spark.sql.execution.datasources.DataSource.sourceInfo$lzycompute(DataSource.scala:118) at org.apache.spark.sql.execution.datasources.DataSource.sourceInfo(DataSource.scala:118) at org.apache.spark.sql.execution.streaming.StreamingRelation$.apply(StreamingRelation.scala:34) at org.apache.spark.sql.streaming.DataStreamReader.loadInternal(DataStreamReader.scala:196) at org.apache.spark.sql.streaming.DataStreamReader.load(DataStreamReader.scala:210) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244) at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357) at py4j.Gateway.invoke(Gateway.java:282) at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132) at py4j.commands.CallCommand.execute(CallCommand.java:79) at py4j.GatewayConnection.run(GatewayConnection.java:238) at 
java.lang.Thread.run(Thread.java:750)&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I've tried using both a JSON and CSV file with the following data:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="java"&gt;{
    "device":"Dev1"
    ,"status":"ok"
}
,{
    "device":"Dev2"
    ,"status":"ok"
}&lt;/LI-CODE&gt;&lt;LI-CODE lang="java"&gt;device,status
device1,ok&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;Here is the code I'm attempting to run:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;# Welcome to your new notebook
# Type here in the cell editor to add code!
from pyspark.sql.types import *
from pyspark.sql.functions import *

# Create a stream that reads JSON data from a folder
inputPath = '&amp;lt;Full HTTPS URL from file properties in lakehouse here&amp;gt;/testdata.csv'
#jsonSchema = StructType([
csvSchema = StructType([
    StructField("device", StringType(), False),
    StructField("status", StringType(), False)
])
#stream_df = spark.readStream.schema(jsonSchema).option("maxFilesPerTrigger", 1).json(inputPath)
#stream_df = spark.readStream.schema(csvSchema).option("maxFilesPerTrigger", 1).csv(inputPath)
stream_df = spark.readStream.format("csv").schema(csvSchema).option("header",True).option("maxFilesPerTrigger",1).load(inputPath)

# Write the stream to a delta table
#table_path = 'Files/delta_tables/devicetable'
#checkpoint_path = 'Files/delta_tables/checkpoint'
#delta_stream = stream_df.writeStream.format("delta").option("checkpointLocation", checkpoint_path).start(table_path)&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;The error occurs on lines 13, 14 or 15 -- all three variations return the same error.&amp;nbsp; I'm not an expert in PySpark yet, but the error is not very clear.&amp;nbsp; I don't see any messages related to parsing the data, and the data schema seems simple enough.&amp;nbsp; Perhaps the issue is related to dependencies?&amp;nbsp; I'm at an impasse.&lt;/P&gt;</description>
      <pubDate>Mon, 14 Aug 2023 13:33:10 GMT</pubDate>
      <guid>https://community.fabric.microsoft.com/t5/Data-Engineering/PySpark-Notebook-Using-Structured-Streaming-with-Delta-Table/m-p/3379079#M941</guid>
      <dc:creator>dt3288</dc:creator>
      <dc:date>2023-08-14T13:33:10Z</dc:date>
    </item>
    <item>
      <title>Re: PySpark Notebook Using Structured Streaming with Delta Table Sink - Unsupported Operation Exception</title>
      <link>https://community.fabric.microsoft.com/t5/Data-Engineering/PySpark-Notebook-Using-Structured-Streaming-with-Delta-Table/m-p/3379499#M942</link>
      <description>&lt;P&gt;&lt;a href="https://community.fabric.microsoft.com/t5/user/viewprofilepage/user-id/602722"&gt;@dt3288&lt;/a&gt;&amp;nbsp;i have used structured streaming for incremental load before and presented on one of my sessions&amp;nbsp;&lt;BR /&gt;&lt;A href="https://www.youtube.com/watch?v=bNdKX-9nXTs" target="_blank"&gt;https://www.youtube.com/watch?v=bNdKX-9nXTs&lt;/A&gt;&lt;BR /&gt;And reference notebooks are here&amp;nbsp;&lt;BR /&gt;&lt;A href="https://github.com/puneetvijwani/fabricNotebooks" target="_blank"&gt;https://github.com/puneetvijwani/fabricNotebooks&lt;/A&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;U&gt;Also i have tested your code it seems working fine for reading the testdata.csv as stream as i loaded in Files (lakehouse) and used relative path however you can also try copying abfss path by right clicking the file and copy ABFS path&lt;/U&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;# Welcome to your new notebook&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;# Type here in the cell editor to add code!&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;from&lt;/SPAN&gt;&lt;SPAN&gt; pyspark.sql.types &lt;/SPAN&gt;&lt;SPAN&gt;import&lt;/SPAN&gt;&lt;SPAN&gt; *&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;from&lt;/SPAN&gt;&lt;SPAN&gt; pyspark.sql.functions &lt;/SPAN&gt;&lt;SPAN&gt;import&lt;/SPAN&gt;&lt;SPAN&gt; *&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;# Create a stream that reads JSON data from a folder&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;inputPath = &lt;/SPAN&gt;&lt;STRONG&gt;'Files/testdata.csv'&lt;/STRONG&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;#jsonSchema = StructType([&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;csvSchema = StructType([&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; StructField(&lt;/SPAN&gt;&lt;SPAN&gt;"device"&lt;/SPAN&gt;&lt;SPAN&gt;, StringType(), 
&lt;/SPAN&gt;&lt;SPAN&gt;False&lt;/SPAN&gt;&lt;SPAN&gt;),&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; StructField(&lt;/SPAN&gt;&lt;SPAN&gt;"status"&lt;/SPAN&gt;&lt;SPAN&gt;, StringType(), &lt;/SPAN&gt;&lt;SPAN&gt;False&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;])&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;#stream_df = spark.readStream.schema(jsonSchema).option("maxFilesPerTrigger", 1).json(inputPath)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;#stream_df = spark.readStream.schema(csvSchema).option("maxFilesPerTrigger", 1).csv(inputPath)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;stream_df = spark.readStream.&lt;/SPAN&gt;&lt;SPAN&gt;format&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"csv"&lt;/SPAN&gt;&lt;SPAN&gt;).schema(csvSchema).option(&lt;/SPAN&gt;&lt;SPAN&gt;"header"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt;&lt;SPAN&gt;True&lt;/SPAN&gt;&lt;SPAN&gt;).option(&lt;/SPAN&gt;&lt;SPAN&gt;"maxFilesPerTrigger"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt;&lt;SPAN&gt;1&lt;/SPAN&gt;&lt;SPAN&gt;).load(inputPath)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;</description>
      <pubDate>Mon, 14 Aug 2023 18:21:59 GMT</pubDate>
      <guid>https://community.fabric.microsoft.com/t5/Data-Engineering/PySpark-Notebook-Using-Structured-Streaming-with-Delta-Table/m-p/3379499#M942</guid>
      <dc:creator>puneetvijwani</dc:creator>
      <dc:date>2023-08-14T18:21:59Z</dc:date>
    </item>
    <item>
      <title>Re: PySpark Notebook Using Structured Streaming with Delta Table Sink - Unsupported Operation Exception</title>
      <link>https://community.fabric.microsoft.com/t5/Data-Engineering/PySpark-Notebook-Using-Structured-Streaming-with-Delta-Table/m-p/3379538#M943</link>
      <description>&lt;P&gt;Thanks,&amp;nbsp;&lt;a href="https://community.fabric.microsoft.com/t5/user/viewprofilepage/user-id/511406"&gt;@puneetvijwani&lt;/a&gt;.&amp;nbsp; It worked when I used the ABFS path but not the relative path or full URL.&lt;/P&gt;</description>
      <pubDate>Mon, 14 Aug 2023 18:41:03 GMT</pubDate>
      <guid>https://community.fabric.microsoft.com/t5/Data-Engineering/PySpark-Notebook-Using-Structured-Streaming-with-Delta-Table/m-p/3379538#M943</guid>
      <dc:creator>dt3288</dc:creator>
      <dc:date>2023-08-14T18:41:03Z</dc:date>
    </item>
    <item>
      <title>Re: PySpark Notebook Using Structured Streaming with Delta Table Sink - Unsupported Operation Exception</title>
      <link>https://community.fabric.microsoft.com/t5/Data-Engineering/PySpark-Notebook-Using-Structured-Streaming-with-Delta-Table/m-p/3379554#M944</link>
      <description>&lt;P&gt;&lt;a href="https://community.fabric.microsoft.com/t5/user/viewprofilepage/user-id/602722"&gt;@dt3288&lt;/a&gt;&amp;nbsp;&amp;nbsp;Glad to know it worked. Feel free to mark this as the accepted "solution" if you want,&amp;nbsp;and if you're feeling very kind, give me a Kudos&amp;nbsp;&lt;span class="lia-unicode-emoji" title=":grinning_face:"&gt;😀&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 14 Aug 2023 18:45:57 GMT</pubDate>
      <guid>https://community.fabric.microsoft.com/t5/Data-Engineering/PySpark-Notebook-Using-Structured-Streaming-with-Delta-Table/m-p/3379554#M944</guid>
      <dc:creator>puneetvijwani</dc:creator>
      <dc:date>2023-08-14T18:45:57Z</dc:date>
    </item>
    <item>
      <title>Re: PySpark Notebook Using Structured Streaming with Delta Table Sink - Unsupported Operation Exception</title>
      <link>https://community.fabric.microsoft.com/t5/Data-Engineering/PySpark-Notebook-Using-Structured-Streaming-with-Delta-Table/m-p/3383908#M945</link>
      <description>&lt;P&gt;It is great, thanks for sharing&lt;/P&gt;</description>
      <pubDate>Wed, 16 Aug 2023 22:55:49 GMT</pubDate>
      <guid>https://community.fabric.microsoft.com/t5/Data-Engineering/PySpark-Notebook-Using-Structured-Streaming-with-Delta-Table/m-p/3383908#M945</guid>
      <dc:creator>VahidDM</dc:creator>
      <dc:date>2023-08-16T22:55:49Z</dc:date>
    </item>
  </channel>
</rss>

