Power BI is turning 10! Tune in for a special live episode on July 24 with behind-the-scenes stories, product evolution highlights, and a sneak peek at what’s in store for the future.
Save the dateEnhance your career with this limited time 50% discount on Fabric and Power BI exams. Ends August 31st. Request your voucher.
Hi
I am trying to read lots of PDF files in which I have my data to PowerQuery, however I noticed that the data is taken from the first page only of each PDF file, is there a way to read all pages of my PDFs?
Here is my advanced editor's code, don't mind the other steps as they are mainly to transform my data
let
Source = Folder.Files("C:\ETAT DES TOURNEES DU 2024"),
#"Fichiers masqués filtrés1" = Table.SelectRows(Source, each [Attributes]?[Hidden]? <> true),
#"Appeler une fonction personnalisée1" = Table.AddColumn(#"Fichiers masqués filtrés1", "Transformer le fichier", each #"Transformer le fichier"([Content])),
#"Colonnes renommées1" = Table.RenameColumns(#"Appeler une fonction personnalisée1", {"Name", "Source.Name"}),
#"Autres colonnes supprimées1" = Table.SelectColumns(#"Colonnes renommées1", {"Source.Name", "Transformer le fichier"}),
#"Colonne de tables développée1" = Table.ExpandTableColumn(#"Autres colonnes supprimées1", "Transformer le fichier", Table.ColumnNames(#"Transformer le fichier"(#"Exemple de fichier"))),
#"Type modifié" = Table.TransformColumnTypes(#"Colonne de tables développée1",{{"Source.Name", type text}, {"Column1", type text}, {"Column2", type text}, {"Column3", type text}, {"Column4", type text}, {"Column5", type text}, {"Column6", type text}, {"Column7", type text}}),
#"Lignes filtrées" = Table.SelectRows(#"Type modifié", each ([Column3] <> "ETAT DES TOURNEES CAMIONS")),
#"Rempli vers le bas" = Table.FillDown(#"Lignes filtrées",{"Column1"}),
#"Lignes filtrées1" = Table.SelectRows(#"Rempli vers le bas", each ([Column1] <> null)),
#"Personnalisée ajoutée" = Table.AddColumn(#"Lignes filtrées1", "Personnalisé", each if ([Column2] = null or [Column2] = "") and [Column3] <> null and [Column3] <> "" then [Column1] else null),
#"Rempli vers le bas1" = Table.FillDown(#"Personnalisée ajoutée",{"Personnalisé"}),
#"Colonnes permutées" = Table.ReorderColumns(#"Rempli vers le bas1",{"Source.Name", "Column1", "Personnalisé", "Column2", "Column3", "Column4", "Column5", "Column6", "Column7"}),
#"Colonnes supprimées" = Table.RemoveColumns(#"Colonnes permutées",{"Column2"}),
#"Personnalisée ajoutée1" = Table.AddColumn(#"Colonnes supprimées", "A supprimer", each if ([Column1] = [Personnalisé]) then "Oui" else "Non"),
#"Lignes filtrées2" = Table.SelectRows(#"Personnalisée ajoutée1", each ([A supprimer] = "Non") and ([Column3] <> null) and ([Column4] <> "PROCEDURE DE#(lf)L'ETAT DE TOURNEE")),
#"Valeur remplacée" = Table.ReplaceValue(#"Lignes filtrées2","-","",Replacer.ReplaceText,{"Source.Name"}),
#"Personnalisée ajoutée2" = Table.AddColumn(#"Valeur remplacée", "Personnalisé.1", each Date.FromText(Text.Select([Source.Name], {"0".."9"} ), [Format="ddMMyyyy", Culture="fr-FR"])),
#"Lignes filtrées3" = Table.SelectRows(#"Personnalisée ajoutée2", each true),
#"Rempli vers le bas2" = Table.FillDown(#"Lignes filtrées3",{"Personnalisé.1"}),
#"Erreurs supprimées" = Table.RemoveRowsWithErrors(#"Rempli vers le bas2", {"Personnalisé.1"}),
#"Type modifié1" = Table.TransformColumnTypes(#"Erreurs supprimées",{{"Personnalisé.1", type date}}),
#"Valeur remplacée1" = Table.ReplaceValue(#"Type modifié1",".","",Replacer.ReplaceText,{"Column5", "Column6", "Column8", "Column7"}),
#"Personnalisée ajoutée3" = Table.AddColumn(#"Valeur remplacée1", "Tonnage", each let
textCol5 = Text.Remove(Text.From([Column5]), {"0".."9", ",", " "}),
textCol6 = Text.Remove(Text.From([Column6]), {"0".."9", ",", " "}),
textCol7 = Text.Remove(Text.From([Column7]), {"0".."9", ",", " "}),
textCol8 = Text.Remove(Text.From([Column8]), {"0".."9", ",", " "}),
isNumeric5 = Text.Length(textCol5) = 0 and [Column5] <> null,
isNumeric6 = Text.Length(textCol6) = 0 and [Column6] <> null,
isNumeric7 = Text.Length(textCol7) = 0 and [Column7] <> null,
isNumeric8 = Text.Length(textCol8) = 0 and [Column8] <> null,
result = if isNumeric5 then [Column5]
else if isNumeric6 then [Column6]
else if isNumeric7 then [Column7]
else if isNumeric8 then [Column8]
else null
in
result),
#"Personnalisée ajoutée4" = Table.AddColumn(#"Personnalisée ajoutée3", "Suite client", each let
textCol5 = Text.Remove(Text.From([Column5]), {"0".."9", ",", " "}),
textCol6 = Text.Remove(Text.From([Column6]), {"0".."9", ",", " "}),
textCol7 = Text.Remove(Text.From([Column7]), {"0".."9", ",", " "}),
textCol8 = Text.Remove(Text.From([Column8]), {"0".."9", ",", " "}),
isNumeric5 = Text.Length(textCol5) = 0 and [Column5] <> null,
isNumeric6 = Text.Length(textCol6) = 0 and [Column6] <> null,
isNumeric7 = Text.Length(textCol7) = 0 and [Column7] <> null,
isNumeric8 = Text.Length(textCol8) = 0 and [Column8] <> null,
result = if isNumeric5 then null
else if isNumeric6 then [Column5]
else if isNumeric7 then Text.Combine({Text.From([Column5]),Text.From([Column6])}," ")
else if isNumeric8 then Text.Combine({Text.From([Column5]),Text.From([Column6]),Text.From([Column7])}," ")
else null
in
result),
#"Personnalisée ajoutée5" = Table.AddColumn(#"Personnalisée ajoutée4", "Commentaire", each let
textCol5 = Text.Remove(Text.From([Column5]), {"0".."9", ",", " "}),
textCol6 = Text.Remove(Text.From([Column6]), {"0".."9", ",", " "}),
textCol7 = Text.Remove(Text.From([Column7]), {"0".."9", ",", " "}),
textCol8 = Text.Remove(Text.From([Column8]), {"0".."9", ",", " "}),
isNumeric5 = Text.Length(textCol5) = 0 and [Column5] <> null and Text.Contains(Text.From([Column5]),","),
isNumeric6 = Text.Length(textCol6) = 0 and [Column6] <> null and Text.Contains(Text.From([Column6]),","),
isNumeric7 = Text.Length(textCol7) = 0 and [Column7] <> null and Text.Contains(Text.From([Column7]),","),
isNumeric8 = Text.Length(textCol8) = 0 and [Column8] <> null and Text.Contains(Text.From([Column8]),","),
result = if isNumeric5 then Text.Combine({Text.From([Column6]),Text.From([Column7]),Text.From([Column8])}," ")
else if isNumeric6 then Text.Combine({Text.From([Column7]),Text.From([Column8])}," ")
else if isNumeric7 then [Column8]
else if isNumeric8 then null
else Text.Combine({Text.From([Column5]),Text.From([Column6]),Text.From([Column7]),Text.From([Column8])}," ")
in
result),
#"Valeur remplacée2" = Table.ReplaceValue(#"Personnalisée ajoutée5"," ","",Replacer.ReplaceText,{"Tonnage"}),
#"Type modifié2" = Table.TransformColumnTypes(#"Valeur remplacée2",{{"Commentaire", type text}, {"Suite client", type text}, {"Tonnage", type number}}),
#"Colonne fusionnée insérée" = Table.AddColumn(#"Type modifié2", "Client", each Text.Combine({[Column4], [Suite client]}, " "), type text),
#"Colonnes supprimées1" = Table.RemoveColumns(#"Colonne fusionnée insérée",{"Column5", "Column6", "Column8", "Column7", "Suite client"}),
#"Colonnes renommées" = Table.RenameColumns(#"Colonnes supprimées1",{{"Column1", "Ville"}, {"Personnalisé", "Camion"}, {"Column3", "BL"}, {"Column4", "PDV"}, {"Personnalisé.1", "Date Livraison"}, {"Tonnage", "Tonnage kg"}, {"Source.Name", "Dispatch"}}),
#"Colonnes permutées1" = Table.ReorderColumns(#"Colonnes renommées",{"Dispatch", "Camion", "Ville", "BL", "PDV", "Tonnage kg", "Date Livraison", "A supprimer", "Commentaire", "Client"}),
#"Colonnes supprimées2" = Table.RemoveColumns(#"Colonnes permutées1",{"BL", "A supprimer", "Client", "Commentaire"}),
#"Lignes filtrées4" = Table.SelectRows(#"Colonnes supprimées2", each [Date Livraison] >= #date(2024, 1, 1) and [Date Livraison] <= #date(2024, 6, 30))
in
#"Lignes filtrées4"
Hi @medmbchr1989 ,
Did Omid_Motamedise reply solve your problem, if so, you can mark his reply as a solution so that other users with the same problem can find a solution faster. If not, you can continue to ask your question below.
Best regards,
Albert He
When you want to load all the data from pdf, instead of selecting any special page or table, select the file name as below
can you post one or two sample PDF files on a web share?