From d771bd512b2cc484a0b9286d66ecae1cd9a812c7 Mon Sep 17 00:00:00 2001
From: James Santucci <james.santucci@gmail.com>
Date: Sat, 6 Jun 2026 16:45:33 -0700
Subject: [PATCH 1/7] nix: Make project config consistent

* Make the examples' granite bound consistent with the non-example
  packages: non-examples want >= 0.6, while examples wanted 0.6.*
  (`^>= 0.6`); examples now use `>= 0.6 && < 1`
* Remove `flake.lock` from `.gitignore` -- this is an important file to
  have in version control; without it, reproducibility guarantees are
  out the window (if we're using `nixpkgs-*` as an input, then without
  the `flake.lock` the `nixpkgs-*` package set you use won't necessarily
  be the same `nixpkgs-*` package set that I use)
* Add (almost) the rest of the dataframe-* packages to the package
  outputs / dev shell
* Note: this commit also modifies
  dataframe-parquet/src/DataFrame/IO/Parquet.hs; those changes are
  reverted in PATCH 5/7
---
 .gitignore                                    |  3 +-
 dataframe-parquet/src/DataFrame/IO/Parquet.hs | 71 ++++++-------------
 examples/examples.cabal                       |  2 +-
 flake.lock                                    | 61 ++++++++++++++++
 flake.nix                                     | 62 +++++++++++++++-
 5 files changed, 144 insertions(+), 55 deletions(-)
 create mode 100644 flake.lock

diff --git a/.gitignore b/.gitignore
index 607f2e4a..ada5d0b5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -28,7 +28,6 @@ dataframe_benchmark/
 bin/
 coverage-html
 .DS_Store
-flake.lock
 tags
 __pycache__
 venv
@@ -45,4 +44,4 @@ Cargo.lock
 # (transient; the committed *.db fixtures themselves stay tracked).
 *.db-wal
 *.db-shm
-*.db-journal
\ No newline at end of file
+*.db-journal
diff --git a/dataframe-parquet/src/DataFrame/IO/Parquet.hs b/dataframe-parquet/src/DataFrame/IO/Parquet.hs
index 66e8ce0e..bcd7f653 100644
--- a/dataframe-parquet/src/DataFrame/IO/Parquet.hs
+++ b/dataframe-parquet/src/DataFrame/IO/Parquet.hs
@@ -401,28 +401,9 @@ getNonNullableColumn totalRows description chunks =
         PageDecoder a ->
         m Column
     go decoder =
-        foldNonNullable totalRows (foldColumnPagesM description decoder chunks)
-
-    -- Decode a non-nullable BYTE_ARRAY (UTF-8) column straight into a single
-    -- shared byte buffer + offsets ('PackedText'), instead of a boxed vector
-    -- of per-row 'Text'. Each page's decoded 'Text' values (which share the
-    -- chunk dictionary for dictionary-encoded pages) are appended by memcpy
-    -- into one builder across all pages/chunks, then frozen once. This is the
-    -- same representation the fast CSV reader uses and matches Arrow's string
-    -- layout: no retained per-row 'Text' headers, no eager UTF-8 validation.
-    goPackedText :: m Column
-    goPackedText = do
-        builder <- liftIO $ stToIO (newTextBuilder totalRows (totalRows * 8))
-        _ <-
-            foldColumnDataPagesM
-                description
-                chunks
-                ( \() (dict, enc, nPresent, valBytes, _, _) ->
-                    liftIO (appendStringPageIO builder dict enc nPresent valBytes)
-                )
-                ()
-        chunk <- liftIO $ stToIO (freezeTextChunk builder)
-        pure (mergeTextChunks [chunk])
+        foldNonNullable totalRows $
+            (\(vs, _, _) -> vs)
+                <$> Stream.unfoldMany (readPages description decoder) (Stream.fromList chunks)
 
     unboxedGo ::
         forall a.
@@ -430,7 +411,11 @@ getNonNullableColumn totalRows description chunks =
         UnboxedPageDecoder a ->
         m Column
     unboxedGo decoder =
-        foldNonNullableUnboxed totalRows (foldColumnPagesM description decoder chunks)
+        foldNonNullableUnboxed totalRows $
+            (\(vs, _, _) -> vs)
+                <$> Stream.unfoldMany
+                    (readPages description decoder)
+                    (Stream.fromList chunks)
 
 -- | Decode an optional (nullable) column.
 {-# INLINEABLE getNullableColumn #-}
@@ -464,36 +449,20 @@ getNullableColumn totalRows description chunks =
         PageDecoder a ->
         m Column
     go decoder =
-        foldNullable maxDef totalRows (foldColumnPagesM description decoder chunks)
-
-    -- Nullable BYTE_ARRAY (UTF-8): decode straight into a 'PackedText' (shared
-    -- byte buffer + offsets + validity bitmap) via the text builder, walking
-    -- def-levels to interleave nulls. Avoids the boxed @Vector Text@ the
-    -- generic 'foldNullable' path would build.
-    goPackedTextNullable :: m Column
-    goPackedTextNullable = do
-        builder <- liftIO $ stToIO (newTextBuilder totalRows (totalRows * 8))
-        _ <-
-            foldColumnDataPagesM
-                description
-                chunks
-                ( \() (dict, enc, nPresent, valBytes, defs, _) ->
-                    liftIO
-                        (appendNullableStringPageIO builder maxDef dict enc nPresent valBytes defs)
-                )
-                ()
-        chunk <- liftIO $ stToIO (freezeTextChunk builder)
-        pure (mergeTextChunks [chunk])
+        foldNullable maxDef totalRows $
+            (\(vs, ds, _) -> (vs, ds))
+                <$> Stream.unfoldMany (readPages description decoder) (Stream.fromList chunks)
     unboxedGo ::
         forall a.
         (Columnable a, VU.Unbox a) =>
         UnboxedPageDecoder a ->
         m Column
     unboxedGo decoder =
-        foldNullableUnboxed
-            maxDef
-            totalRows
-            (foldColumnPagesM description decoder chunks)
+        foldNullableUnboxed maxDef totalRows $
+            (\(vs, ds, _) -> (vs, ds))
+                <$> Stream.unfoldMany
+                    (readPages description decoder)
+                    (Stream.fromList chunks)
 
 -- | Decode a repeated (list/nested) column.
 {-# INLINEABLE getRepeatedColumn #-}
@@ -532,7 +501,8 @@ getRepeatedColumn description chunks =
         PageDecoder a ->
         m Column
     go decoder =
-        foldRepeated maxRep maxDef (foldColumnPagesM description decoder chunks)
+        foldRepeated maxRep maxDef $
+            Stream.unfoldMany (readPages description decoder) (Stream.fromList chunks)
 
     unboxedGo ::
         forall a.
@@ -545,7 +515,10 @@ getRepeatedColumn description chunks =
         UnboxedPageDecoder a ->
         m Column
     unboxedGo decoder =
-        foldRepeatedUnboxed maxRep maxDef (foldColumnPagesM description decoder chunks)
+        foldRepeatedUnboxed maxRep maxDef $
+            Stream.unfoldMany
+                (readPages description decoder)
+                (Stream.fromList chunks)
 
 -- Options application -----------------------------------------------------
 
diff --git a/examples/examples.cabal b/examples/examples.cabal
index 0cb1d9f7..e74248a2 100644
--- a/examples/examples.cabal
+++ b/examples/examples.cabal
@@ -139,7 +139,7 @@ executable examples
                       cassava >= 0.1 && < 1,
                       containers >= 0.6.7 && < 0.9,
                       directory >= 1.3.0.0 && < 2,
-                      granite ^>= 0.6,
+                      granite >= 0.6 && < 1,
                       hashable >= 1.2 && < 2,
                       hasktorch,
                       http-conduit,
diff --git a/flake.lock b/flake.lock
new file mode 100644
index 00000000..96f37ecf
--- /dev/null
+++ b/flake.lock
@@ -0,0 +1,61 @@
+{
+  "nodes": {
+    "flake-utils": {
+      "inputs": {
+        "systems": "systems"
+      },
+      "locked": {
+        "lastModified": 1731533236,
+        "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
+        "type": "github"
+      },
+      "original": {
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "type": "github"
+      }
+    },
+    "nixpkgs": {
+      "locked": {
+        "lastModified": 1780243769,
+        "narHash": "sha256-x5UQuRsH3MqI0U9afaXSNqzTPSeZlRLvFAav2Ux1pNw=",
+        "owner": "NixOS",
+        "repo": "nixpkgs",
+        "rev": "331800de5053fcebacf6813adb5db9c9dca22a0c",
+        "type": "github"
+      },
+      "original": {
+        "owner": "NixOS",
+        "ref": "nixos-unstable",
+        "repo": "nixpkgs",
+        "type": "github"
+      }
+    },
+    "root": {
+      "inputs": {
+        "flake-utils": "flake-utils",
+        "nixpkgs": "nixpkgs"
+      }
+    },
+    "systems": {
+      "locked": {
+        "lastModified": 1681028828,
+        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
+        "owner": "nix-systems",
+        "repo": "default",
+        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nix-systems",
+        "repo": "default",
+        "type": "github"
+      }
+    }
+  },
+  "root": "root",
+  "version": 7
+}
diff --git a/flake.nix b/flake.nix
index b5e3f8e4..b1f06b80 100644
--- a/flake.nix
+++ b/flake.nix
@@ -14,14 +14,42 @@
           repo = "granite";
           owner = "mchav";
           rev = "main";
-          hash = "sha256-Z/o8gxMOBltKiaL0NEjMUyOvUljRvKErWeM6Ul3GM9k=";
+          hash = "sha256-jmmI2+kbqe+X/CDP986qQnUMGR35iNW5deNLovHpBHA=";
+        };
+        pinchPkg = pkgs.fetchFromGitHub {
+          repo = "pinch";
+          owner = "abhinav";
+          rev = "v0.5.2.0";
+          hash = "sha256-kuCS4EePc4aIONCvF0sOZt4pCazAq1z9+a/AY9b7Q6c=";
+        };
+        networkRunPkg = pkgs.fetchFromGitHub {
+          repo = "network-run";
+          owner = "kazu-yamamoto";
+          rev = "v0.3.1";
+          hash = "sha256-xyyf+Le2x9ACJBE4ua7wWHsfOQHNi7D+DksghZFh35I=";
         };
 
         hsPkgs = pkgs.haskellPackages.extend (self: super: {
           granite = self.callCabal2nix "granite" granitePkg { };
+          network-run = self.callCabal2nix "network-run" networkRunPkg { };
+          pinch = self.callCabal2nix "pinch" pinchPkg { };
+          dataframe-arrow = self.callCabal2nix "dataframe-arrow" ./dataframe-arrow { };
+          dataframe-core = self.callCabal2nix "dataframe-core" ./dataframe-core { };
+          dataframe-csv = self.callCabal2nix "dataframe-csv" ./dataframe-csv { };
+          dataframe-csv-th = self.callCabal2nix "dataframe-csv-th" ./dataframe-csv-th { };
           dataframe-fastcsv = self.callCabal2nix "dataframe-fastcsv" ./dataframe-fastcsv { };
-          dataframe-persistent = self.callCabal2nix "dataframe-persistent" ./dataframe-persistent { };
+          # dataframe-fusion = self.callCabal2nix "dataframe-fusion" ./dataframe-fusion { };
           dataframe-hasktorch = self.callCabal2nix "dataframe-hasktorch" ./dataframe-hasktorch { };
+          dataframe-json = self.callCabal2nix "dataframe-json" ./dataframe-json { };
+          dataframe-lazy = self.callCabal2nix "dataframe-lazy" ./dataframe-lazy { };
+          dataframe-learn = self.callCabal2nix "dataframe-learn" ./dataframe-learn { };
+          dataframe-operations = self.callCabal2nix "dataframe-operations" ./dataframe-operations { };
+          dataframe-parquet = self.callCabal2nix "dataframe-parquet" ./dataframe-parquet { };
+          dataframe-parquet-th = self.callCabal2nix "dataframe-parquet-th" ./dataframe-parquet-th { };
+          dataframe-parsing = self.callCabal2nix "dataframe-parsing" ./dataframe-parsing { };
+          dataframe-persistent = self.callCabal2nix "dataframe-persistent" ./dataframe-persistent { };
+          dataframe-th = self.callCabal2nix "dataframe-th" ./dataframe-th { };
+          dataframe-viz = self.callCabal2nix "dataframe-viz" ./dataframe-viz { };
           dataframe = self.callCabal2nix "dataframe" ./. { };
         });
       in
@@ -29,17 +57,45 @@
         packages = {
           default = hsPkgs.dataframe;
           dataframe = hsPkgs.dataframe;
+          dataframe-arrow = hsPkgs.dataframe-arrow;
+          dataframe-core = hsPkgs.dataframe-core;
+          dataframe-csv = hsPkgs.dataframe-csv;
+          dataframe-csv-th = hsPkgs.dataframe-csv-th;
           dataframe-fastcsv = hsPkgs.dataframe-fastcsv;
+          # dataframe-fusion = hsPkgs.dataframe-fusion;
           dataframe-hasktorch = hsPkgs.dataframe-hasktorch;
+          dataframe-json = hsPkgs.dataframe-json;
+          dataframe-lazy = hsPkgs.dataframe-lazy;
+          dataframe-learn = hsPkgs.dataframe-learn;
+          dataframe-operations = hsPkgs.dataframe-operations;
+          dataframe-parquet = hsPkgs.dataframe-parquet;
+          dataframe-parquet-th = hsPkgs.dataframe-parquet-th;
+          dataframe-parsing = hsPkgs.dataframe-parsing;
           dataframe-persistent = hsPkgs.dataframe-persistent;
+          dataframe-th = hsPkgs.dataframe-th;
+          dataframe-viz = hsPkgs.dataframe-viz;
         };
 
         devShells.default = hsPkgs.shellFor {
           packages = ps: [
             ps.dataframe
+            ps.dataframe-arrow
+            ps.dataframe-core
+            ps.dataframe-csv
+            ps.dataframe-csv-th
             ps.dataframe-fastcsv
-            ps.dataframe-persistent
+            # ps.dataframe-fusion
             ps.dataframe-hasktorch
+            ps.dataframe-json
+            ps.dataframe-lazy
+            ps.dataframe-learn
+            ps.dataframe-operations
+            ps.dataframe-parquet
+            ps.dataframe-parquet-th
+            ps.dataframe-parsing
+            ps.dataframe-persistent
+            ps.dataframe-th
+            ps.dataframe-viz
           ];
           nativeBuildInputs = with hsPkgs; [
             ghc

From 28735cd213db06219a515ca001a6d1aa5b98a77e Mon Sep 17 00:00:00 2001
From: James Santucci <james.santucci@gmail.com>
Date: Tue, 30 Jun 2026 17:36:37 -0700
Subject: [PATCH 2/7] update granite rev

---
 flake.nix | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/flake.nix b/flake.nix
index b1f06b80..e5b0f113 100644
--- a/flake.nix
+++ b/flake.nix
@@ -13,8 +13,9 @@
         granitePkg = pkgs.fetchFromGitHub {
           repo = "granite";
           owner = "mchav";
-          rev = "main";
-          hash = "sha256-jmmI2+kbqe+X/CDP986qQnUMGR35iNW5deNLovHpBHA=";
+          # main as of 2026/06/30
+          rev = "b3e83fc42ef3a3e032f58072ae1962281a7b2b00";
+          hash = "sha256-xT85Kdsk1tFD3+7Tuv69hpTwB/NPwJ1KFus1MfPIGBE=";
         };
         pinchPkg = pkgs.fetchFromGitHub {
           repo = "pinch";

From 15c5e0525f0d30958ce5e3b919af50f793485f3e Mon Sep 17 00:00:00 2001
From: James Santucci <james.santucci@gmail.com>
Date: Tue, 30 Jun 2026 17:36:44 -0700
Subject: [PATCH 3/7] nix: add dataframe-expr-serializer and dataframe-huggingface

---
 flake.nix | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/flake.nix b/flake.nix
index e5b0f113..47d54dc3 100644
--- a/flake.nix
+++ b/flake.nix
@@ -38,9 +38,11 @@
           dataframe-core = self.callCabal2nix "dataframe-core" ./dataframe-core { };
           dataframe-csv = self.callCabal2nix "dataframe-csv" ./dataframe-csv { };
           dataframe-csv-th = self.callCabal2nix "dataframe-csv-th" ./dataframe-csv-th { };
+          dataframe-expr-serializer = self.callCabal2nix "dataframe-expr-serializer" ./dataframe-expr-serializer { };
           dataframe-fastcsv = self.callCabal2nix "dataframe-fastcsv" ./dataframe-fastcsv { };
           # dataframe-fusion = self.callCabal2nix "dataframe-fusion" ./dataframe-fusion { };
           dataframe-hasktorch = self.callCabal2nix "dataframe-hasktorch" ./dataframe-hasktorch { };
+          dataframe-huggingface = self.callCabal2nix "dataframe-huggingface" ./dataframe-huggingface { };
           dataframe-json = self.callCabal2nix "dataframe-json" ./dataframe-json { };
           dataframe-lazy = self.callCabal2nix "dataframe-lazy" ./dataframe-lazy { };
           dataframe-learn = self.callCabal2nix "dataframe-learn" ./dataframe-learn { };
@@ -84,9 +86,11 @@
             ps.dataframe-core
             ps.dataframe-csv
             ps.dataframe-csv-th
+            ps.dataframe-expr-serializer
             ps.dataframe-fastcsv
             # ps.dataframe-fusion
             ps.dataframe-hasktorch
+            ps.dataframe-huggingface
             ps.dataframe-json
             ps.dataframe-lazy
             ps.dataframe-learn

From b21897c013e40df9dc0b6abcb39c9a1a43102741 Mon Sep 17 00:00:00 2001
From: James Santucci <james.santucci@gmail.com>
Date: Tue, 30 Jun 2026 17:54:08 -0700
Subject: [PATCH 4/7] parallel -> 3.3.0.0 for df-learn + fmt

---
 flake.nix | 69 +++++++++++++++++++++++++++++++++----------------------
 1 file changed, 41 insertions(+), 28 deletions(-)

diff --git a/flake.nix b/flake.nix
index 47d54dc3..2c8470f3 100644
--- a/flake.nix
+++ b/flake.nix
@@ -6,8 +6,14 @@
     flake-utils.url = "github:numtide/flake-utils";
   };
 
-  outputs = { self, nixpkgs, flake-utils }:
-    flake-utils.lib.eachDefaultSystem (system:
+  outputs =
+    {
+      self,
+      nixpkgs,
+      flake-utils,
+    }:
+    flake-utils.lib.eachDefaultSystem (
+      system:
       let
         pkgs = nixpkgs.legacyPackages.${system};
         granitePkg = pkgs.fetchFromGitHub {
@@ -30,31 +36,37 @@
           hash = "sha256-xyyf+Le2x9ACJBE4ua7wWHsfOQHNi7D+DksghZFh35I=";
         };
 
-        hsPkgs = pkgs.haskellPackages.extend (self: super: {
-          granite = self.callCabal2nix "granite" granitePkg { };
-          network-run = self.callCabal2nix "network-run" networkRunPkg { };
-          pinch = self.callCabal2nix "pinch" pinchPkg { };
-          dataframe-arrow = self.callCabal2nix "dataframe-arrow" ./dataframe-arrow { };
-          dataframe-core = self.callCabal2nix "dataframe-core" ./dataframe-core { };
-          dataframe-csv = self.callCabal2nix "dataframe-csv" ./dataframe-csv { };
-          dataframe-csv-th = self.callCabal2nix "dataframe-csv-th" ./dataframe-csv-th { };
-          dataframe-expr-serializer = self.callCabal2nix "dataframe-expr-serializer" ./dataframe-expr-serializer { };
-          dataframe-fastcsv = self.callCabal2nix "dataframe-fastcsv" ./dataframe-fastcsv { };
-          # dataframe-fusion = self.callCabal2nix "dataframe-fusion" ./dataframe-fusion { };
-          dataframe-hasktorch = self.callCabal2nix "dataframe-hasktorch" ./dataframe-hasktorch { };
-          dataframe-huggingface = self.callCabal2nix "dataframe-huggingface" ./dataframe-huggingface { };
-          dataframe-json = self.callCabal2nix "dataframe-json" ./dataframe-json { };
-          dataframe-lazy = self.callCabal2nix "dataframe-lazy" ./dataframe-lazy { };
-          dataframe-learn = self.callCabal2nix "dataframe-learn" ./dataframe-learn { };
-          dataframe-operations = self.callCabal2nix "dataframe-operations" ./dataframe-operations { };
-          dataframe-parquet = self.callCabal2nix "dataframe-parquet" ./dataframe-parquet { };
-          dataframe-parquet-th = self.callCabal2nix "dataframe-parquet-th" ./dataframe-parquet-th { };
-          dataframe-parsing = self.callCabal2nix "dataframe-parsing" ./dataframe-parsing { };
-          dataframe-persistent = self.callCabal2nix "dataframe-persistent" ./dataframe-persistent { };
-          dataframe-th = self.callCabal2nix "dataframe-th" ./dataframe-th { };
-          dataframe-viz = self.callCabal2nix "dataframe-viz" ./dataframe-viz { };
-          dataframe = self.callCabal2nix "dataframe" ./. { };
-        });
+        hsPkgs = pkgs.haskellPackages.extend (
+          self: super: {
+            granite = self.callCabal2nix "granite" granitePkg { };
+            network-run = self.callCabal2nix "network-run" networkRunPkg { };
+            pinch = self.callCabal2nix "pinch" pinchPkg { };
+            dataframe-arrow = self.callCabal2nix "dataframe-arrow" ./dataframe-arrow { };
+            dataframe-core = self.callCabal2nix "dataframe-core" ./dataframe-core { };
+            dataframe-csv = self.callCabal2nix "dataframe-csv" ./dataframe-csv { };
+            dataframe-csv-th = self.callCabal2nix "dataframe-csv-th" ./dataframe-csv-th { };
+            dataframe-expr-serializer =
+              self.callCabal2nix "dataframe-expr-serializer" ./dataframe-expr-serializer
+                { };
+            dataframe-fastcsv = self.callCabal2nix "dataframe-fastcsv" ./dataframe-fastcsv { };
+            # dataframe-fusion = self.callCabal2nix "dataframe-fusion" ./dataframe-fusion { };
+            dataframe-hasktorch = self.callCabal2nix "dataframe-hasktorch" ./dataframe-hasktorch { };
+            dataframe-huggingface = self.callCabal2nix "dataframe-huggingface" ./dataframe-huggingface { };
+            dataframe-json = self.callCabal2nix "dataframe-json" ./dataframe-json { };
+            dataframe-lazy = self.callCabal2nix "dataframe-lazy" ./dataframe-lazy { };
+            dataframe-learn = self.callCabal2nix "dataframe-learn" ./dataframe-learn {
+              parallel = pkgs.haskell.lib.dontCheck (self.callHackage "parallel" "3.3.0.0" { });
+            };
+            dataframe-operations = self.callCabal2nix "dataframe-operations" ./dataframe-operations { };
+            dataframe-parquet = self.callCabal2nix "dataframe-parquet" ./dataframe-parquet { };
+            dataframe-parquet-th = self.callCabal2nix "dataframe-parquet-th" ./dataframe-parquet-th { };
+            dataframe-parsing = self.callCabal2nix "dataframe-parsing" ./dataframe-parsing { };
+            dataframe-persistent = self.callCabal2nix "dataframe-persistent" ./dataframe-persistent { };
+            dataframe-th = self.callCabal2nix "dataframe-th" ./dataframe-th { };
+            dataframe-viz = self.callCabal2nix "dataframe-viz" ./dataframe-viz { };
+            dataframe = self.callCabal2nix "dataframe" ./. { };
+          }
+        );
       in
       {
         packages = {
@@ -109,5 +121,6 @@
           ];
           withHoogle = true;
         };
-      });
+      }
+    );
 }

From 3104fccaef734b28058e512fbfc4184fadbee178 Mon Sep 17 00:00:00 2001
From: James Santucci <james.santucci@gmail.com>
Date: Tue, 30 Jun 2026 17:57:00 -0700
Subject: [PATCH 5/7] Revert changes to IO/Parquet.hs

---
 dataframe-parquet/src/DataFrame/IO/Parquet.hs | 71 +++++++++++++------
 1 file changed, 49 insertions(+), 22 deletions(-)

diff --git a/dataframe-parquet/src/DataFrame/IO/Parquet.hs b/dataframe-parquet/src/DataFrame/IO/Parquet.hs
index bcd7f653..66e8ce0e 100644
--- a/dataframe-parquet/src/DataFrame/IO/Parquet.hs
+++ b/dataframe-parquet/src/DataFrame/IO/Parquet.hs
@@ -401,9 +401,28 @@ getNonNullableColumn totalRows description chunks =
         PageDecoder a ->
         m Column
     go decoder =
-        foldNonNullable totalRows $
-            (\(vs, _, _) -> vs)
-                <$> Stream.unfoldMany (readPages description decoder) (Stream.fromList chunks)
+        foldNonNullable totalRows (foldColumnPagesM description decoder chunks)
+
+    -- Decode a non-nullable BYTE_ARRAY (UTF-8) column straight into a single
+    -- shared byte buffer + offsets ('PackedText'), instead of a boxed vector
+    -- of per-row 'Text'. Each page's decoded 'Text' values (which share the
+    -- chunk dictionary for dictionary-encoded pages) are appended by memcpy
+    -- into one builder across all pages/chunks, then frozen once. This is the
+    -- same representation the fast CSV reader uses and matches Arrow's string
+    -- layout: no retained per-row 'Text' headers, no eager UTF-8 validation.
+    goPackedText :: m Column
+    goPackedText = do
+        builder <- liftIO $ stToIO (newTextBuilder totalRows (totalRows * 8))
+        _ <-
+            foldColumnDataPagesM
+                description
+                chunks
+                ( \() (dict, enc, nPresent, valBytes, _, _) ->
+                    liftIO (appendStringPageIO builder dict enc nPresent valBytes)
+                )
+                ()
+        chunk <- liftIO $ stToIO (freezeTextChunk builder)
+        pure (mergeTextChunks [chunk])
 
     unboxedGo ::
         forall a.
@@ -411,11 +430,7 @@ getNonNullableColumn totalRows description chunks =
         UnboxedPageDecoder a ->
         m Column
     unboxedGo decoder =
-        foldNonNullableUnboxed totalRows $
-            (\(vs, _, _) -> vs)
-                <$> Stream.unfoldMany
-                    (readPages description decoder)
-                    (Stream.fromList chunks)
+        foldNonNullableUnboxed totalRows (foldColumnPagesM description decoder chunks)
 
 -- | Decode an optional (nullable) column.
 {-# INLINEABLE getNullableColumn #-}
@@ -449,20 +464,36 @@ getNullableColumn totalRows description chunks =
         PageDecoder a ->
         m Column
     go decoder =
-        foldNullable maxDef totalRows $
-            (\(vs, ds, _) -> (vs, ds))
-                <$> Stream.unfoldMany (readPages description decoder) (Stream.fromList chunks)
+        foldNullable maxDef totalRows (foldColumnPagesM description decoder chunks)
+
+    -- Nullable BYTE_ARRAY (UTF-8): decode straight into a 'PackedText' (shared
+    -- byte buffer + offsets + validity bitmap) via the text builder, walking
+    -- def-levels to interleave nulls. Avoids the boxed @Vector Text@ the
+    -- generic 'foldNullable' path would build.
+    goPackedTextNullable :: m Column
+    goPackedTextNullable = do
+        builder <- liftIO $ stToIO (newTextBuilder totalRows (totalRows * 8))
+        _ <-
+            foldColumnDataPagesM
+                description
+                chunks
+                ( \() (dict, enc, nPresent, valBytes, defs, _) ->
+                    liftIO
+                        (appendNullableStringPageIO builder maxDef dict enc nPresent valBytes defs)
+                )
+                ()
+        chunk <- liftIO $ stToIO (freezeTextChunk builder)
+        pure (mergeTextChunks [chunk])
     unboxedGo ::
         forall a.
         (Columnable a, VU.Unbox a) =>
         UnboxedPageDecoder a ->
         m Column
     unboxedGo decoder =
-        foldNullableUnboxed maxDef totalRows $
-            (\(vs, ds, _) -> (vs, ds))
-                <$> Stream.unfoldMany
-                    (readPages description decoder)
-                    (Stream.fromList chunks)
+        foldNullableUnboxed
+            maxDef
+            totalRows
+            (foldColumnPagesM description decoder chunks)
 
 -- | Decode a repeated (list/nested) column.
 {-# INLINEABLE getRepeatedColumn #-}
@@ -501,8 +532,7 @@ getRepeatedColumn description chunks =
         PageDecoder a ->
         m Column
     go decoder =
-        foldRepeated maxRep maxDef $
-            Stream.unfoldMany (readPages description decoder) (Stream.fromList chunks)
+        foldRepeated maxRep maxDef (foldColumnPagesM description decoder chunks)
 
     unboxedGo ::
         forall a.
@@ -515,10 +545,7 @@ getRepeatedColumn description chunks =
         UnboxedPageDecoder a ->
         m Column
     unboxedGo decoder =
-        foldRepeatedUnboxed maxRep maxDef $
-            Stream.unfoldMany
-                (readPages description decoder)
-                (Stream.fromList chunks)
+        foldRepeatedUnboxed maxRep maxDef (foldColumnPagesM description decoder chunks)
 
 -- Options application -----------------------------------------------------
 

From 6b51abd6827d34854323804aca93012543ee242b Mon Sep 17 00:00:00 2001
From: James Santucci <james.santucci@gmail.com>
Date: Tue, 30 Jun 2026 18:11:49 -0700
Subject: [PATCH 6/7] Move package overrides inside what depends on them

---
 flake.nix | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/flake.nix b/flake.nix
index 2c8470f3..2f2d5fb6 100644
--- a/flake.nix
+++ b/flake.nix
@@ -38,9 +38,7 @@
 
         hsPkgs = pkgs.haskellPackages.extend (
           self: super: {
-            granite = self.callCabal2nix "granite" granitePkg { };
             network-run = self.callCabal2nix "network-run" networkRunPkg { };
-            pinch = self.callCabal2nix "pinch" pinchPkg { };
             dataframe-arrow = self.callCabal2nix "dataframe-arrow" ./dataframe-arrow { };
             dataframe-core = self.callCabal2nix "dataframe-core" ./dataframe-core { };
             dataframe-csv = self.callCabal2nix "dataframe-csv" ./dataframe-csv { };
@@ -58,12 +56,16 @@
               parallel = pkgs.haskell.lib.dontCheck (self.callHackage "parallel" "3.3.0.0" { });
             };
             dataframe-operations = self.callCabal2nix "dataframe-operations" ./dataframe-operations { };
-            dataframe-parquet = self.callCabal2nix "dataframe-parquet" ./dataframe-parquet { };
+            dataframe-parquet = self.callCabal2nix "dataframe-parquet" ./dataframe-parquet {
+              pinch = self.callCabal2nix "pinch" pinchPkg { };
+            };
             dataframe-parquet-th = self.callCabal2nix "dataframe-parquet-th" ./dataframe-parquet-th { };
             dataframe-parsing = self.callCabal2nix "dataframe-parsing" ./dataframe-parsing { };
             dataframe-persistent = self.callCabal2nix "dataframe-persistent" ./dataframe-persistent { };
             dataframe-th = self.callCabal2nix "dataframe-th" ./dataframe-th { };
-            dataframe-viz = self.callCabal2nix "dataframe-viz" ./dataframe-viz { };
+            dataframe-viz = self.callCabal2nix "dataframe-viz" ./dataframe-viz {
+              granite = self.callCabal2nix "granite" granitePkg { };
+            };
             dataframe = self.callCabal2nix "dataframe" ./. { };
           }
         );

From 849ea8fd459e546c33bde61e107578def14c633b Mon Sep 17 00:00:00 2001
From: James Santucci <james.santucci@gmail.com>
Date: Wed, 1 Jul 2026 17:24:52 -0700
Subject: [PATCH 7/7] Move additional haskell deps into inputs

---
 flake.lock | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 flake.nix  | 40 +++++++++++++++++-----------------------
 2 files changed, 69 insertions(+), 24 deletions(-)

diff --git a/flake.lock b/flake.lock
index 96f37ecf..2b42c650 100644
--- a/flake.lock
+++ b/flake.lock
@@ -18,6 +18,38 @@
         "type": "github"
       }
     },
+    "granite": {
+      "flake": false,
+      "locked": {
+        "lastModified": 1782949848,
+        "narHash": "sha256-oPoDjgrep4DgOTH+UatooiUdVDMBjMRv+ai3fIvulTE=",
+        "owner": "mchav",
+        "repo": "granite",
+        "rev": "3d62c7ce2f02f73b1c0614e3721b7af27147f110",
+        "type": "github"
+      },
+      "original": {
+        "owner": "mchav",
+        "repo": "granite",
+        "type": "github"
+      }
+    },
+    "network-run": {
+      "flake": false,
+      "locked": {
+        "lastModified": 1763529229,
+        "narHash": "sha256-j3Pkvn/eXiciQQIIc+SkWiFSLqbVyAc0SfLyIRZUsv8=",
+        "owner": "kazu-yamamoto",
+        "repo": "network-run",
+        "rev": "f49d0eeafcecce3e26e66edb3209a04cba30defa",
+        "type": "github"
+      },
+      "original": {
+        "owner": "kazu-yamamoto",
+        "repo": "network-run",
+        "type": "github"
+      }
+    },
     "nixpkgs": {
       "locked": {
         "lastModified": 1780243769,
@@ -34,10 +66,29 @@
         "type": "github"
       }
     },
+    "pinch": {
+      "flake": false,
+      "locked": {
+        "lastModified": 1763952216,
+        "narHash": "sha256-rpk7mqi4C77UFIRTDVWykE30jd5OY5K4l9LWiGo46f8=",
+        "owner": "abhinav",
+        "repo": "pinch",
+        "rev": "0bf7dddf7c3203d3c04aedb709a1c774f99ff796",
+        "type": "github"
+      },
+      "original": {
+        "owner": "abhinav",
+        "repo": "pinch",
+        "type": "github"
+      }
+    },
     "root": {
       "inputs": {
         "flake-utils": "flake-utils",
-        "nixpkgs": "nixpkgs"
+        "granite": "granite",
+        "network-run": "network-run",
+        "nixpkgs": "nixpkgs",
+        "pinch": "pinch"
       }
     },
     "systems": {
diff --git a/flake.nix b/flake.nix
index 2f2d5fb6..c3d683a5 100644
--- a/flake.nix
+++ b/flake.nix
@@ -4,41 +4,35 @@
   inputs = {
     nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
     flake-utils.url = "github:numtide/flake-utils";
+    pinch = {
+      url = "github:abhinav/pinch";
+      flake = false;
+    };
+    network-run = {
+      url = "github:kazu-yamamoto/network-run";
+      flake = false;
+    };
+    granite = {
+      url = "github:mchav/granite";
+      flake = false;
+    };
   };
 
   outputs =
-    {
+    inputs@{
       self,
       nixpkgs,
       flake-utils,
+      ...
     }:
     flake-utils.lib.eachDefaultSystem (
       system:
       let
         pkgs = nixpkgs.legacyPackages.${system};
-        granitePkg = pkgs.fetchFromGitHub {
-          repo = "granite";
-          owner = "mchav";
-          # main as of 2026/06/30
-          rev = "b3e83fc42ef3a3e032f58072ae1962281a7b2b00";
-          hash = "sha256-xT85Kdsk1tFD3+7Tuv69hpTwB/NPwJ1KFus1MfPIGBE=";
-        };
-        pinchPkg = pkgs.fetchFromGitHub {
-          repo = "pinch";
-          owner = "abhinav";
-          rev = "v0.5.2.0";
-          hash = "sha256-kuCS4EePc4aIONCvF0sOZt4pCazAq1z9+a/AY9b7Q6c=";
-        };
-        networkRunPkg = pkgs.fetchFromGitHub {
-          repo = "network-run";
-          owner = "kazu-yamamoto";
-          rev = "v0.3.1";
-          hash = "sha256-xyyf+Le2x9ACJBE4ua7wWHsfOQHNi7D+DksghZFh35I=";
-        };
 
         hsPkgs = pkgs.haskellPackages.extend (
           self: super: {
-            network-run = self.callCabal2nix "network-run" networkRunPkg { };
+            network-run = self.callCabal2nix "network-run" inputs.network-run { };
             dataframe-arrow = self.callCabal2nix "dataframe-arrow" ./dataframe-arrow { };
             dataframe-core = self.callCabal2nix "dataframe-core" ./dataframe-core { };
             dataframe-csv = self.callCabal2nix "dataframe-csv" ./dataframe-csv { };
@@ -57,14 +51,14 @@
             };
             dataframe-operations = self.callCabal2nix "dataframe-operations" ./dataframe-operations { };
             dataframe-parquet = self.callCabal2nix "dataframe-parquet" ./dataframe-parquet {
-              pinch = self.callCabal2nix "pinch" pinchPkg { };
+              pinch = self.callCabal2nix "pinch" inputs.pinch { };
             };
             dataframe-parquet-th = self.callCabal2nix "dataframe-parquet-th" ./dataframe-parquet-th { };
             dataframe-parsing = self.callCabal2nix "dataframe-parsing" ./dataframe-parsing { };
             dataframe-persistent = self.callCabal2nix "dataframe-persistent" ./dataframe-persistent { };
             dataframe-th = self.callCabal2nix "dataframe-th" ./dataframe-th { };
             dataframe-viz = self.callCabal2nix "dataframe-viz" ./dataframe-viz {
-              granite = self.callCabal2nix "granite" granitePkg { };
+              granite = self.callCabal2nix "granite" inputs.granite { };
             };
             dataframe = self.callCabal2nix "dataframe" ./. { };
           }