Call SSURGO from Python using requests, pandas, and soilDB-style wrappers. Five lessons from first call to full analysis pipeline.
pip install requests pandas matplotlib — all standard packages, no exotic dependencies.requests.post() → parse JSON → print results
The SDA API accepts a POST request with a query parameter and returns JSON. The response structure is always: Table[0] = column names, Table[1] = types (skip), Table[2:] = data rows.
import requests SDA_URL = "https://sdmdataaccess.sc.egov.usda.gov/Tabular/post.rest" def query_sda(sql): """POST a SQL query to SDA, return list of row dicts.""" resp = requests.post( SDA_URL, data={"query": sql, "format": "JSON+COLUMNNAME"} ) resp.raise_for_status() data = resp.json() if "Table" not in data or len(data["Table"]) < 2: return [] headers = data["Table"][0] rows = data["Table"][2:] # row 1 = types, skip it return [dict(zip(headers, row)) for row in rows] # Try it results = query_sda( "SELECT TOP 5 mukey, musym, muname, muacres FROM mapunit ORDER BY muacres DESC" ) for r in results: print(f"{r['musym']:10s} {r['muname'][:40]:40s} {r['muacres']} ac")
data["Table"][0] = ["mukey", "musym", "muname", "muacres"]data["Table"][1] = ["String", "String", "String", "Float"] (skip this)data["Table"][2] = ["462955", "ApA", "Antigo silt loam…", "18234.5"]Convert any SDA result to a DataFrame in 3 lines
Once you have a DataFrame, all of pandas' analysis and visualization tools are available — filtering, pivoting, groupby, plotting.
import requests import pandas as pd SDA_URL = "https://sdmdataaccess.sc.egov.usda.gov/Tabular/post.rest" def sda_to_df(sql): resp = requests.post(SDA_URL, data={"query": sql, "format": "JSON+COLUMNNAME"}) data = resp.json() headers = data["Table"][0] rows = data["Table"][2:] return pd.DataFrame(rows, columns=headers) # Organic matter by drainage class — Iowa sql = """ SELECT c.drainagecl, ROUND(AVG(CAST(ch.om_r AS FLOAT)), 3) AS avg_om_pct, COUNT(DISTINCT c.cokey) AS n_components FROM legend l INNER JOIN mapunit mu ON mu.lkey = l.lkey INNER JOIN component c ON c.mukey = mu.mukey INNER JOIN chorizon ch ON ch.cokey = c.cokey WHERE l.areasymbol LIKE 'IA%' AND c.majcompflag = 'Yes' AND ch.hzdept_r = 0 AND ch.om_r IS NOT NULL GROUP BY c.drainagecl ORDER BY avg_om_pct DESC """ df = sda_to_df(sql) df['avg_om_pct'] = pd.to_numeric(df['avg_om_pct']) print(df.to_string(index=False)) print(f"\nHighest OM class: {df.iloc[0]['drainagecl']}") print(f"Range: {df['avg_om_pct'].min():.2f}% – {df['avg_om_pct'].max():.2f}%")
Query dozens of survey areas without hitting row limits
SDA's immediate mode handles ~10,000 rows in ~30 seconds. For larger datasets, chunk your survey areas into batches of 10–15 and loop, respecting the API with small delays.
import requests, pandas as pd, time SDA_URL = "https://sdmdataaccess.sc.egov.usda.gov/Tabular/post.rest" def batch_query(areasymbols, chunk_size=10): all_frames = [] for i in range(0, len(areasymbols), chunk_size): chunk = areasymbols[i:i+chunk_size] placeholders = "','".join(chunk) sql = f""" SELECT l.areasymbol, mu.musym, mu.muname, c.compname, c.comppct_r, c.hydgrp FROM legend l INNER JOIN mapunit mu ON mu.lkey = l.lkey INNER JOIN component c ON c.mukey = mu.mukey WHERE l.areasymbol IN ('{placeholders}') AND c.majcompflag = 'Yes' """ resp = requests.post(SDA_URL, data={"query": sql, "format": "JSON+COLUMNNAME"}) data = resp.json() if "Table" in data and len(data["Table"]) >= 3: all_frames.append(pd.DataFrame(data["Table"][2:], columns=data["Table"][0])) time.sleep(0.3) # polite API usage return pd.concat(all_frames, ignore_index=True) if all_frames else pd.DataFrame() # Midwest corn belt survey areas midwest = ['IA001','IA003','IL001','IL003','IN001','MN001','MN003','WI001','WI003'] df = batch_query(midwest) print(f"Retrieved {len(df):,} rows across {df['areasymbol'].nunique()} survey areas")
Build a reusable SDA client class
requests + pandas.""" Python SDA client — inspired by R's soilDB package """ import requests, pandas as pd SDA_URL = "https://sdmdataaccess.sc.egov.usda.gov/Tabular/post.rest" def _post(sql): r = requests.post(SDA_URL, data={"query": sql, "format": "JSON+COLUMNNAME"}) d = r.json() if "Table" not in d or len(d["Table"]) < 3: return pd.DataFrame() return pd.DataFrame(d["Table"][2:], columns=d["Table"][0]) def fetchSDA_component(areasymbol, majcomp_only=True): flag = "AND c.majcompflag = 'Yes'" if majcomp_only else "" return _post(f""" SELECT mu.musym, mu.muname, mu.muacres, c.cokey, c.compname, c.comppct_r, c.taxclname, c.drainagecl, c.hydgrp FROM legend l INNER JOIN mapunit mu ON mu.lkey = l.lkey INNER JOIN component c ON c.mukey = mu.mukey WHERE l.areasymbol = '{areasymbol}' {flag} ORDER BY mu.musym, c.comppct_r DESC """) def fetchSDA_horizon(areasymbol): return _post(f""" SELECT mu.musym, c.compname, c.comppct_r, ch.hzname, ch.hzdept_r, ch.hzdepb_r, ch.om_r, ch.awc_r, ch.ph1to1h2o_r, ch.sandtotal_r, ch.claytotal_r FROM legend l INNER JOIN mapunit mu ON mu.lkey = l.lkey INNER JOIN component c ON c.mukey = mu.mukey INNER JOIN chorizon ch ON ch.cokey = c.cokey WHERE l.areasymbol = '{areasymbol}' AND c.majcompflag = 'Yes' ORDER BY mu.musym, c.comppct_r DESC, ch.hzdept_r """) # Usage comps = fetchSDA_component('WI025') hzns = fetchSDA_horizon('WI025') print(f"Components: {len(comps)}, Horizons: {len(hzns)}")
End-to-end analysis with matplotlib
Combining everything: a single script that queries SDA, processes results into a DataFrame, and produces a publication-quality chart comparing OM by soil taxonomy order and depth.
""" Full pipeline: SDA → pandas → matplotlib Organic matter by depth for different soil orders """ import requests, pandas as pd, matplotlib.pyplot as plt SDA_URL = "https://sdmdataaccess.sc.egov.usda.gov/Tabular/post.rest" def q(sql): r = requests.post(SDA_URL, data={"query": sql, "format": "JSON+COLUMNNAME"}) d = r.json() return pd.DataFrame(d["Table"][2:], columns=d["Table"][0]) if "Table" in d else pd.DataFrame() df = q(""" SELECT c.taxorder, CASE WHEN ch.hzdepb_r <= 20 THEN '0-20 cm' WHEN ch.hzdepb_r <= 50 THEN '20-50 cm' WHEN ch.hzdepb_r <= 100 THEN '50-100 cm' ELSE '100+ cm' END AS depth_zone, ROUND(AVG(CAST(ch.om_r AS FLOAT)), 3) AS avg_om FROM legend l INNER JOIN mapunit mu ON mu.lkey = l.lkey INNER JOIN component c ON c.mukey = mu.mukey INNER JOIN chorizon ch ON ch.cokey = c.cokey WHERE l.areasymbol LIKE 'IA%' AND c.majcompflag = 'Yes' AND c.taxorder IN ('mollisols','alfisols','inceptisols') AND ch.om_r IS NOT NULL GROUP BY c.taxorder, CASE WHEN ch.hzdepb_r<=20 THEN '0-20 cm' WHEN ch.hzdepb_r<=50 THEN '20-50 cm' WHEN ch.hzdepb_r<=100 THEN '50-100 cm' ELSE '100+ cm' END ORDER BY c.taxorder, MIN(ch.hzdept_r) """) df['avg_om'] = pd.to_numeric(df['avg_om']) fig, ax = plt.subplots(figsize=(10, 5)) for order, grp in df.groupby('taxorder'): ax.plot(grp['depth_zone'], grp['avg_om'], marker='o', label=order.title()) ax.set_xlabel('Depth Zone'); ax.set_ylabel('Avg OM (%)') ax.set_title('Organic Matter by Depth — Iowa Soils by Taxonomy Order') ax.legend(); ax.grid(alpha=0.3) plt.tight_layout() plt.savefig('iowa_om_depth.png', dpi=150) print("Saved iowa_om_depth.png")
Query SDA directly from JavaScript — no server required. Build live tables, connect to MapLibre GL maps, and ship interactive soil apps.
JavaScript API →