Python API Track — SDA Training

Lessons

Your First Python Call

requests.post() → parse JSON → print results

Python ▼

The SDA API accepts a POST request with a query parameter and returns JSON. The response structure is always: Table[0] = column names, Table[1] = types (skip), Table[2:] = data rows.

import requests

SDA_URL = "https://sdmdataaccess.sc.egov.usda.gov/Tabular/post.rest"

def query_sda(sql):
    """POST a SQL query to SDA, return list of row dicts."""
    resp = requests.post(
        SDA_URL,
        data={"query": sql, "format": "JSON+COLUMNNAME"}
    )
    resp.raise_for_status()
    data = resp.json()
    if "Table" not in data or len(data["Table"]) < 2:
        return []
    headers = data["Table"][0]
    rows    = data["Table"][2:]  # row 1 = types, skip it
    return [dict(zip(headers, row)) for row in rows]

# Try it
results = query_sda(
    "SELECT TOP 5 mukey, musym, muname, muacres FROM mapunit ORDER BY muacres DESC"
)
for r in results:
    print(f"{r['musym']:10s}  {r['muname'][:40]:40s}  {r['muacres']} ac")

JSON Response Structure:
data["Table"][0] = ["mukey", "musym", "muname", "muacres"]
data["Table"][1] = ["String", "String", "String", "Float"] (skip this)
data["Table"][2] = ["462955", "ApA", "Antigo silt loam…", "18234.5"]

pandas DataFrame from SDA

Convert any SDA result to a DataFrame in 3 lines

Python ▼

Once you have a DataFrame, all of pandas' analysis and visualization tools are available — filtering, pivoting, groupby, plotting.

import requests
import pandas as pd

SDA_URL = "https://sdmdataaccess.sc.egov.usda.gov/Tabular/post.rest"

def sda_to_df(sql):
    resp = requests.post(SDA_URL, data={"query": sql, "format": "JSON+COLUMNNAME"})
    data = resp.json()
    headers = data["Table"][0]
    rows    = data["Table"][2:]
    return pd.DataFrame(rows, columns=headers)

# Organic matter by drainage class — Iowa
sql = """
SELECT
    c.drainagecl,
    ROUND(AVG(CAST(ch.om_r AS FLOAT)), 3) AS avg_om_pct,
    COUNT(DISTINCT c.cokey)               AS n_components
FROM legend l
INNER JOIN mapunit   mu ON mu.lkey  = l.lkey
INNER JOIN component  c ON c.mukey  = mu.mukey
INNER JOIN chorizon  ch ON ch.cokey = c.cokey
WHERE l.areasymbol LIKE 'IA%'
  AND c.majcompflag = 'Yes'
  AND ch.hzdept_r = 0
  AND ch.om_r IS NOT NULL
GROUP BY c.drainagecl
ORDER BY avg_om_pct DESC
"""

df = sda_to_df(sql)
df['avg_om_pct'] = pd.to_numeric(df['avg_om_pct'])
print(df.to_string(index=False))
print(f"\nHighest OM class: {df.iloc[0]['drainagecl']}")
print(f"Range: {df['avg_om_pct'].min():.2f}% – {df['avg_om_pct'].max():.2f}%")

Batched Queries — Large Datasets

Query dozens of survey areas without hitting row limits

Python ▼

SDA's immediate mode handles ~10,000 rows in ~30 seconds. For larger datasets, chunk your survey areas into batches of 10–15 and loop, respecting the API with small delays.

import requests, pandas as pd, time

SDA_URL = "https://sdmdataaccess.sc.egov.usda.gov/Tabular/post.rest"

def batch_query(areasymbols, chunk_size=10):
    all_frames = []
    for i in range(0, len(areasymbols), chunk_size):
        chunk = areasymbols[i:i+chunk_size]
        placeholders = "','".join(chunk)
        sql = f"""
        SELECT l.areasymbol, mu.musym, mu.muname,
               c.compname, c.comppct_r, c.hydgrp
        FROM legend l
        INNER JOIN mapunit mu  ON mu.lkey  = l.lkey
        INNER JOIN component c ON c.mukey  = mu.mukey
        WHERE l.areasymbol IN ('{placeholders}')
          AND c.majcompflag = 'Yes'
        """
        resp = requests.post(SDA_URL, data={"query": sql, "format": "JSON+COLUMNNAME"})
        data = resp.json()
        if "Table" in data and len(data["Table"]) >= 3:
            all_frames.append(pd.DataFrame(data["Table"][2:], columns=data["Table"][0]))
        time.sleep(0.3)   # polite API usage
    return pd.concat(all_frames, ignore_index=True) if all_frames else pd.DataFrame()

# Midwest corn belt survey areas
midwest = ['IA001','IA003','IL001','IL003','IN001','MN001','MN003','WI001','WI003']
df = batch_query(midwest)
print(f"Retrieved {len(df):,} rows across {df['areasymbol'].nunique()} survey areas")

soilDB-Style Python Wrapper

Build a reusable SDA client class

Python ▼

soilDB is the gold standard R package for SSURGO queries (NCSS Tech Staff). For Python, the pattern below gives you a similar ergonomic API using requests + pandas.

"""
Python SDA client — inspired by R's soilDB package
"""
import requests, pandas as pd

SDA_URL = "https://sdmdataaccess.sc.egov.usda.gov/Tabular/post.rest"

def _post(sql):
    r = requests.post(SDA_URL, data={"query": sql, "format": "JSON+COLUMNNAME"})
    d = r.json()
    if "Table" not in d or len(d["Table"]) < 3:
        return pd.DataFrame()
    return pd.DataFrame(d["Table"][2:], columns=d["Table"][0])

def fetchSDA_component(areasymbol, majcomp_only=True):
    flag = "AND c.majcompflag = 'Yes'" if majcomp_only else ""
    return _post(f"""
        SELECT mu.musym, mu.muname, mu.muacres,
               c.cokey, c.compname, c.comppct_r,
               c.taxclname, c.drainagecl, c.hydgrp
        FROM legend l
        INNER JOIN mapunit mu  ON mu.lkey  = l.lkey
        INNER JOIN component c ON c.mukey  = mu.mukey
        WHERE l.areasymbol = '{areasymbol}' {flag}
        ORDER BY mu.musym, c.comppct_r DESC
    """)

def fetchSDA_horizon(areasymbol):
    return _post(f"""
        SELECT mu.musym, c.compname, c.comppct_r,
               ch.hzname, ch.hzdept_r, ch.hzdepb_r,
               ch.om_r, ch.awc_r, ch.ph1to1h2o_r,
               ch.sandtotal_r, ch.claytotal_r
        FROM legend l
        INNER JOIN mapunit mu  ON mu.lkey  = l.lkey
        INNER JOIN component c ON c.mukey  = mu.mukey
        INNER JOIN chorizon ch ON ch.cokey = c.cokey
        WHERE l.areasymbol = '{areasymbol}'
          AND c.majcompflag = 'Yes'
        ORDER BY mu.musym, c.comppct_r DESC, ch.hzdept_r
    """)

# Usage
comps = fetchSDA_component('WI025')
hzns  = fetchSDA_horizon('WI025')
print(f"Components: {len(comps)}, Horizons: {len(hzns)}")

Full Pipeline — Query → DataFrame → Chart

End-to-end analysis with matplotlib

Python ▼

Combining everything: a single script that queries SDA, processes results into a DataFrame, and produces a publication-quality chart comparing OM by soil taxonomy order and depth.

"""
Full pipeline: SDA → pandas → matplotlib
Organic matter by depth for different soil orders
"""
import requests, pandas as pd, matplotlib.pyplot as plt

SDA_URL = "https://sdmdataaccess.sc.egov.usda.gov/Tabular/post.rest"

def q(sql):
    r = requests.post(SDA_URL, data={"query": sql, "format": "JSON+COLUMNNAME"})
    d = r.json()
    return pd.DataFrame(d["Table"][2:], columns=d["Table"][0]) if "Table" in d else pd.DataFrame()

df = q("""
SELECT c.taxorder,
    CASE
        WHEN ch.hzdepb_r <=  20 THEN '0-20 cm'
        WHEN ch.hzdepb_r <=  50 THEN '20-50 cm'
        WHEN ch.hzdepb_r <= 100 THEN '50-100 cm'
        ELSE '100+ cm'
    END AS depth_zone,
    ROUND(AVG(CAST(ch.om_r AS FLOAT)), 3) AS avg_om
FROM legend l
INNER JOIN mapunit   mu ON mu.lkey  = l.lkey
INNER JOIN component  c ON c.mukey  = mu.mukey
INNER JOIN chorizon  ch ON ch.cokey = c.cokey
WHERE l.areasymbol LIKE 'IA%'
  AND c.majcompflag = 'Yes'
  AND c.taxorder IN ('mollisols','alfisols','inceptisols')
  AND ch.om_r IS NOT NULL
GROUP BY c.taxorder,
    CASE WHEN ch.hzdepb_r<=20 THEN '0-20 cm'
         WHEN ch.hzdepb_r<=50 THEN '20-50 cm'
         WHEN ch.hzdepb_r<=100 THEN '50-100 cm'
         ELSE '100+ cm' END
ORDER BY c.taxorder, MIN(ch.hzdept_r)
""")

df['avg_om'] = pd.to_numeric(df['avg_om'])
fig, ax = plt.subplots(figsize=(10, 5))
for order, grp in df.groupby('taxorder'):
    ax.plot(grp['depth_zone'], grp['avg_om'], marker='o', label=order.title())
ax.set_xlabel('Depth Zone'); ax.set_ylabel('Avg OM (%)')
ax.set_title('Organic Matter by Depth — Iowa Soils by Taxonomy Order')
ax.legend(); ax.grid(alpha=0.3)
plt.tight_layout()
plt.savefig('iowa_om_depth.png', dpi=150)
print("Saved iowa_om_depth.png")

Expected result: Mollisols (prairie soils) have the highest OM at all depths — 4–6% at surface, declining with depth. Alfisols and Inceptisols show much lower values. This pattern reflects millions of years of organic matter accumulation under native grasslands.

Up Next

Take It to the Browser

Query SDA directly from JavaScript — no server required. Build live tables, connect to MapLibre GL maps, and ship interactive soil apps.

JavaScript API →

Python APITrack

Your First Python Call

pandas DataFrame from SDA

Batched Queries — Large Datasets

soilDB-Style Python Wrapper

Full Pipeline — Query → DataFrame → Chart

Take It to the Browser

Python API
Track