Python API Examples

This page demonstrates the Python API with real, working examples using actual OpenStreetMap data. All code examples are executable and produce the output files you can download.

Basic Workflow Example

This example shows the complete end-to-end workflow using data for Delhi, India.

The Complete Workflow

#!/usr/bin/env python3
"""
Basic Workflow Example - Delhi Road Sampling

This example demonstrates extracting and sampling road segments from Delhi, India
using both the Python API and CLI commands.
"""

import os
import geo_sampling as gs

def main():
    """Extract Delhi's roads, draw a seeded random sample, then save and plot it."""
    # Study area and requested sample size
    country, region = "India", "NCT of Delhi"
    sample_size = 1000

    # Folder that receives the CSV outputs (created if it does not exist)
    output_dir = "examples/outputs/01_basic_workflow"
    os.makedirs(output_dir, exist_ok=True)

    print(f"Extracting roads for {region}, {country}...")

    # Step 1: pull every road segment for the region
    all_roads = gs.RoadExtractor(country, region).get_roads()
    print(f"✓ Extracted {len(all_roads)} road segments")

    # Step 2: seeded random draw from the full set
    sampler = gs.RoadSampler(all_roads)
    sample = sampler.random_sample(sample_size, seed=42)
    print(f"✓ Sampled {len(sample)} segments")

    # Step 3: write the full network first, then the sample
    exports = (
        (all_roads, "delhi_all_roads.csv"),
        (sample, "delhi_sampled_roads.csv"),
    )
    for segments, csv_name in exports:
        sampler.save_csv(segments, os.path.join(output_dir, csv_name))
    print(f"✓ Saved outputs to {output_dir}")

    # Step 4: plot the sampled segments
    plot_title = f"Delhi Road Sample (N={len(sample)})"
    gs.plot_road_segments(sample, title=plot_title)

    # Summary statistics
    type_counts = dict(sampler.get_road_type_summary())
    print("Road type distribution:", type_counts)


if __name__ == "__main__":
    main()

Generated Outputs

This example produces real files you can examine:

📄 delhi_all_roads.csv

  • Size: 1,220 road segments

  • Coverage: Complete NCT of Delhi road network

  • Types: All road types (trunk, primary, residential, etc.)

  • 📥 Download

📄 delhi_sampled_roads.csv

  • Size: 1,000 randomly sampled segments

  • Method: Random sampling with seed=42

  • Use: Ready for field data collection

  • 📥 Download

🖼️ delhi_sample_plot.png

  • Type: Geographic visualization

  • Shows: Sample distribution across Delhi

  • Format: High-resolution PNG

  • 📥 View Plot

🖼️ delhi_comparison_plot.png

  • Type: Side-by-side comparison

  • Shows: All roads vs. sample

  • Purpose: Validate sampling coverage

  • 📥 View Plot

CLI Equivalent Commands

The same workflow using the command-line interface:

# Option 1: Complete workflow in one command
# (same region, sample size, and seed as the Python example above)
geo-sampling workflow "India" "NCT of Delhi" \
    --sample-size 1000 \
    --output delhi_workflow_sample.csv \
    --plot \
    --seed 42

# Option 2: Step-by-step approach
# Extract all roads for the region into a CSV file
geo-sampling extract "India" "NCT of Delhi" \
    --output delhi_all_roads.csv

# Create a random sample of 1000 segments from the extracted CSV,
# using a fixed seed, and request a plot
geo-sampling sample delhi_all_roads.csv \
    --sample-size 1000 \
    --strategy random \
    --seed 42 \
    --output delhi_sampled_roads.csv \
    --plot

# Get region information
geo-sampling info "India" "NCT of Delhi"

Advanced Python API Features

Convenience Functions

For quick research tasks, use the high-level convenience API:

import geo_sampling as gs

# Draw a seeded sample for the region in a single call
sample = gs.sample_roads_for_region("India", "NCT of Delhi", n=1000, admin_level=1, seed=42)

# Quick plotting
gs.quick_plot(sample, title="Delhi Sample")

# Region-level summary, obtained without a full extraction
summary = gs.get_road_summary("India", "NCT of Delhi")
total_segments = summary["total_segments"]
print(f"Total roads: {total_segments:,}")
road_types = summary["road_types"]
print(f"Road types: {road_types}")

Working with Road Types

Filter by specific road types for focused sampling:

# Restrict extraction to the major road classes
major_types = ["trunk", "primary", "secondary"]
extractor = gs.RoadExtractor("India", "NCT of Delhi")
major_roads = extractor.get_roads(road_types=major_types)

total_major = len(major_roads)
print(f"Major roads: {total_major} segments")

# Sample from major roads only
sampler = gs.RoadSampler(major_roads)
major_sample = sampler.random_sample(500, seed=42)

# Share of each road type, relative to the number of major-road segments
for road_type, count in sampler.get_road_type_summary().items():
    share = count / total_major * 100
    print(f"{road_type}: {count} ({share:.1f}%)")

Integration Examples

Research Workflow Integration

import geo_sampling as gs
import pandas as pd
import json
from datetime import datetime

def research_sampling_workflow(country, region, sample_size, study_name, seed=42):
    """Draw a stratified road sample for a study and save it with its metadata.

    Two files are written to the current working directory, both named after
    a slug of ``study_name`` (lower-cased, spaces replaced by underscores):
    ``<slug>_sample.csv`` holds the sampled segments and
    ``<slug>_metadata.json`` holds the sampling parameters.

    Args:
        country: Country name passed to ``gs.sample_roads_for_region``.
        region: Region name passed to ``gs.sample_roads_for_region``.
        sample_size: Number of segments requested (passed as ``n``).
        study_name: Human-readable study label; stored in the ``study_id``
            column and used to derive the output file names.
        seed: Random seed passed to the sampler. Defaults to 42, the value
            that used to be hard-coded, and is recorded in the metadata so
            the sample can be reproduced from the metadata file alone.

    Returns:
        The DataFrame produced by ``RoadSampler.to_dataframe()`` with the
        ``study_id`` and ``sample_date`` columns added.
    """
    # Track methodology, including the seed so the draw is reproducible
    metadata = {
        "study_name": study_name,
        "country": country,
        "region": region,
        "sample_size": sample_size,
        "date_created": datetime.now().isoformat(),
        "methodology": "stratified_sampling",
        "seed": seed,
    }

    # Extract and sample
    sample = gs.sample_roads_for_region(
        country, region,
        n=sample_size,
        strategy="stratified",
        seed=seed,
    )

    # Convert to DataFrame for analysis
    sampler = gs.RoadSampler(sample)
    df = sampler.to_dataframe()

    # Add study metadata to DataFrame
    df['study_id'] = study_name
    df['sample_date'] = metadata['date_created']

    # Both output files share one slug so they are easy to pair up
    slug = study_name.lower().replace(' ', '_')
    output_file = f"{slug}_sample.csv"
    metadata_file = f"{slug}_metadata.json"

    # Save the sample
    df.to_csv(output_file, index=False)

    # Save metadata
    with open(metadata_file, "w", encoding="utf-8") as f:
        json.dump(metadata, f, indent=2)

    print(f"Study '{study_name}' complete:")
    print(f"  Sample size: {len(sample)}")
    print(f"  Output: {output_file}")
    print(f"  Road types: {df['osm_type'].nunique()}")

    return df

# Example run: a 500-segment sample for a Bangkok study
study_data = research_sampling_workflow(
    country="Thailand",
    region="Bangkok",
    sample_size=500,
    study_name="Bangkok Traffic Study 2024",
)

Batch Processing Multiple Regions

import geo_sampling as gs

def batch_process_regions(regions, sample_size=200):
    """Sample every (country, region) pair and save one CSV per region.

    Each region is sampled with the stratified strategy and a fixed seed of
    42. A failure in one region is caught, reported, and recorded, so the
    remaining regions are still processed.

    Args:
        regions: Iterable of ``(country, region)`` name pairs.
        sample_size: Number of segments requested per region (default 200).

    Returns:
        A dict keyed by ``"<country>-<region>"``. On success the value holds
        ``sample_size``, ``filename`` and ``road_types`` (the number of
        entries in the sampler's road type summary); on failure it holds a
        single ``error`` message string.
    """
    results = {}

    for country, region in regions:
        print(f"Processing {region}, {country}...")
        key = f"{country}-{region}"

        try:
            # Sample roads
            sample = gs.sample_roads_for_region(
                country, region,
                n=sample_size,
                strategy="stratified",
                seed=42,
            )

            # Normalise the whole name, not just the region, so multi-word
            # countries (e.g. "Sri Lanka") do not leave spaces in the filename
            filename = f"{country}_{region}.csv".lower().replace(" ", "_")
            sampler = gs.RoadSampler(sample)
            sampler.save_csv(sample, filename)

            # Track results
            results[key] = {
                "sample_size": len(sample),
                "filename": filename,
                "road_types": len(sampler.get_road_type_summary()),
            }

            print(f"  ✓ {len(sample)} segments → {filename}")

        except Exception as e:
            # Deliberately broad: one bad region must not abort the batch
            print(f"  ✗ Failed: {e}")
            results[key] = {"error": str(e)}

    return results

# Regions to sample, given as (country, region) pairs
regions = [
    ("Singapore", "Central"),
    ("Thailand", "Bangkok"),
    ("India", "NCT of Delhi"),
]

batch_results = batch_process_regions(regions, sample_size=300)

# Summary: one line per region, failures shown with their error message
print("\nBatch Processing Results:")
for label, outcome in batch_results.items():
    if "error" not in outcome:
        print(f"  ✅ {label}: {outcome['sample_size']} segments, {outcome['road_types']} road types")
        continue
    print(f"  ❌ {label}: {outcome['error']}")

Data Analysis Integration

Working with Pandas

import geo_sampling as gs
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Load sample data
segments = gs.load_segments_from_csv("delhi_sampled_roads.csv")
sampler = gs.RoadSampler(segments)

# Convert to DataFrame for analysis
df = sampler.to_dataframe()

# Analyze road type distribution
road_counts = df['osm_type'].value_counts()
print("Road Type Distribution:")
print(road_counts)

# Calculate straight-line segment lengths (equirectangular approximation).
# A degree of latitude is treated as 111.32 km, but a degree of longitude
# shrinks by cos(latitude), so the east-west offset is scaled by the cosine
# of each segment's mid-latitude before the two components are combined.
# Without that factor, lengths away from the equator are overestimated.
# Assumes the coordinate columns are in decimal degrees.
KM_PER_DEGREE = 111.32
mid_lat = np.radians((df['start_lat'] + df['end_lat']) / 2)
north_km = (df['end_lat'] - df['start_lat']) * KM_PER_DEGREE
east_km = (df['end_long'] - df['start_long']) * np.cos(mid_lat) * KM_PER_DEGREE
df['length_km'] = np.hypot(north_km, east_km)

# Summary statistics by road type
summary_stats = df.groupby('osm_type').agg({
    'length_km': ['count', 'mean', 'sum'],
    'start_lat': ['min', 'max'],
    'start_long': ['min', 'max']
}).round(3)

print("\nSummary by Road Type:")
print(summary_stats)

# Visualization: segment counts on the left, length spread on the right
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

# Road type distribution
road_counts.plot(kind='bar', ax=ax1)
ax1.set_title('Road Type Distribution')
ax1.set_ylabel('Number of Segments')

# Length distribution
df.boxplot(column='length_km', by='osm_type', ax=ax2)
ax2.set_title('Segment Length by Road Type')
ax2.set_ylabel('Length (km)')

plt.tight_layout()
plt.show()

Next Steps

Source Code

The complete source code for these examples is available: