Python API Examples
This page demonstrates the Python API with real, working examples that use actual OpenStreetMap data. All code examples are executable and produce output files that you can download.
Basic Workflow Example
This example shows the complete end-to-end workflow using data for Delhi, India.
The Complete Workflow
#!/usr/bin/env python3
"""
Basic Workflow Example - Delhi Road Sampling
This example demonstrates extracting and sampling road segments from Delhi, India
using both the Python API and CLI commands.
"""
import os
import geo_sampling as gs
def main():
    """Run the Delhi example end to end: extract, sample, save, and plot.

    Extracts the road segments for NCT of Delhi, draws a random sample of
    1,000 of them with a fixed seed, writes both sets to CSV files under
    ``examples/outputs/01_basic_workflow``, plots the sample, and prints the
    road type summary reported by the sampler.
    """
    # Study parameters
    country, region = "India", "NCT of Delhi"
    sample_size = 1000

    # Make sure the output folder exists before anything is written to it
    output_dir = "examples/outputs/01_basic_workflow"
    os.makedirs(output_dir, exist_ok=True)

    # Step 1: pull every road segment for the region
    print(f"Extracting roads for {region}, {country}...")
    all_roads = gs.RoadExtractor(country, region).get_roads()
    print(f"✓ Extracted {len(all_roads)} road segments")

    # Step 2: random sample with a fixed seed
    road_sampler = gs.RoadSampler(all_roads)
    sample = road_sampler.random_sample(sample_size, seed=42)
    print(f"✓ Sampled {len(sample)} segments")

    # Step 3: write the full network first, then the sample
    csv_targets = (
        (all_roads, "delhi_all_roads.csv"),
        (sample, "delhi_sampled_roads.csv"),
    )
    for segments, csv_name in csv_targets:
        road_sampler.save_csv(segments, os.path.join(output_dir, csv_name))
    print(f"✓ Saved outputs to {output_dir}")

    # Step 4: plot the sampled segments
    gs.plot_road_segments(sample, title=f"Delhi Road Sample (N={len(sample)})")

    # Summary statistics, as reported by the sampler built on the full network
    type_counts = road_sampler.get_road_type_summary()
    print("Road type distribution:", dict(type_counts))
# Run the example only when this file is executed as a script.
if __name__ == "__main__":
    main()
Generated Outputs
This example produces real files that you can examine:
📄 delhi_all_roads.csv
Size: 1,220 road segments
Coverage: Complete NCT of Delhi road network
Types: All road types (trunk, primary, residential, etc.)
📄 delhi_sampled_roads.csv
Size: 1,000 randomly sampled segments
Method: Random sampling with seed=42
Use: Ready for field data collection
🖼️ delhi_sample_plot.png
Type: Geographic visualization
Shows: Sample distribution across Delhi
Format: High-resolution PNG
🖼️ delhi_comparison_plot.png
Type: Side-by-side comparison
Shows: All roads vs. sample
Purpose: Validate sampling coverage
CLI Equivalent Commands
The same workflow using the command-line interface:
# Option 1: run the complete workflow with a single command
geo-sampling workflow "India" "NCT of Delhi" \
    --sample-size 1000 \
    --output delhi_workflow_sample.csv \
    --plot \
    --seed 42

# Option 2: run the workflow one stage at a time
# Stage 1 - extract all roads
geo-sampling extract "India" "NCT of Delhi" \
    --output delhi_all_roads.csv

# Stage 2 - create the random sample from the extracted file
geo-sampling sample delhi_all_roads.csv \
    --sample-size 1000 \
    --strategy random \
    --seed 42 \
    --output delhi_sampled_roads.csv \
    --plot

# Show region information
geo-sampling info "India" "NCT of Delhi"
Advanced Python API Features
Convenience Functions
For quick research tasks, use the high-level convenience API:
import geo_sampling as gs
# Extract and sample in a single call
sample = gs.sample_roads_for_region(
    "India",
    "NCT of Delhi",
    n=1000,
    admin_level=1,
    seed=42,
)

# Plot the sample with a custom title
gs.quick_plot(sample, title="Delhi Sample")

# Fetch the region summary (intended as a shortcut that avoids a full
# extraction) and report its headline figures
summary = gs.get_road_summary("India", "NCT of Delhi")
print(f"Total roads: {summary['total_segments']:,}")
print(f"Road types: {summary['road_types']}")
Working with Road Types
Filter by specific road types for focused sampling:
# Restrict extraction to the major road classes
extractor = gs.RoadExtractor("India", "NCT of Delhi")
major_roads = extractor.get_roads(road_types=["trunk", "primary", "secondary"])
print(f"Major roads: {len(major_roads)} segments")

# Draw the sample from the major roads only
sampler = gs.RoadSampler(major_roads)
major_sample = sampler.random_sample(500, seed=42)

# Report each road type's count and its share of the major-road segments
road_summary = sampler.get_road_type_summary()
total_major = len(major_roads)  # constant across the loop, so computed once
for road_type, count in road_summary.items():
    percentage = count / total_major * 100
    print(f"{road_type}: {count} ({percentage:.1f}%)")
Integration Examples
Research Workflow Integration
import geo_sampling as gs
import pandas as pd
import json
from datetime import datetime
def research_sampling_workflow(country, region, sample_size, study_name, seed=42):
    """Draw a stratified road sample for a study and save it with its metadata.

    Two files are written using relative paths, both named from
    ``study_name`` lower-cased with spaces replaced by underscores:
    ``<stem>_sample.csv`` (the sample plus the study columns) and
    ``<stem>_metadata.json`` (the methodology record).

    Args:
        country: Country name passed to ``gs.sample_roads_for_region``.
        region: Region name passed to ``gs.sample_roads_for_region``.
        sample_size: Number of segments requested (passed as ``n``).
        study_name: Human-readable study name; stored in the ``study_id``
            column and used to derive the output file names.
        seed: Random seed passed to the sampler and recorded in the
            metadata so the sample can be reproduced. Defaults to 42, the
            value that was previously hard-coded.

    Returns:
        The DataFrame produced by ``RoadSampler.to_dataframe()`` with the
        added ``study_id`` and ``sample_date`` columns.
    """
    # Derive the file-name stem once so the CSV and JSON names cannot drift
    file_stem = study_name.lower().replace(' ', '_')
    output_file = f"{file_stem}_sample.csv"
    metadata_file = f"{file_stem}_metadata.json"

    # Track methodology, including the seed needed to reproduce the sample
    metadata = {
        "study_name": study_name,
        "country": country,
        "region": region,
        "sample_size": sample_size,
        "date_created": datetime.now().isoformat(),
        "methodology": "stratified_sampling",
        "seed": seed,
    }

    # Extract and sample
    sample = gs.sample_roads_for_region(
        country, region,
        n=sample_size,
        strategy="stratified",
        seed=seed,
    )

    # Convert to DataFrame for analysis
    sampler = gs.RoadSampler(sample)
    df = sampler.to_dataframe()

    # Add study metadata to DataFrame
    df['study_id'] = study_name
    df['sample_date'] = metadata['date_created']

    # Save the sample
    df.to_csv(output_file, index=False)

    # Save metadata; explicit encoding keeps the file identical across platforms
    with open(metadata_file, "w", encoding="utf-8") as f:
        json.dump(metadata, f, indent=2)

    print(f"Study '{study_name}' complete:")
    print(f" Sample size: {len(sample)}")
    print(f" Output: {output_file}")
    print(f" Road types: {df['osm_type'].nunique()}")
    return df
# Example run: a 500-segment sample for a Bangkok study
study_data = research_sampling_workflow(
    "Thailand",
    "Bangkok",
    sample_size=500,
    study_name="Bangkok Traffic Study 2024",
)
Batch Processing Multiple Regions
import geo_sampling as gs
def batch_process_regions(regions, sample_size=200, seed=42):
    """Sample several regions with the same settings and save one CSV each.

    Each region is sampled with the stratified strategy and written to
    ``<country>_<region>.csv``, where both name parts are lower-cased and
    have their spaces replaced by underscores. A failure in one region is
    reported and recorded, and processing continues with the next region.

    Args:
        regions: Iterable of ``(country, region)`` pairs.
        sample_size: Number of segments requested per region (passed as
            ``n``).
        seed: Random seed passed to the sampler for every region. Defaults
            to 42, the value that was previously hard-coded.

    Returns:
        A dict keyed by ``"<country>-<region>"``. Successful entries hold
        ``sample_size``, ``filename`` and ``road_types``; failed entries
        hold a single ``error`` message.
    """
    results = {}
    for country, region in regions:
        print(f"Processing {region}, {country}...")
        result_key = f"{country}-{region}"
        try:
            # Sample roads
            sample = gs.sample_roads_for_region(
                country, region,
                n=sample_size,
                strategy="stratified",
                seed=seed,
            )

            # Normalise both name parts the same way, so a multi-word
            # country no longer leaves spaces in the file name
            filename = "_".join(
                part.lower().replace(' ', '_') for part in (country, region)
            ) + ".csv"
            sampler = gs.RoadSampler(sample)
            sampler.save_csv(sample, filename)

            # Track results
            results[result_key] = {
                "sample_size": len(sample),
                "filename": filename,
                "road_types": len(sampler.get_road_type_summary()),
            }
            print(f" ✓ {len(sample)} segments → {filename}")
        except Exception as e:
            # Deliberately broad: one failing region must not abort the
            # batch, so the error is recorded and the loop moves on
            print(f" ✗ Failed: {e}")
            results[result_key] = {"error": str(e)}
    return results
# Regions to process, as (country, region) pairs
regions = [
    ("Singapore", "Central"),
    ("Thailand", "Bangkok"),
    ("India", "NCT of Delhi"),
]
batch_results = batch_process_regions(regions, sample_size=300)

# Report one line per region: the counts on success, the error otherwise
print("\nBatch Processing Results:")
for region, result in batch_results.items():
    if "error" not in result:
        print(f" ✅ {region}: {result['sample_size']} segments, {result['road_types']} road types")
        continue
    print(f" ❌ {region}: {result['error']}")
Data Analysis Integration
Working with Pandas
import geo_sampling as gs
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Load the previously saved sample and wrap it in a sampler
segments = gs.load_segments_from_csv("delhi_sampled_roads.csv")
sampler = gs.RoadSampler(segments)

# Convert to DataFrame for analysis
df = sampler.to_dataframe()

# Analyze road type distribution
road_counts = df['osm_type'].value_counts()
print("Road Type Distribution:")
print(road_counts)

# Approximate segment lengths with the equirectangular formula.
# A degree of latitude is about 111.32 km everywhere, but a degree of
# longitude shrinks by cos(latitude); without that factor east-west
# distances are overstated (by roughly 12% at Delhi's latitude).
# NOTE(review): assumes the coordinate columns are decimal degrees - confirm.
KM_PER_DEGREE = 111.32
mid_lat_rad = np.radians((df['start_lat'] + df['end_lat']) / 2)
delta_lat = df['end_lat'] - df['start_lat']
delta_long = (df['end_long'] - df['start_long']) * np.cos(mid_lat_rad)
df['length_km'] = np.hypot(delta_lat, delta_long) * KM_PER_DEGREE

# Summary statistics by road type
summary_stats = df.groupby('osm_type').agg({
    'length_km': ['count', 'mean', 'sum'],
    'start_lat': ['min', 'max'],
    'start_long': ['min', 'max'],
}).round(3)
print("\nSummary by Road Type:")
print(summary_stats)

# Visualization: two panels side by side
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

# Road type distribution
road_counts.plot(kind='bar', ax=ax1)
ax1.set_title('Road Type Distribution')
ax1.set_ylabel('Number of Segments')

# Length distribution
df.boxplot(column='length_km', by='osm_type', ax=ax2)
ax2.set_title('Segment Length by Road Type')
ax2.set_ylabel('Length (km)')
# boxplot(by=...) sets a figure-level "Boxplot grouped by ..." title that
# would sit over both panels; clear it so each panel keeps its own title
fig.suptitle('')

plt.tight_layout()
plt.show()
Next Steps
🎯 Explore Advanced Sampling Strategies
💻 Learn the Command Line Interface
📚 Check the API Reference
📁 Download all example outputs
Source Code
The complete source code for these examples is available: