"""
日本の地名解決サービス
"""
import csv
import os
import re
import unicodedata
from pathlib import Path
from typing import Dict, List, Optional, Tuple
class LocationResolver:
    """Resolve Japanese place names to latitude/longitude coordinates.

    Location rows are loaded from a CSV file into ``locations_data`` as
    dictionaries. The lookup methods read the columns ``name_ja``,
    ``name_hiragana``, ``name_katakana``, ``prefecture``, ``city``,
    ``latitude`` and ``longitude`` from each row.
    """

    # Characters dropped during normalization: any whitespace (including the
    # ideographic space U+3000) and ASCII hyphens.
    _STRIP_PATTERN = re.compile(r"[\s\-\u3000]")

    # Translation tables between the katakana block (U+30A1..U+30F6) and the
    # hiragana block (U+3041..U+3096); the two blocks are 0x60 code points apart.
    _KATAKANA_TO_HIRAGANA = {code: code - 0x60 for code in range(0x30A1, 0x30F7)}
    _HIRAGANA_TO_KATAKANA = {code: code + 0x60 for code in range(0x3041, 0x3097)}

    def __init__(self):
        """Create the resolver and eagerly load the location database."""
        self.locations_data: List[Dict[str, str]] = []
        self.load_locations_data()

    def load_locations_data(self):
        """Load the location database (CSV) into ``self.locations_data``.

        The file is expected at ``<project_root>/data/japanese_locations.csv``
        where ``project_root`` is two directories above this module's
        directory.

        Raises:
            Exception: If the file is missing, unreadable, or contains no
                rows. The original error is attached as ``__cause__``.
        """
        try:
            # Resolve the CSV path relative to the package root directory.
            package_dir = Path(__file__).parent
            project_root = package_dir.parent.parent
            csv_path = project_root / "data" / "japanese_locations.csv"
            if not csv_path.exists():
                raise FileNotFoundError(f"地名データベースが見つかりません: {csv_path}")
            # "utf-8-sig" transparently skips a BOM (which would otherwise be
            # glued onto the first column name); newline="" is what the csv
            # module requires to handle embedded newlines correctly.
            with open(csv_path, "r", encoding="utf-8-sig", newline="") as file:
                self.locations_data = list(csv.DictReader(file))
            if not self.locations_data:
                raise ValueError("地名データベースが空です")
        except Exception as e:
            # Wrap every failure in one error type, keeping the cause chained.
            raise Exception(f"地名データベースの読み込みエラー: {str(e)}") from e

    async def resolve_location(self, location_query: str) -> Tuple[float, float]:
        """Convert a place-name query into coordinates.

        Matching is attempted in order: exact match, partial (substring)
        match, then kana-insensitive fuzzy match.

        Args:
            location_query: Place name (prefecture and/or municipality).

        Returns:
            A ``(latitude, longitude)`` tuple.

        Raises:
            ValueError: If the query is empty after normalization or no
                location matches. When similar locations exist, up to three
                suggestions are included in the message.
        """
        normalized_query = self._normalize_text(location_query)
        # An empty query is a substring of every field and would otherwise
        # "match" the first row of the database.
        if not normalized_query:
            raise ValueError(f"地名が見つかりません: '{location_query}'")

        finders = (
            self._find_exact_match,
            self._find_partial_match,
            self._find_fuzzy_match,
        )
        for finder in finders:
            location = finder(normalized_query)
            if location:
                return self._to_coordinates(location)

        # Nothing matched: build an error message with suggestions if any.
        similar_locations = await self.search_locations(location_query)
        if similar_locations:
            suggestion_text = "、".join(
                [loc["display_name"] for loc in similar_locations[:3]]
            )
            raise ValueError(
                f"地名が見つかりません: '{location_query}'. 候補: {suggestion_text}"
            )
        raise ValueError(f"地名が見つかりません: '{location_query}'")

    async def search_locations(self, query: str) -> List[Dict[str, str]]:
        """Search for locations whose fields contain the query.

        Args:
            query: Search query.

        Returns:
            Up to 10 matching locations, best match first. Each entry has the
            keys ``prefecture``, ``city``, ``display_name``, ``latitude`` and
            ``longitude``. An empty query yields an empty list.
        """
        normalized_query = self._normalize_text(query)
        if not normalized_query:
            return []

        matches: List[Dict[str, str]] = []
        seen_display_names = set()
        for location in self.locations_data:
            search_fields = (
                location["name_ja"],
                location["name_hiragana"],
                location["name_katakana"],
                location["prefecture"],
                location["city"],
            )
            if not any(
                normalized_query in self._normalize_text(field)
                for field in search_fields
            ):
                continue
            display_name = f"{location['prefecture']}{location['city']}"
            # Report each prefecture+city combination only once.
            if display_name in seen_display_names:
                continue
            seen_display_names.add(display_name)
            matches.append(
                {
                    "prefecture": location["prefecture"],
                    "city": location["city"],
                    "display_name": display_name,
                    "latitude": location["latitude"],
                    "longitude": location["longitude"],
                }
            )

        # Highest score first; the sort is stable, so ties keep file order.
        matches.sort(
            key=lambda match: self._calculate_match_score(
                normalized_query, match["display_name"]
            ),
            reverse=True,
        )
        return matches[:10]  # at most 10 results

    @staticmethod
    def _to_coordinates(location: Dict[str, str]) -> Tuple[float, float]:
        """Return ``(latitude, longitude)`` of a location row as floats."""
        return float(location["latitude"]), float(location["longitude"])

    def _find_exact_match(self, query: str) -> Optional[Dict[str, str]]:
        """Return the first location with a field exactly equal to ``query``.

        ``query`` must already be normalized. The fields compared are the
        name, the city, prefecture+city, and the prefecture.
        """
        if not query:
            # Otherwise an empty query would match any row with a blank field.
            return None
        for location in self.locations_data:
            search_targets = (
                self._normalize_text(location["name_ja"]),
                self._normalize_text(location["city"]),
                self._normalize_text(f"{location['prefecture']}{location['city']}"),
                self._normalize_text(location["prefecture"]),
            )
            if query in search_targets:
                return location
        return None

    def _find_partial_match(self, query: str) -> Optional[Dict[str, str]]:
        """Return the best substring match for a normalized ``query``.

        A field matches when it contains the query or the query contains it.
        Each field has a weight (name 1.0, city 0.9, prefecture+city 0.8,
        prefecture 0.7). Only a best score above 0.3 is accepted.
        """
        if not query:
            return None
        best_match = None
        best_score = 0.0
        for location in self.locations_data:
            search_targets = (
                (self._normalize_text(location["name_ja"]), 1.0),
                (self._normalize_text(location["city"]), 0.9),
                (
                    self._normalize_text(f"{location['prefecture']}{location['city']}"),
                    0.8,
                ),
                (self._normalize_text(location["prefecture"]), 0.7),
            )
            for target, weight in search_targets:
                # A blank field is a substring of everything; skipping it
                # prevents rows with empty columns from winning every query.
                if not target:
                    continue
                if query in target or target in query:
                    # Ratio is len(query)/len(target) when the target is the
                    # longer string, and 1.0 when the query is the longer one.
                    score = weight * (len(query) / max(len(target), len(query)))
                    if score > best_score:
                        best_score = score
                        best_match = location
        return best_match if best_score > 0.3 else None

    def _find_fuzzy_match(self, query: str) -> Optional[Dict[str, str]]:
        """Return the first location whose kana reading contains ``query``.

        The normalized query is tried as-is and converted to hiragana and to
        katakana against the ``name_hiragana`` / ``name_katakana`` fields.
        """
        if not query:
            # "" is contained in every string and would match the first row.
            return None
        query_variants = (
            self._katakana_to_hiragana(query),
            self._hiragana_to_katakana(query),
            query,
        )
        for location in self.locations_data:
            search_targets = (
                self._normalize_text(location["name_hiragana"]),
                self._normalize_text(location["name_katakana"]),
            )
            for target in search_targets:
                if any(variant in target for variant in query_variants):
                    return location
        return None

    def _normalize_text(self, text: str) -> str:
        """Normalize text for comparison.

        Applies NFKC normalization (which also folds full-width
        alphanumerics and the ideographic space to their ASCII forms),
        removes whitespace and ASCII hyphens, and lowercases the result.
        Falsy input yields an empty string.
        """
        if not text:
            return ""
        text = unicodedata.normalize("NFKC", text)
        text = self._STRIP_PATTERN.sub("", text)
        return text.lower()

    def _katakana_to_hiragana(self, text: str) -> str:
        """Convert katakana characters in ``text`` to hiragana."""
        return text.translate(self._KATAKANA_TO_HIRAGANA)

    def _hiragana_to_katakana(self, text: str) -> str:
        """Convert hiragana characters in ``text`` to katakana."""
        return text.translate(self._HIRAGANA_TO_KATAKANA)

    def _calculate_match_score(self, query: str, target: str) -> float:
        """Score how well ``target`` matches a normalized ``query``.

        Returns 1.0 for equality, 0.8 when the query is contained in the
        target, 0.6 when the target is contained in the query, and otherwise
        ``1 - edit_distance / longer_length``.
        """
        target_normalized = self._normalize_text(target)
        if query == target_normalized:
            return 1.0
        if query in target_normalized:
            return 0.8
        if target_normalized in query:
            return 0.6
        # Neither string is empty here: an empty string is a substring of any
        # string and would have been caught by the branches above.
        distance = self._levenshtein_distance(query, target_normalized)
        return 1.0 - distance / max(len(query), len(target_normalized))

    def _levenshtein_distance(self, s1: str, s2: str) -> int:
        """Compute the Levenshtein (edit) distance between two strings."""
        # Keep s2 as the shorter string so each row is as short as possible.
        if len(s1) < len(s2):
            s1, s2 = s2, s1
        if not s2:
            return len(s1)
        previous_row = list(range(len(s2) + 1))
        for i, c1 in enumerate(s1, start=1):
            current_row = [i]
            for j, c2 in enumerate(s2):
                insertions = previous_row[j + 1] + 1
                deletions = current_row[j] + 1
                substitutions = previous_row[j] + (c1 != c2)
                current_row.append(min(insertions, deletions, substitutions))
            previous_row = current_row
        return previous_row[-1]

    def resolve_ambiguous_location(
        self, prefecture: str, city: str
    ) -> Tuple[float, float]:
        """Resolve an ambiguous place name using prefecture and city.

        Args:
            prefecture: Prefecture name.
            city: Municipality name.

        Returns:
            A ``(latitude, longitude)`` tuple.

        Raises:
            ValueError: If no row matches both the prefecture and the city.
        """
        normalized_prefecture = self._normalize_text(prefecture)
        normalized_city = self._normalize_text(city)
        for location in self.locations_data:
            if (
                self._normalize_text(location["prefecture"]) == normalized_prefecture
                and self._normalize_text(location["city"]) == normalized_city
            ):
                return self._to_coordinates(location)
        raise ValueError(f"地名が見つかりません: {prefecture}{city}")