platform data bundle

this is pretty low-effort, but it seems to work fine?
This commit is contained in:
stuebinm 2023-11-05 18:49:22 +01:00
parent 194bccdc10
commit aebc83a19f
6 changed files with 29676 additions and 28 deletions

File diff suppressed because it is too large Load diff

View file

@ -5,3 +5,7 @@
More current versions may be listed at <https://www.bahn.de/agb> as "Leitpunktkürzel" in the section "Entfernungswerk des Deutschlandtarifs".
The RL100-Code combination was added manually and is not guaranteed to be accurate. Some places do not have a one-to-one mapping between Leitpunkt and RL100.
- `platforms.tsv` the data included in this document is from www.openstreetmap.org. The data is made available under ODbL.
- `DBNetz-Betriebsstellenverzeichnis-Stand2021-10.csv` has been released under the Creative Commons Attribution 4.0 International (CC BY 4.0) Licence by DB Netz AG, see https://data.deutschebahn.com/dataset/data-betriebsstellen.html

6020
data/platforms.tsv Normal file

File diff suppressed because it is too large Load diff

View file

@ -15,6 +15,7 @@ gleam_http = "~> 3.0"
mist = "~> 0.4"
gleam_hackney = "~> 1.0"
haystack = "~> 0.1"
gleam_erlang = "~> 0.22"
[dev-dependencies]
gleeunit = "~> 0.6"

View file

@ -5,19 +5,19 @@ packages = [
{ name = "certifi", version = "2.12.0", build_tools = ["rebar3"], requirements = [], otp_app = "certifi", source = "hex", outer_checksum = "EE68D85DF22E554040CDB4BE100F33873AC6051387BAF6A8F6CE82272340FF1C" },
{ name = "decimal", version = "2.1.1", build_tools = ["mix"], requirements = [], otp_app = "decimal", source = "hex", outer_checksum = "53CFE5F497ED0E7771AE1A475575603D77425099BA5FAEF9394932B35020FFCC" },
{ name = "gleam_erlang", version = "0.22.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_erlang", source = "hex", outer_checksum = "367D8B41A7A86809928ED1E7E55BFD0D46D7C4CF473440190F324AFA347109B4" },
{ name = "gleam_hackney", version = "1.1.0", build_tools = ["gleam"], requirements = ["hackney", "gleam_http", "gleam_stdlib"], otp_app = "gleam_hackney", source = "hex", outer_checksum = "CA69AD9061C4A8775A7BD445DE33ECEFD87379AF8E5B028F3DD0216BECA5DD0B" },
{ name = "gleam_hackney", version = "1.1.0", build_tools = ["gleam"], requirements = ["gleam_http", "gleam_stdlib", "hackney"], otp_app = "gleam_hackney", source = "hex", outer_checksum = "CA69AD9061C4A8775A7BD445DE33ECEFD87379AF8E5B028F3DD0216BECA5DD0B" },
{ name = "gleam_http", version = "3.5.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_http", source = "hex", outer_checksum = "FAE9AE3EB1CA90C2194615D20FFFD1E28B630E84DACA670B28D959B37BCBB02C" },
{ name = "gleam_otp", version = "0.7.0", build_tools = ["gleam"], requirements = ["gleam_erlang", "gleam_stdlib"], otp_app = "gleam_otp", source = "hex", outer_checksum = "ED7381E90636E18F5697FD7956EECCA635A3B65538DC2BE2D91A38E61DCE8903" },
{ name = "gleam_otp", version = "0.7.0", build_tools = ["gleam"], requirements = ["gleam_stdlib", "gleam_erlang"], otp_app = "gleam_otp", source = "hex", outer_checksum = "ED7381E90636E18F5697FD7956EECCA635A3B65538DC2BE2D91A38E61DCE8903" },
{ name = "gleam_stdlib", version = "0.31.0", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "6D1BC5B4D4179B9FEE866B1E69FE180AC2CE485AD90047C0B32B2CA984052736" },
{ name = "gleeunit", version = "0.11.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleeunit", source = "hex", outer_checksum = "1397E5C4AC4108769EE979939AC39BF7870659C5AFB714630DEEEE16B8272AD5" },
{ name = "glisten", version = "0.9.1", build_tools = ["gleam"], requirements = ["gleam_stdlib", "gleam_otp", "gleam_erlang"], otp_app = "glisten", source = "hex", outer_checksum = "91809C44C52456D96C8317A19246DE1C06ED494C40D282CD9380565E879A52C4" },
{ name = "hackney", version = "1.20.1", build_tools = ["rebar3"], requirements = ["certifi", "mimerl", "metrics", "parse_trans", "ssl_verify_fun", "unicode_util_compat", "idna"], otp_app = "hackney", source = "hex", outer_checksum = "FE9094E5F1A2A2C0A7D10918FEE36BFEC0EC2A979994CFF8CFE8058CD9AF38E3" },
{ name = "haystack", version = "0.1.0", build_tools = ["mix"], requirements = ["jason", "stemmer"], otp_app = "haystack", source = "hex", outer_checksum = "27A582513EF933C1B11345B96F8D41EE137D03B25312BD85068FFE8FEC503635" },
{ name = "glisten", version = "0.9.1", build_tools = ["gleam"], requirements = ["gleam_otp", "gleam_erlang", "gleam_stdlib"], otp_app = "glisten", source = "hex", outer_checksum = "91809C44C52456D96C8317A19246DE1C06ED494C40D282CD9380565E879A52C4" },
{ name = "hackney", version = "1.20.1", build_tools = ["rebar3"], requirements = ["mimerl", "certifi", "ssl_verify_fun", "metrics", "idna", "unicode_util_compat", "parse_trans"], otp_app = "hackney", source = "hex", outer_checksum = "FE9094E5F1A2A2C0A7D10918FEE36BFEC0EC2A979994CFF8CFE8058CD9AF38E3" },
{ name = "haystack", version = "0.1.0", build_tools = ["mix"], requirements = ["stemmer", "jason"], otp_app = "haystack", source = "hex", outer_checksum = "27A582513EF933C1B11345B96F8D41EE137D03B25312BD85068FFE8FEC503635" },
{ name = "idna", version = "6.1.1", build_tools = ["rebar3"], requirements = ["unicode_util_compat"], otp_app = "idna", source = "hex", outer_checksum = "92376EB7894412ED19AC475E4A86F7B413C1B9FBB5BD16DCCD57934157944CEA" },
{ name = "jason", version = "1.4.1", build_tools = ["mix"], requirements = ["decimal"], otp_app = "jason", source = "hex", outer_checksum = "FBB01ECDFD565B56261302F7E1FCC27C4FB8F32D56EAB74DB621FC154604A7A1" },
{ name = "metrics", version = "1.0.1", build_tools = ["rebar3"], requirements = [], otp_app = "metrics", source = "hex", outer_checksum = "69B09ADDDC4F74A40716AE54D140F93BEB0FB8978D8636EADED0C31B6F099F16" },
{ name = "mimerl", version = "1.2.0", build_tools = ["rebar3"], requirements = [], otp_app = "mimerl", source = "hex", outer_checksum = "F278585650AA581986264638EBF698F8BB19DF297F66AD91B18910DFC6E19323" },
{ name = "mist", version = "0.14.0", build_tools = ["gleam"], requirements = ["gleam_stdlib", "glisten", "gleam_http", "gleam_erlang", "gleam_otp"], otp_app = "mist", source = "hex", outer_checksum = "7CDD0396D9A556F1069D83E9AF2B24388AAC478B9B4846615C6D4797E1D3C6A3" },
{ name = "mist", version = "0.14.0", build_tools = ["gleam"], requirements = ["gleam_stdlib", "gleam_erlang", "gleam_http", "gleam_otp", "glisten"], otp_app = "mist", source = "hex", outer_checksum = "7CDD0396D9A556F1069D83E9AF2B24388AAC478B9B4846615C6D4797E1D3C6A3" },
{ name = "parse_trans", version = "3.4.1", build_tools = ["rebar3"], requirements = [], otp_app = "parse_trans", source = "hex", outer_checksum = "620A406CE75DADA827B82E453C19CF06776BE266F5A67CFF34E1EF2CBB60E49A" },
{ name = "ssl_verify_fun", version = "1.1.7", build_tools = ["mix", "rebar3", "make"], requirements = [], otp_app = "ssl_verify_fun", source = "hex", outer_checksum = "FE4C190E8F37401D30167C8C405EDA19469F34577987C76DDE613E838BBC67F8" },
{ name = "stemmer", version = "1.1.0", build_tools = ["mix"], requirements = [], otp_app = "stemmer", source = "hex", outer_checksum = "0CB5FAF73476B84500E371FF39FD9A494F60AB31D991689C1CD53B920556228F" },
@ -25,6 +25,7 @@ packages = [
]
[requirements]
gleam_erlang = { version = "~> 0.22" }
gleam_hackney = { version = "~> 1.0" }
gleam_http = { version = "~> 3.0" }
gleam_stdlib = { version = "~> 0.19" }

View file

@ -89,7 +89,7 @@ type IdKind {
/// Outcome of resolving a search term to a value of type `t`.
///
/// NOTE(review): this view shows both a one-field and a two-field `Fuzzy`
/// constructor. The visible call sites that build or match two fields
/// (`Fuzzy(res, station)`, `Fuzzy(res, _)`, `Fuzzy(_, code)`) suggest the
/// two-field form is the current one — confirm against the real file.
type Matched(t) {
// Direct hit; `lookup_fuzzy` answers these with status 200.
Exact(t)
// Fuzzy hit carrying a single value.
Fuzzy(t)
// Fuzzy hit carrying two values. `lookup_fuzzy` answers with status 302 and
// the first value; the platform lookup in `lookup_station` uses the second.
// Where it is built in `main`, the second value is what `stations` maps the
// first value to.
Fuzzy(t, t)
// No match; `lookup_fuzzy` answers these with status 404.
Failed
}
@ -136,7 +136,7 @@ fn lookup_fuzzy(
) -> #(Int, String) {
case fuzzy(query, kind) {
Exact(res) -> #(200, res)
Fuzzy(res) -> #(302, res)
Fuzzy(res, _) -> #(302, res)
Failed -> #(404, "??")
}
}
@ -148,31 +148,72 @@ fn if_not(res: #(Int, t), fallback: fn() -> #(Int, t)) -> #(Int, t) {
})
}
fn lookup_station(
request: Request(t),
ds100_to_name: Map(String, String),
leitpunkt_to_name: Map(String, String),
lookup_platform: fn(String) -> String,
fuzzy: fn(String, IdKind) -> Matched(String),
) -> Response(mist.ResponseData) {
let #(code, text) = case request {
let #(#(code, text), is_html) = case request {
// blackhole favicon.ico requests instead of using the index
Request(method: Get, path: "/favicon.ico", ..) -> #(404, "")
Request(method: Get, path: "/favicon.ico", ..) -> #(#(404, ""), False)
Request(method: Get, path: "/help", ..)
| Request(method: Get, path: "/", ..) -> #(
| Request(method: Get, path: "/", ..) -> #(#(
200,
"ril100 → Name: " <> proto <> ril100_domain <> "/HG\n" <> "Name → ril100: " <> proto <> ril100_domain <> "/Göttingen\n\n" <> "Leitpunkt → Name: " <> proto <> leitpunkt_domain <> "/GOE\n" <> "Name → Leitpunkt: " <> proto <> leitpunkt_domain <> "/Göttingen\n\n" <> "Fuzzy:" <> proto <> domain <> "/...",
)
"ril100 → Name: " <> proto <> ril100_domain <> "/HG\n" <>
"Name → ril100: " <> proto <> ril100_domain <> "/Göttingen\n\n" <>
"Leitpunkt → Name: " <> proto <> leitpunkt_domain <> "/GOE\n" <>
"Name → Leitpunkt: " <> proto <> leitpunkt_domain <> "/Göttingen\n\n" <>
"Fuzzy:" <> proto <> domain <> "/...",
), False)
Request(method: Get, path: "/" <> path, ..) -> {
let query = unpercent(path)
case get_header(request, "x-forwarded-host") {
Ok(domain) if domain == leitpunkt_domain ->
let raw_query = unpercent(path)
let show_platforms = string.ends_with(raw_query, "/gleis")
|| string.ends_with(raw_query, "/bahnsteig")
|| string.ends_with(raw_query, "/platforms")
|| string.ends_with(raw_query, "/tracks")
|| string.ends_with(raw_query, "/platform")
|| string.ends_with(raw_query, "/track")
let query = raw_query
|> string.replace("/gleis","")
|> string.replace("/bahnsteig","")
|> string.replace("/platforms","")
|> string.replace("/tracks","")
|> string.replace("/platform","")
|> string.replace("/track","")
case #(show_platforms, get_header(request, "x-forwarded-host")) {
#(False, Ok(domain)) if domain == leitpunkt_domain ->
query
|> lookup_exact(leitpunkt_to_name)
|> if_not(fn() { lookup_fuzzy(query, Leitpunkt, fuzzy) })
Ok(domain) if domain == ril100_domain || domain == ds100_domain ->
|> pair.new(False)
#(False, Ok(domain)) if domain == ril100_domain || domain == ds100_domain ->
query
|> lookup_exact(ds100_to_name)
|> if_not(fn() { lookup_fuzzy(query, DS100, fuzzy) })
|> pair.new(False)
#(True, Ok(domain)) if domain == leitpunkt_domain -> {
let query = case map.get(leitpunkt_to_name, query) {
Ok(name) -> name
_ -> query
}
case fuzzy(query, DS100) {
Exact(code) -> #(200, lookup_platform(code))
Fuzzy(_, code) -> #(200, lookup_platform(code))
_ -> #(404, "")
} |> pair.new(True)
}
#(True, Ok(domain)) if domain == ril100_domain || domain == ds100_domain ->
case lookup_exact(query, ds100_to_name) {
#(200,_) -> #(200, lookup_platform(query))
_ -> case fuzzy(query, DS100) {
Exact(code) -> #(200, lookup_platform(code))
Fuzzy(_, code) -> #(200, lookup_platform(code))
_ -> #(404, "")
}
} |> pair.new(True)
_ -> {
let by_ds100 = lookup_exact(query, ds100_to_name)
let by_lp = lookup_exact(query, leitpunkt_to_name)
@ -180,16 +221,21 @@ fn lookup_station(
#(200, _) -> #(302, proto <> ril100_domain <> "/" <> path)
#(_, 200) -> #(302, proto <> leitpunkt_domain <> "/" <> path)
_ -> #(302, proto <> ril100_domain <> "/" <> path)
} |> pair.new(False)
}
}
}
}
_ -> #(404, "intended usage is e.g. curl " <> proto <> domain <> "/FF")
_ -> #(#(404, "intended usage is e.g. curl " <> proto <> domain <> "/FF"), False)
}
let body = text
|> bit_builder.from_string
|> mist.Bytes
let content_type = case is_html {
True -> "text/html; charset=utf8"
False -> "text/plain; charset=utf8"
}
response.new(code)
|> response.prepend_header(
"x-data-source",
@ -199,7 +245,7 @@ fn lookup_station(
"x-sources-at",
"https://stuebinm.eu/git/bahnhof.name",
)
|> response.prepend_header("content-type", "text/plain; charset=utf8")
|> response.prepend_header("content-type", content_type)
|> fn(a) {
case code == 302 {
True -> response.prepend_header(a, "location", text)
@ -210,10 +256,10 @@ fn lookup_station(
}
pub fn main() {
let assert Ok(bahn_ril100) = fetch_data()
let assert Ok(bahn_ril100) = file.read("data/DBNetz-Betriebsstellenverzeichnis-Stand2021-10.csv")
let ds100s =
read_csv(bahn_ril100)
read_csv(bahn_ril100, ";")
|> list.filter_map(fn(fields) {
case fields {
[_, ds100, name, ..] -> Ok(#(name, ds100))
@ -222,13 +268,15 @@ pub fn main() {
})
let assert Ok(leitpunkte_raw) = file.read("data/leitpunkte.csv")
let leitpunkte =
read_csv(leitpunkte_raw)
read_csv(leitpunkte_raw, ";")
|> list.filter_map(fn(fields) {
case fields {
[lp, name, _ds100] -> Ok(#(name, lp))
_ -> Error(fields)
}
})
let assert Ok(platforms_raw) = file.read("data/platforms.tsv")
let platforms = read_csv(platforms_raw, "\t")
let name_to_ds100 = map.from_list(ds100s)
let name_to_leitpunkt = map.from_list(leitpunkte)
@ -321,19 +369,36 @@ pub fn main() {
fuzzy(searchterm, kind)
|> list.filter_map(fn(res) { map.get(ids, string.uppercase(res)) })
case results {
[res] -> Fuzzy(res)
[res, ..] -> Fuzzy(res)
[res] -> {
let assert Ok(station) = map.get(stations, res)
Fuzzy(res, station)
}
[res, ..] -> {
let assert Ok(station) = map.get(stations, res)
Fuzzy(res, station)
}
_ -> Failed
}
}
}
}
// Renders every row of `platforms` whose first field equals `ds100` as an
// HTML link of the form
// `<a href="https://osm.org/<osmtype>/<osmid>"><info></a>`,
// joins the links with "<br>\n", and returns the result passed through
// `inspect`.
//
// Rows for other codes are skipped. Rows that do not have exactly four
// fields are skipped as well, instead of falling through a `case` that has
// no clause for them.
let lookup_platform = fn(ds100: String) -> String {
  // NOTE(review): the result of this call is discarded; it looks like a
  // debugging leftover. Kept because `inspect` is defined outside this view.
  inspect(ds100)
  platforms
  |> list.filter_map(fn(line) {
    case line {
      [code, osmid, osmtype, info] if code == ds100 ->
        Ok(
          "<a href=\"https://osm.org/"<>osmtype<>"/"<>osmid<>"\">"<>info<>"</a>",
        )
      // Different code, or a row without exactly four fields.
      _ -> Error(Nil)
    }
  })
  |> string.join("<br>\n")
  // NOTE(review): if `inspect` is the stdlib string inspector, this wraps the
  // markup in quotes and escapes it before it is served as text/html —
  // confirm this is intended. Also note that `osmid`, `osmtype` and `info`
  // are interpolated into HTML without escaping.
  |> inspect
}
io.println("compiled indices, starting server …")
let assert Ok(_) =
fn(req: Request(mist.Connection)) -> Response(mist.ResponseData) {
lookup_station(req, ds100_to_name, leitpunkt_to_name, exact_then_fuzzy)
lookup_station(req, ds100_to_name, leitpunkt_to_name, lookup_platform, exact_then_fuzzy)
}
|> mist.new
|> mist.port(2345)
@ -348,17 +413,18 @@ fn fetch_data() -> Result(String, hackney.Error) {
"https://download-data.deutschebahn.com/static/datasets/betriebsstellen/DBNetz-Betriebsstellenverzeichnis-Stand2021-10.csv",
)
let assert Ok(request) = request.from_uri(uri)
io.println("got response")
let assert Ok(response) = hackney.send(request)
// some ü are corrupted for some reason
Ok(string.replace(response.body, "<EFBFBD>", "ü"))
}
/// Parses delimiter-separated text into rows of fields.
///
/// Splits `contents` on "\n", drops the first line (treated as the header),
/// and splits every remaining line on the separator. Quoting and escaping
/// are not interpreted.
///
/// NOTE(review): this view shows two signatures and two final `list.map`
/// lines. The visible callers in `main` pass a separator (";" or "\t"), so
/// the `sep` variants look like the current ones — confirm against the real
/// file.
fn read_csv(contents) -> List(List(String)) {
fn read_csv(contents, sep) -> List(List(String)) {
contents
// the file doesn't use quotes, so this is fine
|> string.split(on: "\n")
// drop CSV header
|> list.drop(1)
// split each line into fields on a hard-coded ";"
|> list.map(fn(a) { string.split(a, on: ";") })
// split each line into fields on the caller-supplied separator
|> list.map(fn(a) { string.split(a, on: sep) })
}