From 9354b51f2498ff21ecf5e35ce5949342b1ed4302 Mon Sep 17 00:00:00 2001 From: Tipragot Date: Sun, 24 Dec 2023 18:17:12 +0100 Subject: [PATCH] =?UTF-8?q?R=C3=A9cup=C3=A9ration=20des=20donn=C3=A9es=20d?= =?UTF-8?q?es=20metiers=20et=20des=20formations=20(sans=20documentation)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/Cargo.toml | 3 - backend/src/lib.rs | 171 ++++++++++++++++++++++++++++++++++---------- backend/src/main.rs | 7 +- 3 files changed, 138 insertions(+), 43 deletions(-) diff --git a/backend/Cargo.toml b/backend/Cargo.toml index 0ef02c4..51a2b34 100644 --- a/backend/Cargo.toml +++ b/backend/Cargo.toml @@ -5,11 +5,8 @@ edition = "2021" [dependencies] anyhow = "1.0.76" -bytes = "1.5.0" -paste = "1.0.14" quick-xml = { version = "0.31.0", features = ["serde", "serialize"] } reqwest = "0.11.23" serde = { version = "1.0.193", features = ["derive"] } -serde-xml-rs = "0.6.0" tokio = { version = "1.35.1", features = ["full"] } zip = "0.6.6" diff --git a/backend/src/lib.rs b/backend/src/lib.rs index f702cb4..0f40381 100644 --- a/backend/src/lib.rs +++ b/backend/src/lib.rs @@ -1,51 +1,144 @@ // https://api.opendata.onisep.fr/downloads/5fe0808a2da6f/5fe0808a2da6f.zip -use paste::paste; +use quick_xml::events::Event; +use serde::de::Error; use serde::{Deserialize, Deserializer}; -use std::{ - io::{BufReader, Cursor}, - os::windows::fs::FileExt, -}; +use std::collections::HashMap; +use std::io::{BufReader, Cursor}; +use zip::ZipArchive; -macro_rules! unwrap_vec { - ($name:ident, $t:ty) => { - paste! { - fn []<'de, D: Deserializer<'de>>(deserializer: D) -> Result, D::Error> { - #[derive(Deserialize)] - struct List { - #[serde(default)] - $name: Vec<$t>, - } - Ok(List::deserialize(deserializer)?.$name) - } - } - }; +async fn download_zip(url: &str) -> anyhow::Result>>> { + let response = reqwest::get(url).await?; + let response = Cursor::new(response.bytes().await?.to_vec()); + Ok(zip::ZipArchive::new(response)?) } -#[derive(Deserialize, Debug)] -pub struct Metiers { - #[serde(rename = "$value")] - metiers: Vec, -} - -unwrap_vec!(synonyme, String); - -#[derive(Deserialize, Debug)] +#[derive(Default, PartialEq, Debug, Deserialize)] pub struct Metier { + #[serde(rename = "identifiant")] + id: String, + + nom_metier: String, libelle_feminin: String, libelle_masculin: String, - #[serde(deserialize_with = "unwrap_synonymes")] - synonymes: Vec, - acces_metier: String, + + #[serde(deserialize_with = "synonymes")] + pub synonymes: Vec, + + #[serde(deserialize_with = "paragraph_list")] + competences: Vec, + + #[serde(rename = "acces_metier", deserialize_with = "paragraph_list")] + conditions: Vec, + + #[serde(rename = "niveau_acces_min", deserialize_with = "minimal_level")] + minimal_level: String, + + #[serde(rename = "formations_min_requise", deserialize_with = "formations")] + formations: Vec, } -pub async fn download_metier() -> anyhow::Result { - let response = - reqwest::get("https://api.opendata.onisep.fr/downloads/5fe0808a2da6f/5fe0808a2da6f.zip") - .await?; - let response = Cursor::new(response.bytes().await?.to_vec()); - let mut archive = zip::ZipArchive::new(response)?; - let file = archive.by_index(0)?; - let metiers = quick_xml::de::from_reader(BufReader::new(file))?; - Ok(metiers) +fn synonymes<'de, D: Deserializer<'de>>(deserializer: D) -> Result, D::Error> { + #[derive(Deserialize)] + struct Synonymes { + #[serde(default)] + synonyme: Vec, + } + Ok(Synonymes::deserialize(deserializer)?.synonyme) +} + +fn paragraph_list<'de, D: Deserializer<'de>>(deserializer: D) -> Result, D::Error> { + let data = String::deserialize(deserializer)?; + let mut reader = quick_xml::Reader::from_str(&data); + reader.trim_text(true); + let mut lines = Vec::new(); + loop { + match reader.read_event() { + Ok(Event::Eof) => break, + Ok(Event::Text(text)) => { + lines.push(text.unescape().map_err(D::Error::custom)?.into_owned()) + } + Ok(_) => {} + Err(err) => return Err(D::Error::custom(err)), + } + } + Ok(lines) +} + +fn minimal_level<'de, D: Deserializer<'de>>(deserializer: D) -> Result { + #[derive(Deserialize)] + struct NiveauAccesMin { + #[serde(default)] + libelle: String, + } + Ok(NiveauAccesMin::deserialize(deserializer)?.libelle) +} + +fn formations<'de, D: Deserializer<'de>>(deserializer: D) -> Result, D::Error> { + #[derive(Deserialize)] + struct Formations { + #[serde(default)] + formation_min_requise: Vec, + } + #[derive(Deserialize)] + struct Formation { + id: String, + } + Ok(Formations::deserialize(deserializer)? + .formation_min_requise + .into_iter() + .map(|f| f.id) + .collect()) +} + +pub async fn download_metiers() -> anyhow::Result> { + let mut archive = + download_zip("https://api.opendata.onisep.fr/downloads/5fe0808a2da6f/5fe0808a2da6f.zip") + .await?; + + #[derive(Deserialize)] + struct Metiers { + metier: Vec, + } + + let file = archive.by_index(0)?; + let metiers: Metiers = quick_xml::de::from_reader(BufReader::new(file))?; + Ok(metiers + .metier + .into_iter() + .map(|m| (m.id.clone(), m)) + .collect()) +} + +#[derive(Default, PartialEq, Debug, Deserialize)] +pub struct Formation { + #[serde(rename = "identifiant")] + id: String, + + libelle_complet: String, + + #[serde(default, deserialize_with = "paragraph_list")] + descriptif_format_court: Vec, + + #[serde(default, deserialize_with = "paragraph_list")] + descriptif_acces: Vec, +} + +pub async fn download_formations() -> anyhow::Result> { + let mut archive = + download_zip("https://api.opendata.onisep.fr/downloads/5fe07a9ecc960/5fe07a9ecc960.zip") + .await?; + + #[derive(Deserialize)] + struct Formations { + formation: Vec, + } + + let file = archive.by_index(0)?; + let formations: Formations = quick_xml::de::from_reader(BufReader::new(file))?; + Ok(formations + .formation + .into_iter() + .map(|f| (f.id.clone(), f)) + .collect()) } diff --git a/backend/src/main.rs b/backend/src/main.rs index 7a19c5d..333bd40 100644 --- a/backend/src/main.rs +++ b/backend/src/main.rs @@ -1,8 +1,13 @@ use anyhow::Ok; +use backend::Metier; #[tokio::main] async fn main() -> anyhow::Result<()> { - let metiers = backend::download_metier().await?; + let metiers = backend::download_metiers().await?; println!("{:#?}", metiers); + + let formations = backend::download_formations().await?; + println!("{:#?}", formations); + Ok(()) }