Récupération des données des métiers et des formations (sans documentation)

This commit is contained in:
Tipragot 2023-12-24 18:17:12 +01:00
parent 96d645ca75
commit 9354b51f24
3 changed files with 138 additions and 43 deletions

View file

@ -5,11 +5,8 @@ edition = "2021"
[dependencies] [dependencies]
anyhow = "1.0.76" anyhow = "1.0.76"
bytes = "1.5.0"
paste = "1.0.14"
quick-xml = { version = "0.31.0", features = ["serde", "serialize"] } quick-xml = { version = "0.31.0", features = ["serde", "serialize"] }
reqwest = "0.11.23" reqwest = "0.11.23"
serde = { version = "1.0.193", features = ["derive"] } serde = { version = "1.0.193", features = ["derive"] }
serde-xml-rs = "0.6.0"
tokio = { version = "1.35.1", features = ["full"] } tokio = { version = "1.35.1", features = ["full"] }
zip = "0.6.6" zip = "0.6.6"

View file

@ -1,51 +1,144 @@
// https://api.opendata.onisep.fr/downloads/5fe0808a2da6f/5fe0808a2da6f.zip // https://api.opendata.onisep.fr/downloads/5fe0808a2da6f/5fe0808a2da6f.zip
use paste::paste; use quick_xml::events::Event;
use serde::de::Error;
use serde::{Deserialize, Deserializer}; use serde::{Deserialize, Deserializer};
use std::{ use std::collections::HashMap;
io::{BufReader, Cursor}, use std::io::{BufReader, Cursor};
os::windows::fs::FileExt, use zip::ZipArchive;
};
macro_rules! unwrap_vec { async fn download_zip(url: &str) -> anyhow::Result<ZipArchive<Cursor<Vec<u8>>>> {
($name:ident, $t:ty) => { let response = reqwest::get(url).await?;
paste! { let response = Cursor::new(response.bytes().await?.to_vec());
fn [<unwrap_ $name s>]<'de, D: Deserializer<'de>>(deserializer: D) -> Result<Vec<$t>, D::Error> { Ok(zip::ZipArchive::new(response)?)
#[derive(Deserialize)]
struct List {
#[serde(default)]
$name: Vec<$t>,
}
Ok(List::deserialize(deserializer)?.$name)
}
}
};
} }
#[derive(Deserialize, Debug)] #[derive(Default, PartialEq, Debug, Deserialize)]
pub struct Metiers {
#[serde(rename = "$value")]
metiers: Vec<Metier>,
}
unwrap_vec!(synonyme, String);
#[derive(Deserialize, Debug)]
pub struct Metier { pub struct Metier {
#[serde(rename = "identifiant")]
id: String,
nom_metier: String,
libelle_feminin: String, libelle_feminin: String,
libelle_masculin: String, libelle_masculin: String,
#[serde(deserialize_with = "unwrap_synonymes")]
synonymes: Vec<String>, #[serde(deserialize_with = "synonymes")]
acces_metier: String, pub synonymes: Vec<String>,
#[serde(deserialize_with = "paragraph_list")]
competences: Vec<String>,
#[serde(rename = "acces_metier", deserialize_with = "paragraph_list")]
conditions: Vec<String>,
#[serde(rename = "niveau_acces_min", deserialize_with = "minimal_level")]
minimal_level: String,
#[serde(rename = "formations_min_requise", deserialize_with = "formations")]
formations: Vec<String>,
} }
pub async fn download_metier() -> anyhow::Result<Metiers> { fn synonymes<'de, D: Deserializer<'de>>(deserializer: D) -> Result<Vec<String>, D::Error> {
let response = #[derive(Deserialize)]
reqwest::get("https://api.opendata.onisep.fr/downloads/5fe0808a2da6f/5fe0808a2da6f.zip") struct Synonymes {
.await?; #[serde(default)]
let response = Cursor::new(response.bytes().await?.to_vec()); synonyme: Vec<String>,
let mut archive = zip::ZipArchive::new(response)?; }
let file = archive.by_index(0)?; Ok(Synonymes::deserialize(deserializer)?.synonyme)
let metiers = quick_xml::de::from_reader(BufReader::new(file))?; }
Ok(metiers)
/// Serde `deserialize_with` helper that turns a markup string into a list of
/// its text fragments.
///
/// The field is first deserialized as a plain `String`. That string is then
/// re-parsed with a `quick_xml::Reader` (text trimming enabled) and every text
/// event is unescaped and collected, in document order. All other events
/// (tags, comments, ...) are skipped.
///
/// # Errors
///
/// Returns a custom `D::Error` if the string cannot be deserialized, if the
/// reader reports a parse error, or if a text fragment fails to unescape.
fn paragraph_list<'de, D: Deserializer<'de>>(deserializer: D) -> Result<Vec<String>, D::Error> {
    let markup = String::deserialize(deserializer)?;

    let mut parser = quick_xml::Reader::from_str(&markup);
    parser.trim_text(true);

    let mut paragraphs = Vec::new();
    loop {
        // Any reader error aborts the whole field, wrapped as a serde error.
        match parser.read_event().map_err(D::Error::custom)? {
            Event::Eof => break,
            Event::Text(fragment) => {
                let unescaped = fragment.unescape().map_err(D::Error::custom)?;
                paragraphs.push(unescaped.into_owned());
            }
            // Only text content is of interest; structural events are ignored.
            _ => {}
        }
    }
    Ok(paragraphs)
}
/// Serde `deserialize_with` helper that extracts the `libelle` field of a
/// nested value and returns it as the minimal level string.
///
/// If `libelle` is absent, the empty string is returned (the field is marked
/// `#[serde(default)]`).
fn minimal_level<'de, D: Deserializer<'de>>(deserializer: D) -> Result<String, D::Error> {
    /// Shape of the nested value; only `libelle` is read.
    #[derive(Deserialize)]
    struct NiveauAccesMin {
        #[serde(default)]
        libelle: String,
    }

    let NiveauAccesMin { libelle } = NiveauAccesMin::deserialize(deserializer)?;
    Ok(libelle)
}
/// Serde `deserialize_with` helper that flattens a list of
/// `formation_min_requise` entries into the list of their `id` values.
///
/// A missing `formation_min_requise` list yields an empty vector (the field is
/// marked `#[serde(default)]`). The order of the ids follows the input order.
fn formations<'de, D: Deserializer<'de>>(deserializer: D) -> Result<Vec<String>, D::Error> {
    /// One entry of the list; only its `id` is kept.
    #[derive(Deserialize)]
    struct Formation {
        id: String,
    }

    /// Wrapper holding the repeated `formation_min_requise` entries.
    #[derive(Deserialize)]
    struct Formations {
        #[serde(default)]
        formation_min_requise: Vec<Formation>,
    }

    let Formations {
        formation_min_requise: entries,
    } = Formations::deserialize(deserializer)?;

    let mut ids = Vec::with_capacity(entries.len());
    for Formation { id } in entries {
        ids.push(id);
    }
    Ok(ids)
}
/// Downloads the "metiers" archive, parses the XML in its first entry and
/// returns the jobs indexed by their `id`.
///
/// The archive is fetched through `download_zip`; entry `0` of the archive is
/// streamed through a `BufReader` into `quick_xml::de::from_reader`. When two
/// entries share an id, the later one replaces the earlier one in the map.
///
/// # Errors
///
/// Propagates any error from the download, from opening the archive entry, or
/// from XML deserialization.
pub async fn download_metiers() -> anyhow::Result<HashMap<String, Metier>> {
    /// Root of the document: a sequence of `metier` entries.
    #[derive(Deserialize)]
    struct Metiers {
        metier: Vec<Metier>,
    }

    let url = "https://api.opendata.onisep.fr/downloads/5fe0808a2da6f/5fe0808a2da6f.zip";
    let mut archive = download_zip(url).await?;

    let entry = BufReader::new(archive.by_index(0)?);
    let document: Metiers = quick_xml::de::from_reader(entry)?;

    let mut by_id = HashMap::with_capacity(document.metier.len());
    for metier in document.metier {
        // The key has to be cloned because the value takes ownership of `metier`.
        by_id.insert(metier.id.clone(), metier);
    }
    Ok(by_id)
}
/// One `formation` record as deserialized from the formations XML document.
#[derive(Debug, Default, PartialEq, Deserialize)]
pub struct Formation {
    /// Identifier of the record, read from the input field `identifiant`.
    #[serde(rename = "identifiant")]
    id: String,
    /// Value of the `libelle_complet` field (presumably the full display label).
    libelle_complet: String,
    /// Text fragments extracted from `descriptif_format_court` by
    /// `paragraph_list`; empty when the field is absent.
    #[serde(default, deserialize_with = "paragraph_list")]
    descriptif_format_court: Vec<String>,
    /// Text fragments extracted from `descriptif_acces` by `paragraph_list`;
    /// empty when the field is absent.
    #[serde(default, deserialize_with = "paragraph_list")]
    descriptif_acces: Vec<String>,
}
/// Downloads the "formations" archive, parses the XML in its first entry and
/// returns the records indexed by their `id`.
///
/// Errors from the download, from opening the archive entry, or from XML
/// deserialization are propagated to the caller through `?`.
pub async fn download_formations() -> anyhow::Result<HashMap<String, Formation>> {
// Fetch the zip archive into memory via `download_zip`.
let mut archive =
download_zip("https://api.opendata.onisep.fr/downloads/5fe07a9ecc960/5fe07a9ecc960.zip")
.await?;
// Root of the document: a sequence of `formation` entries.
#[derive(Deserialize)]
struct Formations {
formation: Vec<Formation>,
}
// Only the first entry of the archive (index 0) is read.
let file = archive.by_index(0)?;
let formations: Formations = quick_xml::de::from_reader(BufReader::new(file))?;
// Key each record by a clone of its id; a later duplicate id replaces an earlier one.
Ok(formations
.formation
.into_iter()
.map(|f| (f.id.clone(), f))
.collect())
} }

View file

@ -1,8 +1,13 @@
use anyhow::Ok; use anyhow::Ok;
use backend::Metier;
#[tokio::main] #[tokio::main]
async fn main() -> anyhow::Result<()> { async fn main() -> anyhow::Result<()> {
let metiers = backend::download_metier().await?; let metiers = backend::download_metiers().await?;
println!("{:#?}", metiers); println!("{:#?}", metiers);
let formations = backend::download_formations().await?;
println!("{:#?}", formations);
Ok(()) Ok(())
} }