feat: add sitemap index with static/users/projects/repos sub-sitemaps
- Main sitemap index at /sitemap.xml referencing 4 sub-sitemaps
- /sidemap/static: fixed routes (homepage, auth, marketing pages)
- /sidemap/users: public user profiles sorted alphabetically
- /sidemap/projects: public projects sorted alphabetically
- /sidemap/repos: public repos sorted alphabetically
- Redis cache with 8h TTL (no refresh on access), key: sidemap:{type}
- robots.txt Sitemap URL uses main_domain() with https:// forced
- All sitemap loc entries use https:// base URL
This commit is contained in:
parent
a8494cc032
commit
d593354ba9
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -647,6 +647,7 @@ dependencies = [
|
||||
"mime_guess2",
|
||||
"models",
|
||||
"queue",
|
||||
"redis",
|
||||
"room",
|
||||
"rust_decimal",
|
||||
"sea-orm",
|
||||
|
||||
@ -13,7 +13,7 @@ use observability::{
|
||||
use sea_orm::ConnectionTrait;
|
||||
use service::AppService;
|
||||
use session::config::{PersistentSession, SessionLifecycle, TtlExtensionPolicy};
|
||||
use api::robots;
|
||||
use api::{robots, sidemap};
|
||||
use session::storage::RedisClusterSessionStore;
|
||||
use session::SessionMiddleware;
|
||||
use std::task::{Context, Poll};
|
||||
@ -220,6 +220,15 @@ async fn main() -> anyhow::Result<()> {
|
||||
.app_data(http_snapshot_data.clone())
|
||||
.app_data(prometheus_handle_data.clone())
|
||||
.route("/robots.txt", web::get().to(robots::robots))
|
||||
.route("/sitemap.xml", web::get().to(sidemap::sitemap))
|
||||
.service(
|
||||
web::scope("/sidemap")
|
||||
.route("", web::get().to(sidemap::sitemap))
|
||||
.route("/static", web::get().to(sidemap::sitemap_static))
|
||||
.route("/users", web::get().to(sidemap::sitemap_users))
|
||||
.route("/projects", web::get().to(sidemap::sitemap_projects))
|
||||
.route("/repos", web::get().to(sidemap::sitemap_repos)),
|
||||
)
|
||||
.route("/health", web::get().to(health_check))
|
||||
.route("/metrics", web::get().to(prometheus_handler))
|
||||
.configure(api::route::init_routes)
|
||||
|
||||
@ -49,5 +49,6 @@ mime_guess2 = { workspace = true, features = ["phf-map"] }
|
||||
sea-orm = "2.0.0-rc.37"
|
||||
rust_decimal = "1.40.0"
|
||||
actix-multipart = { workspace = true, features = ["tempfile"] }
|
||||
redis = { workspace = true }
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
@ -1,11 +1,22 @@
|
||||
use actix_web::HttpResponse;
|
||||
use actix_web::{web, HttpResponse};
|
||||
use service::AppService;
|
||||
|
||||
/// Serves robots.txt, blocking all sensitive paths from crawlers.
|
||||
pub async fn robots() -> HttpResponse {
|
||||
HttpResponse::Ok()
|
||||
.content_type("text/plain; charset=utf-8")
|
||||
.body(
|
||||
r#"User-agent: *
|
||||
pub async fn robots(service: web::Data<AppService>) -> HttpResponse {
|
||||
let raw = service
|
||||
.config
|
||||
.main_domain()
|
||||
.unwrap_or_else(|_| "https://gitdata.ai".to_string());
|
||||
let sitemap_base = if raw.starts_with("https://") {
|
||||
raw.trim_end_matches('/').to_string()
|
||||
} else if raw.starts_with("http://") {
|
||||
raw.replacen("http://", "https://", 1)
|
||||
} else {
|
||||
format!("https://{raw}")
|
||||
};
|
||||
|
||||
let body = format!(
|
||||
r#"User-agent: *
|
||||
Disallow: /api/
|
||||
Disallow: /health
|
||||
Disallow: /metrics
|
||||
@ -15,6 +26,12 @@ Disallow: /blob/
|
||||
Disallow: /media/
|
||||
Disallow: /static/
|
||||
Disallow: /assets/
|
||||
|
||||
Sitemap: {sitemap_base}/sitemap.xml
|
||||
"#,
|
||||
)
|
||||
);
|
||||
|
||||
HttpResponse::Ok()
|
||||
.content_type("text/plain; charset=utf-8")
|
||||
.body(body)
|
||||
}
|
||||
|
||||
@ -0,0 +1,328 @@
|
||||
use actix_web::{web, HttpResponse};
|
||||
use db::cache::AppCache;
|
||||
use models::projects::project::{Column as PCol, Entity as PEntity};
|
||||
use models::repos::repo::{Column as RCol, Entity as REntity};
|
||||
use models::users::user::{Column as UCol, Entity as UEntity};
|
||||
use sea_orm::*;
|
||||
use service::AppService;
|
||||
|
||||
const CACHE_KEY_PREFIX: &str = "sidemap";
|
||||
const CACHE_TTL_SECS: u64 = 8 * 3600; // 8 hours, no refresh
|
||||
|
||||
/// Returns the base URL, forcing https:// prefix for public sitemap crawlers.
|
||||
fn public_base(config: &config::AppConfig) -> String {
|
||||
let fallback = "https://gitdata.ai".to_string();
|
||||
let base = match config.main_domain() {
|
||||
Ok(b) => b.trim_end_matches('/').to_string(),
|
||||
Err(_) => fallback,
|
||||
};
|
||||
if base.starts_with("https://") {
|
||||
base
|
||||
} else if base.starts_with("http://") {
|
||||
base.replacen("http://", "https://", 1)
|
||||
} else {
|
||||
format!("https://{base}")
|
||||
}
|
||||
}
|
||||
|
||||
// ── Handlers ──────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Main sitemap index referencing all sub-sitemaps.
|
||||
pub async fn sitemap(service: web::Data<AppService>) -> HttpResponse {
|
||||
let base = public_base(&service.config);
|
||||
|
||||
let xml = format!(
|
||||
r#"<?xml version="1.0" encoding="UTF-8"?>
|
||||
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
||||
<sitemap>
|
||||
<loc>{base}/sidemap/static</loc>
|
||||
</sitemap>
|
||||
<sitemap>
|
||||
<loc>{base}/sidemap/users</loc>
|
||||
</sitemap>
|
||||
<sitemap>
|
||||
<loc>{base}/sidemap/projects</loc>
|
||||
</sitemap>
|
||||
<sitemap>
|
||||
<loc>{base}/sidemap/repos</loc>
|
||||
</sitemap>
|
||||
</sitemapindex>"#
|
||||
);
|
||||
|
||||
HttpResponse::Ok()
|
||||
.content_type("application/xml; charset=utf-8")
|
||||
.body(xml)
|
||||
}
|
||||
|
||||
/// Static routes (no DB, no cache).
|
||||
pub async fn sitemap_static(service: web::Data<AppService>) -> HttpResponse {
|
||||
let base = public_base(&service.config);
|
||||
|
||||
HttpResponse::Ok()
|
||||
.content_type("application/xml; charset=utf-8")
|
||||
.body(build_static_xml(&base))
|
||||
}
|
||||
|
||||
/// User profiles sitemap.
|
||||
pub async fn sitemap_users(service: web::Data<AppService>) -> HttpResponse {
|
||||
let base = public_base(&service.config);
|
||||
|
||||
let xml = cached_or_build(&service.cache, "users", || async {
|
||||
let db = service.db.reader();
|
||||
let users: Vec<(String, String)> = UEntity::find()
|
||||
.filter(UCol::Username.ne(""))
|
||||
.order_by_asc(UCol::Username)
|
||||
.all(db)
|
||||
.await
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.map(|u| (u.username, u.updated_at.to_rfc3339()))
|
||||
.collect();
|
||||
Ok(build_users_xml(&base, &users))
|
||||
})
|
||||
.await
|
||||
.unwrap_or_else(|_| build_users_xml(&base, &[]));
|
||||
|
||||
HttpResponse::Ok()
|
||||
.content_type("application/xml; charset=utf-8")
|
||||
.body(xml)
|
||||
}
|
||||
|
||||
/// Public projects sitemap.
|
||||
pub async fn sitemap_projects(service: web::Data<AppService>) -> HttpResponse {
|
||||
let base = public_base(&service.config);
|
||||
|
||||
let xml = cached_or_build(&service.cache, "projects", || async {
|
||||
let db = service.db.reader();
|
||||
let projects: Vec<(String, String, String)> = PEntity::find()
|
||||
.filter(PCol::IsPublic.eq(true))
|
||||
.order_by_asc(PCol::Name)
|
||||
.all(db)
|
||||
.await
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.map(|p| (p.name, p.id.to_string(), p.updated_at.to_rfc3339()))
|
||||
.collect();
|
||||
Ok(build_projects_xml(&base, &projects))
|
||||
})
|
||||
.await
|
||||
.unwrap_or_else(|_| build_projects_xml(&base, &[]));
|
||||
|
||||
HttpResponse::Ok()
|
||||
.content_type("application/xml; charset=utf-8")
|
||||
.body(xml)
|
||||
}
|
||||
|
||||
/// Public repos sitemap.
|
||||
pub async fn sitemap_repos(service: web::Data<AppService>) -> HttpResponse {
|
||||
let base = public_base(&service.config);
|
||||
|
||||
let xml = cached_or_build(&service.cache, "repos", || async {
|
||||
let db = service.db.reader();
|
||||
|
||||
let project_map: std::collections::HashMap<String, String> = PEntity::find()
|
||||
.filter(PCol::IsPublic.eq(true))
|
||||
.all(db)
|
||||
.await
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.map(|p| (p.id.to_string(), p.name))
|
||||
.collect();
|
||||
|
||||
let repos: Vec<(String, String)> = REntity::find()
|
||||
.filter(RCol::IsPrivate.eq(false))
|
||||
.order_by_asc(RCol::RepoName)
|
||||
.all(db)
|
||||
.await
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.filter_map(|r| {
|
||||
let ns = project_map.get(&r.project.to_string())?;
|
||||
Some((format!("{ns}/{}", r.repo_name), r.updated_at.to_rfc3339()))
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(build_repos_xml(&base, &repos))
|
||||
})
|
||||
.await
|
||||
.unwrap_or_else(|_| build_repos_xml(&base, &[]));
|
||||
|
||||
HttpResponse::Ok()
|
||||
.content_type("application/xml; charset=utf-8")
|
||||
.body(xml)
|
||||
}
|
||||
|
||||
// ── Cache helpers ────────────────────────────────────────────────────────────────
|
||||
|
||||
async fn cached_or_build<F, Fut>(cache: &AppCache, key: &str, build: F) -> Result<String, ()>
|
||||
where
|
||||
F: FnOnce() -> Fut,
|
||||
Fut: std::future::Future<Output = Result<String, ()>>,
|
||||
{
|
||||
let cache_key = format!("{CACHE_KEY_PREFIX}:{key}");
|
||||
|
||||
if let Ok(xml) = get_cached(cache, &cache_key).await {
|
||||
return Ok(xml);
|
||||
}
|
||||
|
||||
let xml = build().await?;
|
||||
|
||||
let _ = set_cached(cache, &cache_key, &xml).await;
|
||||
|
||||
Ok(xml)
|
||||
}
|
||||
|
||||
async fn get_cached(cache: &AppCache, key: &str) -> Result<String, ()> {
|
||||
let mut conn = cache.redis_pool().get().await.map_err(|e| {
|
||||
tracing::debug!("sidemap redis get pool error: {}", e);
|
||||
})?;
|
||||
redis::cmd("GET")
|
||||
.arg(key)
|
||||
.query_async::<String>(&mut conn)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::debug!("sidemap redis get error: {}", e);
|
||||
})
|
||||
}
|
||||
|
||||
async fn set_cached(cache: &AppCache, key: &str, value: &str) -> Result<(), ()> {
|
||||
let mut conn = cache.redis_pool().get().await.map_err(|e| {
|
||||
tracing::debug!("sidemap redis set pool error: {}", e);
|
||||
})?;
|
||||
redis::cmd("SETEX")
|
||||
.arg(key)
|
||||
.arg(CACHE_TTL_SECS)
|
||||
.arg(value)
|
||||
.query_async::<()>(&mut conn)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::debug!("sidemap redis set error: {}", e);
|
||||
})
|
||||
}
|
||||
|
||||
// ── XML builders ────────────────────────────────────────────────────────────────
|
||||
|
||||
fn build_static_xml(base: &str) -> String {
|
||||
let mut xml = xml_header();
|
||||
for loc in [
|
||||
"/",
|
||||
"/auth/login",
|
||||
"/auth/register",
|
||||
"/auth/password/reset",
|
||||
"/auth/reset-password",
|
||||
"/auth/verify-email",
|
||||
"/about",
|
||||
"/pricing",
|
||||
"/pricing/enterprise",
|
||||
"/pricing/faq",
|
||||
"/skills",
|
||||
"/skills/publish",
|
||||
"/skills/docs",
|
||||
"/solutions",
|
||||
"/solutions/rooms",
|
||||
"/solutions/memory",
|
||||
"/solutions/governance",
|
||||
"/network",
|
||||
"/network/rooms",
|
||||
"/network/api",
|
||||
"/docs",
|
||||
] {
|
||||
xml.push_str(&url_entry(&format!("{base}{loc}"), 0.9, "daily", None));
|
||||
}
|
||||
xml.push_str("</urlset>");
|
||||
xml
|
||||
}
|
||||
|
||||
fn build_users_xml(base: &str, users: &[(String, String)]) -> String {
|
||||
let mut xml = xml_header();
|
||||
for (username, updated) in users {
|
||||
xml.push_str(&url_entry(
|
||||
&format!("{base}/user/{username}"),
|
||||
0.6,
|
||||
"weekly",
|
||||
Some(updated),
|
||||
));
|
||||
}
|
||||
xml.push_str("</urlset>");
|
||||
xml
|
||||
}
|
||||
|
||||
fn build_projects_xml(base: &str, projects: &[(String, String, String)]) -> String {
|
||||
let mut xml = xml_header();
|
||||
for (name, _, updated) in projects {
|
||||
xml.push_str(&url_entry(
|
||||
&format!("{base}/project/{name}"),
|
||||
0.7,
|
||||
"weekly",
|
||||
Some(updated),
|
||||
));
|
||||
for sub in [
|
||||
"/activity",
|
||||
"/repositories",
|
||||
"/issues",
|
||||
"/members",
|
||||
"/articles",
|
||||
"/resources",
|
||||
] {
|
||||
xml.push_str(&url_entry(
|
||||
&format!("{base}/project/{name}{sub}"),
|
||||
0.6,
|
||||
"weekly",
|
||||
Some(updated),
|
||||
));
|
||||
}
|
||||
}
|
||||
xml.push_str("</urlset>");
|
||||
xml
|
||||
}
|
||||
|
||||
fn build_repos_xml(base: &str, repos: &[(String, String)]) -> String {
|
||||
let mut xml = xml_header();
|
||||
for (path, updated) in repos {
|
||||
xml.push_str(&url_entry(
|
||||
&format!("{base}/repository/{path}"),
|
||||
0.7,
|
||||
"daily",
|
||||
Some(updated),
|
||||
));
|
||||
for sub in [
|
||||
"/files",
|
||||
"/commits",
|
||||
"/branches",
|
||||
"/tags",
|
||||
"/contributors",
|
||||
"/pull-requests",
|
||||
] {
|
||||
xml.push_str(&url_entry(
|
||||
&format!("{base}/repository/{path}{sub}"),
|
||||
0.6,
|
||||
"daily",
|
||||
Some(updated),
|
||||
));
|
||||
}
|
||||
}
|
||||
xml.push_str("</urlset>");
|
||||
xml
|
||||
}
|
||||
|
||||
/// Returns the XML declaration and the opening `<urlset>` tag, each followed
/// by a newline. Callers append their entries and the closing `</urlset>`.
fn xml_header() -> String {
    [
        r#"<?xml version="1.0" encoding="UTF-8"?>"#,
        r#"<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">"#,
        // Empty final piece so the join leaves a trailing newline.
        "",
    ]
    .join("\n")
}
|
||||
|
||||
/// Replaces the five XML special characters (`&`, `<`, `>`, `"`, `'`) with
/// their entity references so the text is safe inside an XML element.
fn xml_escape(raw: &str) -> String {
    let mut escaped = String::with_capacity(raw.len());
    for ch in raw.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&apos;"),
            other => escaped.push(other),
        }
    }
    escaped
}

/// Renders one `<url>` element of a sitemap.
///
/// * `loc` - absolute URL of the page; entity-escaped before being written,
///   because it is built from user, project and repository names.
/// * `priority` - written verbatim into `<priority>`.
/// * `changefreq` - written verbatim into `<changefreq>`.
/// * `updated` - when present, written (entity-escaped) into a `<lastmod>`
///   element placed right after `<loc>`; when `None`, no `<lastmod>` is
///   emitted.
///
/// The returned string ends with a newline so entries can be concatenated.
fn url_entry(loc: &str, priority: f32, changefreq: &str, updated: Option<&str>) -> String {
    let loc = xml_escape(loc);
    let updated_xml = updated
        .map(|d| format!("\n <lastmod>{}</lastmod>", xml_escape(d)))
        .unwrap_or_default();
    format!(
        r#" <url>
<loc>{loc}</loc>{updated_xml}
<changefreq>{changefreq}</changefreq>
<priority>{priority}</priority>
</url>
"#,
    )
}
|
||||
Loading…
Reference in New Issue
Block a user