Commit

content and better dates
harrybrwn committed Jan 12, 2023
1 parent 4301d34 commit 167aec4
Showing 7 changed files with 149 additions and 4 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -2,6 +2,7 @@
dist/
*.tar.gz
*.zip
src/modified.json

# dependencies
node_modules/
4 changes: 3 additions & 1 deletion content/.obsidian/app.json
@@ -4,5 +4,7 @@
"userIgnoreFilters": null,
"showUnsupportedFiles": false,
"spellcheck": true,
"showLineNumber": true
"showLineNumber": true,
"alwaysUpdateLinks": true,
"trashOption": "local"
}
46 changes: 46 additions & 0 deletions content/GPG.md
@@ -0,0 +1,46 @@
---
tags:
- security
- opsec
---

# The Basics

List your secret keys (`-K` is short for `--list-secret-keys`).

```sh
gpg -K
gpg -K --keyid-format=long # Long version shows the key id
```

Create a new key.

```sh
gpg --full-generate-key
```

# Isolation

GPG keeps all keys in a single global home directory (`~/.gnupg` by default), which
can make it hard to manage subkeys separately from the primary key.

```sh
mkdir /tmp/gpg
sudo mount -t tmpfs -o size=2M tmpfs /tmp/gpg # tmpfs enforces -o size; ramfs silently ignores it
sudo chown "$USER:$USER" /tmp/gpg
gpg --homedir /tmp/gpg --import /path/to/other/keys
gpg --homedir /tmp/gpg --list-secret-keys
```

It is best practice to keep the root signing key in an air-gapped environment and to use
it to sign additional encryption keys called subkeys.
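The workflow above can be sketched with GnuPG's quick-key commands. Everything here is a placeholder example: Alice's name and email, the 1y expiry, and the throwaway homedir; the `--batch`/`--passphrase ''` flags only keep the sketch non-interactive and should not be used for a real key.

```sh
# Certify-only primary key plus signing and encryption subkeys.
export GNUPGHOME="$(mktemp -d)"   # isolated homedir, as in the Isolation section
gpg --batch --pinentry-mode loopback --passphrase '' \
  --quick-generate-key "Alice <alice@example.com>" ed25519 cert never
# Grab the primary key's fingerprint to attach subkeys to it.
fpr="$(gpg --list-keys --with-colons alice@example.com | awk -F: '/^fpr/ {print $10; exit}')"
gpg --batch --pinentry-mode loopback --passphrase '' --quick-add-key "$fpr" ed25519 sign 1y
gpg --batch --pinentry-mode loopback --passphrase '' --quick-add-key "$fpr" cv25519 encr 1y
gpg -K --keyid-format=long        # the "ssb" lines are the subkeys
```

In practice the primary key would live only in the air-gapped homedir, and just the subkeys would be copied to daily-use machines (`gpg --export-secret-subkeys`).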

# References

- [Creating the Perfect GPG KeyPair][1]
- [Subkeys - Debian][2]
- [Creating newer ECC keys for GnuPG][3]

[1]: https://alexcabal.com/creating-the-perfect-gpg-keypair "Creating the Perfect GPG KeyPair"
[2]: https://wiki.debian.org/Subkeys "Subkeys - Debian"
[3]: https://www.gniibe.org/memo/software/gpg/keygen-25519.html "Creating newer ECC keys for GnuPG"
43 changes: 43 additions & 0 deletions content/Information Retrieval.md
@@ -0,0 +1,43 @@
---
tags:
- computer-science
---

## The Inverted Index

See this [natural language processing playlist](https://www.youtube.com/playlist?list=PLLssT5z_DsK8HbD2sPcUIDfQ7zmBarMYv).

- [TF*IDF](https://en.wikipedia.org/wiki/Tf%E2%80%93idf)
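TF*IDF can be computed straight from the postings of an inverted index: the postings list for a term gives its document frequency, and the per-document counts give the term frequency. A toy sketch (the whitespace tokenizer and log-scaled idf are simplifying assumptions, not the only weighting scheme):

```typescript
// term -> (docId -> term count): the inverted index with positions elided.
type Index = Map<string, Map<number, number>>;

function buildIndex(docs: string[]): Index {
  const index: Index = new Map();
  docs.forEach((doc, id) => {
    for (const term of doc.toLowerCase().split(/\W+/).filter(Boolean)) {
      if (!index.has(term)) index.set(term, new Map());
      const postings = index.get(term)!;
      postings.set(id, (postings.get(id) ?? 0) + 1);
    }
  });
  return index;
}

// tf-idf(t, d) = tf(t, d) * log(N / df(t))
function tfidf(index: Index, term: string, docId: number, n: number): number {
  const postings = index.get(term);
  if (!postings || !postings.has(docId)) return 0;
  return postings.get(docId)! * Math.log(n / postings.size);
}

const docs = ["the cat sat", "the dog sat", "the cat ran"];
const index = buildIndex(docs);
console.log(tfidf(index, "cat", 0, docs.length)); // tf=1, idf=log(3/2) > 0
console.log(tfidf(index, "the", 0, docs.length)); // appears in every doc, so idf = 0
```

Note how a term that appears in every document ("the") scores zero, which is exactly the stop-word-suppressing behavior that makes TF*IDF useful for ranking.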

## Web Crawling

[[Web Crawler|Web crawlers]] are, at their core, programs that download webpages, collect all the
links, and then visit those links to repeat the process. That description understates the
complexity of the process quite a bit; a useful crawler needs many additional features.
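The core loop can be sketched in a few lines. This is a minimal breadth-first version with the fetching abstracted away; a real crawler would do asynchronous HTTP, URL normalization, and politeness on top of it.

```typescript
// A fetcher returns the links found on a page; injected so the loop is testable.
type Fetcher = (url: string) => Promise<string[]>;

async function crawl(seed: string, fetchLinks: Fetcher, limit = 100): Promise<string[]> {
  const seen = new Set<string>([seed]); // dedupe so no URL is enqueued twice
  const frontier: string[] = [seed];    // queue of pages waiting to be visited
  const visited: string[] = [];
  while (frontier.length > 0 && visited.length < limit) {
    const url = frontier.shift()!;
    visited.push(url);
    for (const link of await fetchLinks(url)) {
      if (!seen.has(link)) {
        seen.add(link);
        frontier.push(link);
      }
    }
  }
  return visited;
}

// Exercise the loop against a tiny in-memory "web" instead of real HTTP.
const fakeWeb: Record<string, string[]> = {
  a: ["b", "c"],
  b: ["a", "d"],
  c: [],
  d: [],
};
const order = await crawl("a", async (url) => fakeWeb[url] ?? []);
console.log(order); // breadth-first: a, b, c, d
```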

To figure out what features a [[Web Crawler|web crawler]] must implement, you first need to
produce a list of characteristics for the system. This can be difficult because many
characteristics interfere with one another, leading to trade-offs.

### Features and Characteristics

- **Scalability**: the web is absolutely enormous, and indexing any sizable portion
  requires a scalable system. Having a [[Web Crawler#Distributed Web Crawlers|distributed web crawler]]
  is a good strategy for scalability.
- **Niceness**: It is considered rude for a crawler to send very frequent requests to a
  given server; some servers will even block the crawler's IP if it crawls too fast.
- **Freshness**: The web is always changing, so keeping the index up to date is ideal.
- **Robustness**: The web can contain _spider traps_, either placed maliciously to stop
  web crawlers or created by accident. A crawler should be resilient to these traps.
- **Extensibility**: The web is always changing, and a web crawler should be able to adapt easily [1].
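The niceness characteristic in particular maps to a small, concrete mechanism: a per-host gate that spaces out requests. A sketch (the 250ms delay is an arbitrary example, not a standard):

```typescript
// Enforce a minimum delay between requests to the same host, so the
// crawler stays "nice" no matter how many of that host's URLs are queued.
class PolitenessGate {
  private lastRequest = new Map<string, number>(); // host -> time of last request (ms)

  constructor(private delayMs: number) {}

  // Resolves once it is polite to contact the host of `url` again.
  async wait(url: string): Promise<void> {
    const host = new URL(url).host;
    const earliest = (this.lastRequest.get(host) ?? 0) + this.delayMs;
    const now = Date.now();
    if (earliest > now) {
      await new Promise((resolve) => setTimeout(resolve, earliest - now));
    }
    this.lastRequest.set(host, Date.now());
  }
}

const gate = new PolitenessGate(250);
await gate.wait("https://example.com/page-1"); // first request: immediate
await gate.wait("https://example.com/page-2"); // same host: waits ~250ms
```

A real crawler would also consult each host's robots.txt, which can specify its own crawl delay.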

## Related

- [[Search Engine]]

## References

- [Stanford textbook on Information Retrieval][1]

[1]: https://nlp.stanford.edu/IR-book/ "Stanford Information Retrieval Book"
16 changes: 16 additions & 0 deletions content/Open Source Closed Contribution.md
@@ -0,0 +1,16 @@
[[Open Source|Open source]] maintenance can be difficult. A [common half-joke](https://xkcd.com/2347/) in the tech industry is that everyone
eventually relies on some little open source project maintained by a single
person who has been working on their small hobby project without pay since 1990.
This dynamic between the people who use software and the people who build it
poses many issues for the software industry, one of which is maintainer
burnout, where the maintainer either cannot or will not continue to work on
their project. This is much more common than you might imagine. Even slightly
popular open source projects will see a huge number of people who have not
integrated into the project by reading the contributor documentation try to open
issues and pull requests demanding that their problems be fixed. This causes some
projects to be abandoned by their owners and never touched again.

A mitigation that has been gaining popularity recently is
[Open source but closed contribution](https://changelog.com/news/open-source-but-closed-to-contributions-jGyl):
an open source project that you can use and fork freely, but whose maintainer will not
take on the most difficult parts of maintenance, such as reviewing pull requests,
triaging feature requests, and answering questions.
5 changes: 4 additions & 1 deletion src/layouts/BlogPost.astro
@@ -1,6 +1,7 @@
---
import Nav from "~/components/Nav.astro";
import Base from "~/layouts/Base.astro";
import { humanDate } from "~/lib/dates";
import "~/styles/code.scss";
export interface Props {
@@ -12,8 +13,9 @@ export interface Props {
};
}
const {
content: { title, description, tags },
content: { title, description, tags, pubDate },
} = Astro.props;
const date = new Date(pubDate);
---

<Base title={title} description={description}>
@@ -36,6 +38,7 @@ const {
<hr />
<footer>
{tags && tags.length > 0 && <pre>tags: [{tags?.join(" ")}]</pre>}
published: {humanDate(date)}
</footer>
</main>
</Base>
38 changes: 36 additions & 2 deletions src/lib/blog.ts
@@ -2,6 +2,9 @@ import path from "path";
import fs from "fs";
import type { MarkdownInstance } from "astro";
import GithubSlugger, { slug as toSlugBase } from "github-slugger";
// import modified from "~/modified.json";
import { spawnSync } from "child_process";
import walk from "walkdir";

export interface Frontmatter extends Record<string, any> {
title: string;
@@ -47,6 +50,30 @@ export const slug = (
}
};

type Dates = Record<string, Date[]>;

const gatherModDates = (dir: string) => {
let files: Dates = {};
walk.sync(dir, function (filename: string, stat: fs.Stats) {
if (stat.isDirectory()) return;
if (path.extname(filename) !== ".md") return;
const name = path.relative(process.cwd(), filename);
const cmd = spawnSync("git", [
"--no-pager",
"log",
"--pretty=format:%cd",
name,
]);
// git log prints commit dates newest-first; fall back to "now" for
// files with no commits yet (empty output).
files[name] = cmd.stdout
.toString()
.split("\n")
.map((s) => (s.length > 0 ? new Date(s) : new Date()));
});
return files;
};

const modified = gatherModDates("./content");

const validate = (frontmatter: Frontmatter) => {
if (!frontmatter.title || frontmatter.title.length === 0) {
throw new Error("markdown post must have a title in the frontmatter");
@@ -88,8 +115,15 @@ const preparePosts = (
if (p.frontmatter.pubDate) {
p.frontmatter.pubDate = new Date(p.frontmatter.pubDate).toISOString();
} else {
let stat = fs.statSync(p.file);
p.frontmatter.pubDate = stat.mtime.toISOString();
let name = path.relative(process.cwd(), p.file);
if (name in modified) {
const dates = modified[name];
// dates are newest-first, so the last entry is the oldest commit,
// i.e. the original publication date
const d = dates[dates.length - 1];
p.frontmatter.pubDate = d.toISOString();
} else {
let stat = fs.statSync(p.file);
p.frontmatter.pubDate = stat.mtime.toISOString();
}
}
return p;
})
