multilanguage support
parent
e794145b2e
commit
b18274dd3e
Binary file not shown (new image, 220 KiB).
@@ -0,0 +1 @@
export { locales as middleware } from 'nextra/locales'
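
For context, this one-line middleware is what drives Nextra's locale routing. A minimal sketch of the companion `next.config.js` is below; the exact locale codes and config file names are assumptions, since they are not shown in this diff:

```js
// next.config.js — hypothetical companion config for the middleware above
const withNextra = require('nextra')({
  theme: 'nextra-theme-docs',
  themeConfig: './theme.config.tsx',
})

module.exports = withNextra({
  i18n: {
    // locale codes assumed; pages then carry matching suffixes,
    // e.g. index.en.mdx and index.zh.mdx
    locales: ['en', 'zh'],
    defaultLocale: 'en',
  },
})
```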
@@ -0,0 +1,24 @@
{
  "index": "Prompt Engineering (ZH)",
  "introduction": "Introduction",
  "techniques": "Techniques",
  "applications": "Applications",
  "models": "Models",
  "risks": "Risks & Misuses",
  "papers": "Papers",
  "tools": "Tools",
  "notebooks": "Notebooks",
  "datasets": "Datasets",
  "readings": "Additional Readings",
  "about": {
    "title": "About",
    "type": "page"
  },
  "contact": {
    "title": "Contact ↗",
    "type": "page",
    "href": "https://twitter.com/dair_ai",
    "newWindow": true
  }
}
@@ -0,0 +1,9 @@
# Prompting Applications

import { Callout } from 'nextra-theme-docs'

In this section, we will cover some advanced and interesting ways to use prompt engineering to perform useful and more complex tasks.

<Callout emoji="⚠️">
This section is under heavy development.
</Callout>
@@ -1,9 +0,0 @@
# Prompting Applications

import { Callout } from 'nextra-theme-docs'

In this guide we will cover some advanced and interesting ways we can use prompt engineering to perform useful and more advanced tasks.

<Callout emoji="⚠️">
This section is under heavy development.
</Callout>
@@ -0,0 +1,3 @@
# Prompt Engineering (ZH)

...
@@ -0,0 +1,8 @@
{
  "flan": "Flan",
  "chatgpt": "ChatGPT",
  "llama": "LLaMA",
  "gpt-4": "GPT-4",
  "collection": "Model Collection"
}
@@ -1,6 +0,0 @@
{
  "flan": "Flan",
  "chatgpt": "ChatGPT",
  "gpt-4": "GPT-4"
}
@@ -0,0 +1,27 @@
# Model Collection

import { Callout, FileTree } from 'nextra-theme-docs'

<Callout emoji="⚠️">
This section is under heavy development.
</Callout>

This section consists of a collection and summary of notable and foundational LLMs.

## Models

| Model | Description |
| --- | --- |
| [BERT](https://arxiv.org/abs/1810.04805) | Bidirectional Encoder Representations from Transformers |
| [RoBERTa](https://arxiv.org/abs/1907.11692) | A Robustly Optimized BERT Pretraining Approach |
| [ALBERT](https://arxiv.org/abs/1909.11942) | A Lite BERT for Self-supervised Learning of Language Representations |
| [XLNet](https://arxiv.org/abs/1906.08237) | Generalized Autoregressive Pretraining for Language Understanding |
| [GPT](https://s3-us-west-2.amazonaws.com/openai-assets/research-covers/language-unsupervised/language_understanding_paper.pdf) | Improving Language Understanding by Generative Pre-Training |
| [GPT-2](https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf) | Language Models are Unsupervised Multitask Learners |
| [GPT-3](https://arxiv.org/abs/2005.14165) | Language Models are Few-Shot Learners |
| [T5](https://arxiv.org/abs/1910.10683) | Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer |
| [CTRL](https://arxiv.org/abs/1909.05858) | CTRL: A Conditional Transformer Language Model for Controllable Generation |
| [BART](https://arxiv.org/abs/1910.13461) | Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension |
| [Chinchilla](https://arxiv.org/abs/2203.15556) (Hoffmann et al., 2022) | Shows that, for a given compute budget, the best performance is achieved not by the largest models but by smaller models trained on more data. |
@@ -0,0 +1,39 @@
## LLaMA: Open and Efficient Foundation Language Models

import { Callout, FileTree } from 'nextra-theme-docs'
import { Screenshot } from 'components/screenshot'
import LLAMA1 from '../../img/llama-1.png'

<Callout emoji="⚠️">
This section is under heavy development.
</Callout>

## What's new?

This paper introduces a collection of foundation language models ranging from 7B to 65B parameters.

The models are trained on trillions of tokens using only publicly available datasets.

The work by [Hoffmann et al. (2022)](https://arxiv.org/abs/2203.15556) shows that, given a fixed compute budget, smaller models trained on much more data can achieve better performance than their larger counterparts. That work recommends training a 10B model on 200B tokens. However, the LLaMA paper finds that the performance of a 7B model continues to improve even beyond 1T tokens.
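
As a quick sanity check on the "10B model on 200B tokens" figure, a back-of-the-envelope reading of the Chinchilla result is roughly 20 training tokens per parameter (the LLaMA paper does not state it in this form):

```latex
D_{\text{opt}} \approx 20\,N,
\qquad
N = 10\,\text{B} \;\Rightarrow\; D_{\text{opt}} \approx 20 \times 10\,\text{B} = 200\,\text{B tokens}
```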

<Screenshot src={LLAMA1} alt="LLAMA1" />

This work focuses on training models (LLaMA) that achieve the best possible performance at various inference budgets by training on more tokens.

## Capabilities & Key Results

Overall, LLaMA-13B outperforms GPT-3 (175B) on many benchmarks despite being 10x smaller and able to run on a single GPU. LLaMA-65B is competitive with models like Chinchilla-70B and PaLM-540B.

*Paper:* [LLaMA: Open and Efficient Foundation Language Models](https://arxiv.org/abs/2302.13971)

*Code:* https://github.com/facebookresearch/llama

## References

- [GPT4All](https://github.com/nomic-ai/gpt4all) (March 2023)
- [ChatDoctor: A Medical Chat Model Fine-tuned on LLaMA Model using Medical Domain Knowledge](https://arxiv.org/abs/2303.14070) (March 2023)
- [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca) (March 2023)