rfc1341.txt 206KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
7477847794780478147824783478447854786478747884789479047914792479347944795479647974798479948004801480248034804480548064807480848094810481148124813481448154816481748184819482048214822482348244825482648274828482948304831483248334834483548364837483848394840484148424843484448454846484748484849485048514852485348544855485648574858485948604861486248634864486548664867486848694870487148724873487448754876487748784879488048814882488348844885488648874888488948904891489248934894489548964897489848994900490149024903490449054906490749084909491049114912491349144915491649174918491949204921492249234924492549264927492849294930493149324933493449354936493749384939494049414942494349444945494649474948494949504951495249534954495549564957495849594960496149624963496449654966496749684969497049714972497349744975497649774978497949804981498249834984498549864987498849894990499149924993499449954996499749984999500050015002500350045005500650075008500950105011501250135014501550165017501850195020502150225023502450255026502750285029503050315032503350345035503650375038503950405041504250435044504550465047504850495050505150525053505450555056505750585059506050615062506350645065506650675068506950705071507250735074507550765077507850795080508150825083508450855086508750885089509050915092509350945095509650975098509951005101510251035104510551065107510851095110511151125113511451155116511751185119512051215122512351245125512651275128512951305131513251335134513551365137513851395140514151425143514451455146514751485149515051515152515351545155515651575158515951605161516251635164516551665167516851695170517151725173517451755176517751785179518051815182518351845185518651875188518951905191519251935194519551965197519851995200520152025203520452055206520752085209521052115212521352145215521652175218521952205221522252235224522552265227522852295230523152325233523452355236523752385239524052415242524352445245524652475248524952505251525252535254525552565257525852595260526152625263526452655266
  1. Network Working Group N. Borenstein, Bellcore
  2. Request for Comments: 1341 N. Freed, Innosoft
  3. June 1992
  4. MIME (Multipurpose Internet Mail Extensions):
  5. Mechanisms for Specifying and Describing
  6. the Format of Internet Message Bodies
  7. Status of this Memo
  8. This RFC specifies an IAB standards track protocol for the
  9. Internet community, and requests discussion and suggestions
  10. for improvements. Please refer to the current edition of
  11. the "IAB Official Protocol Standards" for the
  12. standardization state and status of this protocol.
  13. Distribution of this memo is unlimited.
  14. Abstract
  15. RFC 822 defines a message representation protocol which
  16. specifies considerable detail about message headers, but
  17. which leaves the message content, or message body, as flat
  18. ASCII text. This document redefines the format of message
  19. bodies to allow multi-part textual and non-textual message
  20. bodies to be represented and exchanged without loss of
  21. information. This is based on earlier work documented in
  22. RFC 934 and RFC 1049, but extends and revises that work.
  23. Because RFC 822 said so little about message bodies, this
  24. document is largely orthogonal to (rather than a revision
  25. of) RFC 822.
  26. In particular, this document is designed to provide
  27. facilities to include multiple objects in a single message,
  28. to represent body text in character sets other than US-
  29. ASCII, to represent formatted multi-font text messages, to
  30. represent non-textual material such as images and audio
  31. fragments, and generally to facilitate later extensions
  32. defining new types of Internet mail for use by cooperating
  33. mail agents.
  34. This document does NOT extend Internet mail header fields to
  35. permit anything other than US-ASCII text data. It is
  36. recognized that such extensions are necessary, and they are
  37. the subject of a companion document [RFC-1342].
  38. A table of contents appears at the end of this document.
  39. Borenstein & Freed [Page i]
  40. 1 Introduction
  41. Since its publication in 1982, RFC 822 [RFC-822] has defined
  42. the standard format of textual mail messages on the
  43. Internet. Its success has been such that the RFC 822 format
  44. has been adopted, wholly or partially, well beyond the
  45. confines of the Internet and the Internet SMTP transport
  46. defined by RFC 821 [RFC-821]. As the format has seen wider
  47. use, a number of limitations have proven increasingly
  48. restrictive for the user community.
  49. RFC 822 was intended to specify a format for text messages.
  50. As such, non-text messages, such as multimedia messages that
  51. might include audio or images, are simply not mentioned.
  52. Even in the case of text, however, RFC 822 is inadequate for
  53. the needs of mail users whose languages require the use of
  54. character sets richer than US-ASCII [US-ASCII]. Since RFC
  55. 822 does not specify mechanisms for mail containing audio,
  56. video, Asian language text, or even text in most European
  57. languages, additional specifications are needed.
  58. One of the notable limitations of RFC 821/822 based mail
  59. systems is the fact that they limit the contents of
  60. electronic mail messages to relatively short lines of
  61. seven-bit ASCII. This forces users to convert any non-
  62. textual data that they may wish to send into seven-bit bytes
  63. representable as printable ASCII characters before invoking
  64. a local mail UA (User Agent, a program with which human
  65. users send and receive mail). Examples of such encodings
  66. currently used in the Internet include pure hexadecimal,
  67. uuencode, the 3-in-4 base 64 scheme specified in RFC 1113,
  68. the Andrew Toolkit Representation [ATK], and many others.
  69. The limitations of RFC 822 mail become even more apparent as
  70. gateways are designed to allow for the exchange of mail
  71. messages between RFC 822 hosts and X.400 hosts. X.400 [X400]
  72. specifies mechanisms for the inclusion of non-textual body
  73. parts within electronic mail messages. The current
  74. standards for the mapping of X.400 messages to RFC 822
  75. messages specify that either X.400 non-textual body parts
  76. should be converted to (not encoded in) an ASCII format, or
  77. that they should be discarded, notifying the RFC 822 user
  78. that discarding has occurred. This is clearly undesirable,
  79. as information that a user may wish to receive is lost.
  80. Even though a user's UA may not have the capability of
  81. dealing with the non-textual body part, the user might have
  82. some mechanism external to the UA that can extract useful
  83. information from the body part. Moreover, it does not allow
  84. for the fact that the message may eventually be gatewayed
  85. back into an X.400 message handling system (i.e., the X.400
  86. message is "tunneled" through Internet mail), where the
  87. non-textual information would definitely become useful
  88. again.
  89. Borenstein & Freed [Page 1]
  90. RFC 1341 MIME: Multipurpose Internet Mail Extensions June 1992
  91. This document describes several mechanisms that combine to
  92. solve most of these problems without introducing any serious
  93. incompatibilities with the existing world of RFC 822 mail.
  94. In particular, it describes:
  95. 1. A MIME-Version header field, which uses a version number
  96. to declare a message to be conformant with this
  97. specification and allows mail processing agents to
  98. distinguish between such messages and those generated
  99. by older or non-conformant software, which is presumed
  100. to lack such a field.
  101. 2. A Content-Type header field, generalized from RFC 1049
  102. [RFC-1049], which can be used to specify the type and
  103. subtype of data in the body of a message and to fully
  104. specify the native representation (encoding) of such
  105. data.
  106. 2.a. A "text" Content-Type value, which can be used to
  107. represent textual information in a number of
  108. character sets and formatted text description
  109. languages in a standardized manner.
  110. 2.b. A "multipart" Content-Type value, which can be
  111. used to combine several body parts, possibly of
  112. differing types of data, into a single message.
  113. 2.c. An "application" Content-Type value, which can be
  114. used to transmit application data or binary data,
  115. and hence, among other uses, to implement an
  116. electronic mail file transfer service.
  117. 2.d. A "message" Content-Type value, for encapsulating
  118. a mail message.
  119. 2.e. An "image" Content-Type value, for transmitting
  120. still image (picture) data.
  121. 2.f. An "audio" Content-Type value, for transmitting
  122. audio or voice data.
  123. 2.g. A "video" Content-Type value, for transmitting
  124. video or moving image data, possibly with audio as
  125. part of the composite video data format.
  126. 3. A Content-Transfer-Encoding header field, which can be
  127. used to specify an auxiliary encoding that was applied
  128. to the data in order to allow it to pass through mail
  129. transport mechanisms which may have data or character
  130. set limitations.
  131. 4. Two optional header fields that can be used to further
  132. describe the data in a message body, the Content-ID and
  133. Content-Description header fields.
  134. Borenstein & Freed [Page 2]
  135. RFC 1341 MIME: Multipurpose Internet Mail Extensions June 1992
  136. MIME has been carefully designed as an extensible mechanism,
  137. and it is expected that the set of content-type/subtype
  138. pairs and their associated parameters will grow
  139. significantly with time. Several other MIME fields, notably
  140. including character set names, are likely to have new values
  141. defined over time. In order to ensure that the set of such
  142. values is developed in an orderly, well-specified, and
  143. public manner, MIME defines a registration process which
  144. uses the Internet Assigned Numbers Authority (IANA) as a
  145. central registry for such values. Appendix F provides
  146. details about how IANA registration is accomplished.
  147. Finally, to specify and promote interoperability, Appendix A
  148. of this document provides a basic applicability statement
  149. for a subset of the above mechanisms that defines a minimal
  150. level of "conformance" with this document.
  151. HISTORICAL NOTE: Several of the mechanisms described in
  152. this document may seem somewhat strange or even baroque at
  153. first reading. It is important to note that compatibility
  154. with existing standards AND robustness across existing
  155. practice were two of the highest priorities of the working
  156. group that developed this document. In particular,
  157. compatibility was always favored over elegance.
  158. 2 Notations, Conventions, and Generic BNF Grammar
  159. This document is being published in two versions, one as
  160. plain ASCII text and one as PostScript. The latter is
  161. recommended, though the textual contents are identical. An
  162. Andrew-format copy of this document is also available from
  163. the first author (Borenstein).
  164. Although the mechanisms specified in this document are all
  165. described in prose, most are also described formally in the
  166. modified BNF notation of RFC 822. Implementors will need to
  167. be familiar with this notation in order to understand this
  168. specification, and are referred to RFC 822 for a complete
  169. explanation of the modified BNF notation.
  170. Some of the modified BNF in this document makes reference to
  171. syntactic entities that are defined in RFC 822 and not in
  172. this document. A complete formal grammar, then, is obtained
  173. by combining the collected grammar appendix of this document
  174. with that of RFC 822.
  175. The term CRLF, in this document, refers to the sequence of
  176. the two ASCII characters CR (13) and LF (10) which, taken
  177. together, in this order, denote a line break in RFC 822
  178. mail.
  179. The term "character set", wherever it is used in this
  180. document, refers to a coded character set, in the sense of
  181. ISO character set standardization work, and must not be
  182. Borenstein & Freed [Page 3]
  183. RFC 1341 MIME: Multipurpose Internet Mail Extensions June 1992
  184. misinterpreted as meaning "a set of characters."
  185. The term "message", when not further qualified, means either
  186. the (complete or "top-level") message being transferred on a
  187. network, or a message encapsulated in a body of type
  188. "message".
  189. The term "body part", in this document, means one of the
  190. parts of the body of a multipart entity. A body part has a
  191. header and a body, so it makes sense to speak about the body
  192. of a body part.
  193. The term "entity", in this document, means either a message
  194. or a body part. All kinds of entities share the property
  195. that they have a header and a body.
  196. The term "body", when not further qualified, means the body
  197. of an entity, that is the body of either a message or of a
  198. body part.
  199. Note: the previous four definitions are clearly circular.
  200. This is unavoidable, since the overall structure of a MIME
  201. message is indeed recursive.
  202. In this document, all numeric and octet values are given in
  203. decimal notation.
  204. It must be noted that Content-Type values, subtypes, and
  205. parameter names as defined in this document are case-
  206. insensitive. However, parameter values are case-sensitive
  207. unless otherwise specified for the specific parameter.
  208. FORMATTING NOTE: This document has been carefully formatted
  209. for ease of reading. The PostScript version of this
  210. document, in particular, places notes like this one, which
  211. may be skipped by the reader, in a smaller, italicized,
  212. font, and indents it as well. In the text version, only the
  213. indentation is preserved, so if you are reading the text
  214. version of this you might consider using the PostScript
  215. version instead. However, all such notes will be indented
  216. and preceded by "NOTE:" or some similar introduction, even
  217. in the text version.
  218. The primary purpose of these non-essential notes is to
  219. convey information about the rationale of this document, or
  220. to place this document in the proper historical or
  221. evolutionary context. Such information may be skipped by
  222. those who are focused entirely on building a compliant
  223. implementation, but may be of use to those who wish to
  224. understand why this document is written as it is.
  225. For ease of recognition, all BNF definitions have been
  226. placed in a fixed-width font in the PostScript version of
  227. this document.
  228. Borenstein & Freed [Page 4]
  229. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  230. 3 The MIME-Version Header Field
  231. Since RFC 822 was published in 1982, there has really been
  232. only one format standard for Internet messages, and there
  233. has been little perceived need to declare the format
  234. standard in use. This document is an independent document
  235. that complements RFC 822. Although the extensions in this
  236. document have been defined in such a way as to be compatible
  237. with RFC 822, there are still circumstances in which it
  238. might be desirable for a mail-processing agent to know
  239. whether a message was composed with the new standard in
  240. mind.
  241. Therefore, this document defines a new header field, "MIME-
  242. Version", which is to be used to declare the version of the
  243. Internet message body format standard in use.
  244. Messages composed in accordance with this document MUST
  245. include such a header field, with the following verbatim
  246. text:
  247. MIME-Version: 1.0
  248. The presence of this header field is an assertion that the
  249. message has been composed in compliance with this document.
  250. Since it is possible that a future document might extend the
  251. message format standard again, a formal BNF is given for the
  252. content of the MIME-Version field:
  253. MIME-Version := text
  254. Thus, future format specifiers, which might replace or
  255. extend "1.0", are (minimally) constrained by the definition
  256. of "text", which appears in RFC 822.
  257. Note that the MIME-Version header field is required at the
  258. top level of a message. It is not required for each body
  259. part of a multipart entity. It is required for the embedded
  260. headers of a body of type "message" if and only if the
  261. embedded message is itself claimed to be MIME-compliant.
  262. Borenstein & Freed [Page 5]
  263. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  264. 4 The Content-Type Header Field
  265. The purpose of the Content-Type field is to describe the
  266. data contained in the body fully enough that the receiving
  267. user agent can pick an appropriate agent or mechanism to
  268. present the data to the user, or otherwise deal with the
  269. data in an appropriate manner.
  270. HISTORICAL NOTE: The Content-Type header field was first
  271. defined in RFC 1049. RFC 1049 Content-types used a simpler
  272. and less powerful syntax, but one that is largely compatible
  273. with the mechanism given here.
  274. The Content-Type header field is used to specify the nature
  275. of the data in the body of an entity, by giving type and
  276. subtype identifiers, and by providing auxiliary information
  277. that may be required for certain types. After the type and
  278. subtype names, the remainder of the header field is simply a
  279. set of parameters, specified in an attribute/value notation.
  280. The set of meaningful parameters differs for the different
  281. types. The ordering of parameters is not significant.
  282. Among the defined parameters is a "charset" parameter by
  283. which the character set used in the body may be declared.
  284. Comments are allowed in accordance with RFC 822 rules for
  285. structured header fields.
  286. In general, the top-level Content-Type is used to declare
  287. the general type of data, while the subtype specifies a
  288. specific format for that type of data. Thus, a Content-Type
  289. of "image/xyz" is enough to tell a user agent that the data
  290. is an image, even if the user agent has no knowledge of the
  291. specific image format "xyz". Such information can be used,
  292. for example, to decide whether or not to show a user the raw
  293. data from an unrecognized subtype -- such an action might be
  294. reasonable for unrecognized subtypes of text, but not for
  295. unrecognized subtypes of image or audio. For this reason,
  296. registered subtypes of audio, image, text, and video should
  297. not contain embedded information that is really of a
  298. different type. Such compound types should be represented
  299. using the "multipart" or "application" types.
  300. Parameters are modifiers of the content-subtype, and do not
  301. fundamentally affect the requirements of the host system.
  302. Although most parameters make sense only with certain
  303. content-types, others are "global" in the sense that they
  304. might apply to any subtype. For example, the "boundary"
  305. parameter makes sense only for the "multipart" content-type,
  306. but the "charset" parameter might make sense with several
  307. content-types.
  308. An initial set of seven Content-Types is defined by this
  309. document. This set of top-level names is intended to be
  310. substantially complete. It is expected that additions to
  311. the larger set of supported types can generally be
  312. Borenstein & Freed [Page 6]
  313. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  314. accomplished by the creation of new subtypes of these
  315. initial types. In the future, more top-level types may be
  316. defined only by an extension to this standard. If another
  317. primary type is to be used for any reason, it must be given
  318. a name starting with "X-" to indicate its non-standard
  319. status and to avoid a potential conflict with a future
  320. official name.
  321. In the Extended BNF notation of RFC 822, a Content-Type
  322. header field value is defined as follows:
  323. Content-Type := type "/" subtype *[";" parameter]
  324. type := "application" / "audio"
  325. / "image" / "message"
  326. / "multipart" / "text"
  327. / "video" / x-token
  328. x-token := <The two characters "X-" followed, with no
  329. intervening white space, by any token>
  330. subtype := token
  331. parameter := attribute "=" value
  332. attribute := token
  333. value := token / quoted-string
  334. token := 1*<any CHAR except SPACE, CTLs, or tspecials>
  335. tspecials := "(" / ")" / "<" / ">" / "@" ; Must be in
  336. / "," / ";" / ":" / "\" / <"> ; quoted-string,
  337. / "/" / "[" / "]" / "?" / "." ; to use within
  338. / "=" ; parameter values
  339. Note that the definition of "tspecials" is the same as the
  340. RFC 822 definition of "specials" with the addition of the
  341. three characters "/", "?", and "=".
  342. Note also that a subtype specification is MANDATORY. There
  343. are no default subtypes.
  344. The type, subtype, and parameter names are not case
  345. sensitive. For example, TEXT, Text, and TeXt are all
  346. equivalent. Parameter values are normally case sensitive,
  347. but certain parameters are interpreted to be case-
  348. insensitive, depending on the intended use. (For example,
  349. multipart boundaries are case-sensitive, but the "access-
  350. type" for message/External-body is not case-sensitive.)
  351. Beyond this syntax, the only constraint on the definition of
  352. subtype names is the desire that their uses must not
  353. conflict. That is, it would be undesirable to have two
  354. Borenstein & Freed [Page 7]
  355. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  356. different communities using "Content-Type:
  357. application/foobar" to mean two different things. The
  358. process of defining new content-subtypes, then, is not
  359. intended to be a mechanism for imposing restrictions, but
  360. simply a mechanism for publicizing the usages. There are,
  361. therefore, two acceptable mechanisms for defining new
  362. Content-Type subtypes:
  363. 1. Private values (starting with "X-") may be
  364. defined bilaterally between two cooperating
  365. agents without outside registration or
  366. standardization.
  367. 2. New standard values must be documented,
  368. registered with, and approved by IANA, as
  369. described in Appendix F. Where intended for
  370. public use, the formats they refer to must
  371. also be defined by a published specification,
  372. and possibly offered for standardization.
  373. The seven standard initial predefined Content-Types are
  374. detailed in the bulk of this document. They are:
  375. text -- textual information. The primary subtype,
  376. "plain", indicates plain (unformatted) text. No
  377. special software is required to get the full
  378. meaning of the text, aside from support for the
  379. indicated character set. Subtypes are to be used
  380. for enriched text in forms where application
  381. software may enhance the appearance of the text,
  382. but such software must not be required in order to
  383. get the general idea of the content. Possible
  384. subtypes thus include any readable word processor
  385. format. A very simple and portable subtype,
  386. richtext, is defined in this document.
  387. multipart -- data consisting of multiple parts of
  388. independent data types. Four initial subtypes
  389. are defined, including the primary "mixed"
  390. subtype, "alternative" for representing the same
  391. data in multiple formats, "parallel" for parts
  392. intended to be viewed simultaneously, and "digest"
  393. for multipart entities in which each part is of
  394. type "message".
  395. message -- an encapsulated message. A body of
  396. Content-Type "message" is itself a fully formatted
  397. RFC 822 conformant message which may contain its
  398. own different Content-Type header field. The
  399. primary subtype is "rfc822". The "partial"
  400. subtype is defined for partial messages, to permit
  401. the fragmented transmission of bodies that are
  402. thought to be too large to be passed through mail
  403. transport facilities. Another subtype,
  404. "External-body", is defined for specifying large
  405. bodies by reference to an external data source.
  406. Borenstein & Freed [Page 8]
  407. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  408. image -- image data. Image requires a display device
  409. (such as a graphical display, a printer, or a FAX
  410. machine) to view the information. Initial
  411. subtypes are defined for two widely-used image
  412. formats, jpeg and gif.
  413. audio -- audio data, with initial subtype "basic".
  414. Audio requires an audio output device (such as a
  415. speaker or a telephone) to "display" the contents.
  416. video -- video data. Video requires the capability to
  417. display moving images, typically including
  418. specialized hardware and software. The initial
  419. subtype is "mpeg".
  420. application -- some other kind of data, typically
  421. either uninterpreted binary data or information to
  422. be processed by a mail-based application. The
  423. primary subtype, "octet-stream", is to be used in
  424. the case of uninterpreted binary data, in which
  425. case the simplest recommended action is to offer
  426. to write the information into a file for the user.
  427. Two additional subtypes, "ODA" and "PostScript",
  428. are defined for transporting ODA and PostScript
  429. documents in bodies. Other expected uses for
  430. "application" include spreadsheets, data for
  431. mail-based scheduling systems, and languages for
  432. "active" (computational) email. (Note that active
  433. email entails several security considerations,
  434. which are discussed later in this memo,
  435. particularly in the context of
  436. application/PostScript.)
  437. Default RFC 822 messages are typed by this protocol as plain
  438. text in the US-ASCII character set, which can be explicitly
  439. specified as "Content-type: text/plain; charset=us-ascii".
  440. If no Content-Type is specified, either by error or by an
  441. older user agent, this default is assumed. In the presence
  442. of a MIME-Version header field, a receiving User Agent can
  443. also assume that plain US-ASCII text was the sender's
  444. intent. In the absence of a MIME-Version specification,
  445. plain US-ASCII text must still be assumed, but the sender's
  446. intent might have been otherwise.
  447. RATIONALE: In the absence of any Content-Type header field
  448. or MIME-Version header field, it is impossible to be certain
  449. that a message is actually text in the US-ASCII character
  450. set, since it might well be a message that, using the
  451. conventions that predate this document, includes text in
  452. another character set or non-textual data in a manner that
  453. cannot be automatically recognized (e.g., a uuencoded
  454. compressed UNIX tar file). Although there is no fully
  455. acceptable alternative to treating such untyped messages as
  456. "text/plain; charset=us-ascii", implementors should remain
  457. aware that if a message lacks both the MIME-Version and the
  458. Content-Type header fields, it may in practice contain
  459. almost anything.
  460. Borenstein & Freed [Page 9]
  461. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  462. It should be noted that the list of Content-Type values
  463. given here may be augmented in time, via the mechanisms
  464. described above, and that the set of subtypes is expected to
  465. grow substantially.
  466. When a mail reader encounters mail with an unknown Content-
  467. type value, it should generally treat it as equivalent to
  468. "application/octet-stream", as described later in this
  469. document.
  470. 5 The Content-Transfer-Encoding Header Field
  471. Many Content-Types which could usefully be transported via
  472. email are represented, in their "natural" format, as 8-bit
  473. character or binary data. Such data cannot be transmitted
  474. over some transport protocols. For example, RFC 821
  475. restricts mail messages to 7-bit US-ASCII data with lines
  476. of no more than 1000 characters.
  477. It is necessary, therefore, to define a standard mechanism
  478. for re-encoding such data into a 7-bit short-line format.
  479. This document specifies that such encodings will be
  480. indicated by a new "Content-Transfer-Encoding" header field.
  481. The Content-Transfer-Encoding field is used to indicate the
  482. type of transformation that has been used in order to
  483. represent the body in an acceptable manner for transport.
  484. Unlike Content-Types, a proliferation of Content-Transfer-
  485. Encoding values is undesirable and unnecessary. However,
  486. establishing only a single Content-Transfer-Encoding
  487. mechanism does not seem possible. There is a tradeoff
  488. between the desire for a compact and efficient encoding of
  489. largely-binary data and the desire for a readable encoding
  490. of data that is mostly, but not entirely, 7-bit data. For
  491. this reason, at least two encoding mechanisms are necessary:
  492. a "readable" encoding and a "dense" encoding.
  493. The Content-Transfer-Encoding field is designed to specify
  494. an invertible mapping between the "native" representation of
  495. a type of data and a representation that can be readily
  496. exchanged using 7 bit mail transport protocols, such as
  497. those defined by RFC 821 (SMTP). This field has not been
  498. defined by any previous standard. The field's value is a
  499. single token specifying the type of encoding, as enumerated
  500. below. Formally:
  501. Content-Transfer-Encoding := "BASE64" / "QUOTED-PRINTABLE" /
  502. "8BIT" / "7BIT" /
  503. "BINARY" / x-token
  504. These values are not case sensitive. That is, Base64 and
  505. BASE64 and bAsE64 are all equivalent. An encoding type of
  506. 7BIT requires that the body is already in a seven-bit mail-
  507. ready representation. This is the default value -- that is,
  508. Borenstein & Freed [Page 10]
  509. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  510. "Content-Transfer-Encoding: 7BIT" is assumed if the
  511. Content-Transfer-Encoding header field is not present.
  512. The values "8bit", "7bit", and "binary" all imply that NO
  513. encoding has been performed. However, they are potentially
  514. useful as indications of the kind of data contained in the
  515. object, and therefore of the kind of encoding that might
  516. need to be performed for transmission in a given transport
  517. system. "7bit" means that the data is all represented as
  518. short lines of US-ASCII data. "8bit" means that the lines
  519. are short, but there may be non-ASCII characters (octets
  520. with the high-order bit set). "Binary" means that not only
  521. may non-ASCII characters be present, but also that the lines
  522. are not necessarily short enough for SMTP transport.
  523. The difference between "8bit" (or any other conceivable
  524. bit-width token) and the "binary" token is that "binary"
  525. does not require adherence to any limits on line length or
  526. to the SMTP CRLF semantics, while the bit-width tokens do
  527. require such adherence. If the body contains data in any
  528. bit-width other than 7-bit, the appropriate bit-width
  529. Content-Transfer-Encoding token must be used (e.g., "8bit"
  530. for unencoded 8 bit wide data). If the body contains binary
  531. data, the "binary" Content-Transfer-Encoding token must be
  532. used.
  533. NOTE: The distinction between the Content-Transfer-Encoding
  534. values of "binary," "8bit," etc. may seem unimportant, in
  535. that all of them really mean "none" -- that is, there has
  536. been no encoding of the data for transport. However, clear
  537. labeling will be of enormous value to gateways between
  538. future mail transport systems with differing capabilities in
  539. transporting data that do not meet the restrictions of RFC
  540. 821 transport.
  541. As of the publication of this document, there are no
  542. standardized Internet transports for which it is legitimate
  543. to include unencoded 8-bit or binary data in mail bodies.
  544. Thus there are no circumstances in which the "8bit" or
  545. "binary" Content-Transfer-Encoding is actually legal on the
  546. Internet. However, in the event that 8-bit or binary mail
  547. transport becomes a reality in Internet mail, or when this
  548. document is used in conjunction with any other 8-bit or
  549. binary-capable transport mechanism, 8-bit or binary bodies
  550. should be labeled as such using this mechanism.
  551. NOTE: The five values defined for the Content-Transfer-
  552. Encoding field imply nothing about the Content-Type other
  553. than the algorithm by which it was encoded or the transport
  554. system requirements if unencoded.
  555. Implementors may, if necessary, define new Content-
  556. Transfer-Encoding values, but must use an x-token, which is
  557. a name prefixed by "X-" to indicate its non-standard status,
  558. Borenstein & Freed [Page 11]
  559. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  560. e.g., "Content-Transfer-Encoding: x-my-new-encoding".
  561. However, unlike Content-Types and subtypes, the creation of
  562. new Content-Transfer-Encoding values is explicitly and
  563. strongly discouraged, as it seems likely to hinder
  564. interoperability with little potential benefit. Their use
  565. is allowed only as the result of an agreement between
  566. cooperating user agents.
  567. If a Content-Transfer-Encoding header field appears as part
  568. of a message header, it applies to the entire body of that
  569. message. If a Content-Transfer-Encoding header field
  570. appears as part of a body part's headers, it applies only to
  571. the body of that body part. If an entity is of type
  572. "multipart" or "message", the Content-Transfer-Encoding is
  573. not permitted to have any value other than a bit width
  574. (e.g., "7bit", "8bit", etc.) or "binary".
  575. It should be noted that email is character-oriented, so that
  576. the mechanisms described here are mechanisms for encoding
  577. arbitrary byte streams, not bit streams. If a bit stream is
  578. to be encoded via one of these mechanisms, it must first be
  579. converted to an 8-bit byte stream using the network standard
  580. bit order ("big-endian"), in which the earlier bits in a
  581. stream become the higher-order bits in a byte. A bit stream
  582. not ending at an 8-bit boundary must be padded with zeroes.
  583. This document provides a mechanism for noting the addition
  584. of such padding in the case of the application Content-Type,
  585. which has a "padding" parameter.
  586. The encoding mechanisms defined here explicitly encode all
  587. data in ASCII. Thus, for example, suppose an entity has
  588. header fields such as:
  589. Content-Type: text/plain; charset=ISO-8859-1
  590. Content-transfer-encoding: base64
  591. This should be interpreted to mean that the body is a base64
  592. ASCII encoding of data that was originally in ISO-8859-1,
  593. and will be in that character set again after decoding.
  594. The following sections will define the two standard encoding
  595. mechanisms. The definition of new content-transfer-
  596. encodings is explicitly discouraged and should only occur
  597. when absolutely necessary. The entire content-transfer-
  598. encoding namespace, except for names beginning with "X-",
  599. is explicitly reserved to the IANA for future use. Private agreements
  600. about content-transfer-encodings are also explicitly
  601. discouraged.
  602. Certain Content-Transfer-Encoding values may only be used on
  603. certain Content-Types. In particular, it is expressly
  604. forbidden to use any encodings other than "7bit", "8bit", or
  605. "binary" with any Content-Type that recursively includes
  606. other Content-Type fields, notably the "multipart" and
  607. Borenstein & Freed [Page 12]
  608. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  609. "message" Content-Types. All encodings that are desired for
  610. bodies of type multipart or message must be done at the
  611. innermost level, by encoding the actual body that needs to
  612. be encoded.
  613. NOTE ON ENCODING RESTRICTIONS: Though the prohibition
  614. against using content-transfer-encodings on data of type
  615. multipart or message may seem overly restrictive, it is
  616. necessary to prevent nested encodings, in which data are
  617. passed through an encoding algorithm multiple times, and
  618. must be decoded multiple times in order to be properly
  619. viewed. Nested encodings add considerable complexity to
  620. user agents: aside from the obvious efficiency problems
  621. with such multiple encodings, they can obscure the basic
  622. structure of a message. In particular, they can imply that
  623. several decoding operations are necessary simply to find out
  624. what types of objects a message contains. Banning nested
  625. encodings may complicate the job of certain mail gateways,
  626. but this seems less of a problem than the effect of nested
  627. encodings on user agents.
  628. NOTE ON THE RELATIONSHIP BETWEEN CONTENT-TYPE AND CONTENT-
  629. TRANSFER-ENCODING: It may seem that the Content-Transfer-
  630. Encoding could be inferred from the characteristics of the
  631. Content-Type that is to be encoded, or, at the very least,
  632. that certain Content-Transfer-Encodings could be mandated
  633. for use with specific Content-Types. There are several
  634. reasons why this is not the case. First, given the varying
  635. types of transports used for mail, some encodings may be
  636. appropriate for some Content-Type/transport combinations and
  637. not for others. (For example, in an 8-bit transport, no
  638. encoding would be required for text in certain character
  639. sets, while such encodings are clearly required for 7-bit
  640. SMTP.) Second, certain Content-Types may require different
  641. types of transfer encoding under different circumstances.
  642. For example, many PostScript bodies might consist entirely
  643. of short lines of 7-bit data and hence require little or no
  644. encoding. Other PostScript bodies (especially those using
  645. Level 2 PostScript's binary encoding mechanism) may only be
  646. reasonably represented using a binary transport encoding.
  647. Finally, since Content-Type is intended to be an open-ended
  648. specification mechanism, strict specification of an
  649. association between Content-Types and encodings effectively
  650. couples the specification of an application protocol with a
  651. specific lower-level transport. This is not desirable since
  652. the developers of a Content-Type should not have to be aware
  653. of all the transports in use and what their limitations are.
  654. NOTE ON TRANSLATING ENCODINGS: The quoted-printable and
  655. base64 encodings are designed so that conversion between
  656. them is possible. The only issue that arises in such a
  657. conversion is the handling of line breaks. When converting
  658. from quoted-printable to base64 a line break must be
  659. converted into a CRLF sequence. Similarly, a CRLF sequence
  660. Borenstein & Freed [Page 13]
  661. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  662. in base64 data should be converted to a quoted-printable
  663. line break, but ONLY when converting text data.
  664. NOTE ON CANONICAL ENCODING MODEL: There was some
  665. confusion, in earlier drafts of this memo, regarding the
  666. model for when email data was to be converted to canonical
  667. form and encoded, and in particular how this process would
  668. affect the treatment of CRLFs, given that the representation
  669. of newlines varies greatly from system to system. For this
  670. reason, a canonical model for encoding is presented as
  671. Appendix H.
  672. 5.1 Quoted-Printable Content-Transfer-Encoding
  673. The Quoted-Printable encoding is intended to represent data
  674. that largely consists of octets that correspond to printable
  675. characters in the ASCII character set. It encodes the data
  676. in such a way that the resulting octets are unlikely to be
  677. modified by mail transport. If the data being encoded are
  678. mostly ASCII text, the encoded form of the data remains
  679. largely recognizable by humans. A body which is entirely
  680. ASCII may also be encoded in Quoted-Printable to ensure the
  681. integrity of the data should the message pass through a
  682. character-translating and/or line-wrapping gateway.
  683. In this encoding, octets are to be represented as determined
  684. by the following rules:
  685. Rule #1: (General 8-bit representation) Any octet,
  686. except those indicating a line break according to the
  687. newline convention of the canonical form of the data
  688. being encoded, may be represented by an "=" followed by
  689. a two digit hexadecimal representation of the octet's
  690. value. The digits of the hexadecimal alphabet, for this
  691. purpose, are "0123456789ABCDEF". Uppercase
  692. letters must be used when sending hexadecimal
  693. data, though a robust
  694. implementation may choose to recognize lowercase
  695. letters on receipt. Thus, for example, the value 12
  696. (ASCII form feed) can be represented by "=0C", and the
  697. value 61 (ASCII EQUAL SIGN) can be represented by
  698. "=3D". Except when the following rules allow an
  699. alternative encoding, this rule is mandatory.
  700. Rule #2: (Literal representation) Octets with decimal
  701. values of 33 through 60 inclusive, and 62 through 126,
  702. inclusive, MAY be represented as the ASCII characters
  703. which correspond to those octets (EXCLAMATION POINT
  704. through LESS THAN, and GREATER THAN through TILDE,
  705. respectively).
  706. Rule #3: (White Space): Octets with values of 9 and 32
  707. MAY be represented as ASCII TAB (HT) and SPACE
  708. characters, respectively, but MUST NOT be so
  709. Borenstein & Freed [Page 14]
  710. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  711. represented at the end of an encoded line. Any TAB (HT)
  712. or SPACE characters on an encoded line MUST thus be
  713. followed on that line by a printable character. In
  714. particular, an "=" at the end of an encoded line,
  715. indicating a soft line break (see rule #5) may follow
  716. one or more TAB (HT) or SPACE characters. It follows
  717. that an octet with value 9 or 32 appearing at the end
  718. of an encoded line must be represented according to
  719. Rule #1. This rule is necessary because some MTAs
  720. (Message Transport Agents, programs which transport
  721. messages from one user to another, or perform a part of
  722. such transfers) are known to pad lines of text with
  723. SPACEs, and others are known to remove "white space"
  724. characters from the end of a line. Therefore, when
  725. decoding a Quoted-Printable body, any trailing white
  726. space on a line must be deleted, as it will necessarily
  727. have been added by intermediate transport agents.
  728. Rule #4 (Line Breaks): A line break in a text body
  729. part, independent of what its representation is
  730. following the canonical representation of the data
  731. being encoded, must be represented by a (RFC 822) line
  732. break, which is a CRLF sequence, in the Quoted-
  733. Printable encoding. If isolated CRs and LFs, or LF CR
  734. and CR LF sequences are allowed to appear in binary
  735. data according to the canonical form, they must be
  736. represented using the "=0D", "=0A", "=0A=0D" and
  737. "=0D=0A" notations respectively.
  738. Note that many implementations may elect to encode the
  739. local representation of various content types directly.
  740. In particular, this may apply to plain text material on
  741. systems that use newline conventions other than CRLF
  742. delimiters. Such an implementation is permissible, but
  743. the generation of line breaks must be generalized to
  744. account for the case where alternate representations of
  745. newline sequences are used.
  746. Rule #5 (Soft Line Breaks): The Quoted-Printable
  747. encoding REQUIRES that encoded lines be no more than 76
  748. characters long. If longer lines are to be encoded with
  749. the Quoted-Printable encoding, 'soft' line breaks must
  750. be used. An equal sign as the last character on an
  751. encoded line indicates such a non-significant ('soft')
  752. line break in the encoded text. Thus if the "raw" form
  753. of the line is a single unencoded line that says:
  754. Now's the time for all folk to come to the aid of
  755. their country.
  756. This can be represented, in the Quoted-Printable
  757. encoding, as
  758. Borenstein & Freed [Page 15]
  759. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  760. Now's the time =
  761. for all folk to come=
  762.  to the aid of their country.
  763. This provides a mechanism with which long lines are
  764. encoded in such a way as to be restored by the user
  765. agent. The 76 character limit does not count the
  766. trailing CRLF, but counts all other characters,
  767. including any equal signs.
  768. Since the hyphen character ("-") is represented as itself in
  769. the Quoted-Printable encoding, care must be taken, when
  770. encapsulating a quoted-printable encoded body in a multipart
  771. entity, to ensure that the encapsulation boundary does not
  772. appear anywhere in the encoded body. (A good strategy is to
  773. choose a boundary that includes a character sequence such as
  774. "=_" which can never appear in a quoted-printable body. See
  775. the definition of multipart messages later in this
  776. document.)
  777. NOTE: The quoted-printable encoding represents something of
  778. a compromise between readability and reliability in
  779. transport. Bodies encoded with the quoted-printable
  780. encoding will work reliably over most mail gateways, but may
  781. not work perfectly over a few gateways, notably those
  782. involving translation into EBCDIC. (In theory, an EBCDIC
  783. gateway could decode a quoted-printable body and re-encode
  784. it using base64, but such gateways do not yet exist.) A
  785. higher level of confidence is offered by the base64
  786. Content-Transfer-Encoding. A way to get reasonably reliable
  787. transport through EBCDIC gateways is to also quote the ASCII
  788. characters
  789. !"#$@[\]^`{|}~
  790. according to rule #1. See Appendix B for more information.
  791. Because quoted-printable data is generally assumed to be
  792. line-oriented, it is to be expected that the breaks between
  793. the lines of quoted printable data may be altered in
  794. transport, in the same manner that plain text mail has
  795. always been altered in Internet mail when passing between
  796. systems with differing newline conventions. If such
  797. alterations are likely to constitute a corruption of the
  798. data, it is probably more sensible to use the base64
  799. encoding rather than the quoted-printable encoding.
  800. Borenstein & Freed [Page 16]
  801. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  802. 5.2 Base64 Content-Transfer-Encoding
  803. The Base64 Content-Transfer-Encoding is designed to
  804. represent arbitrary sequences of octets in a form that is
  805. not humanly readable. The encoding and decoding algorithms
  806. are simple, but the encoded data are consistently only about
  807. 33 percent larger than the unencoded data. This encoding is
  808. based on the one used in Privacy Enhanced Mail applications,
  809. as defined in RFC 1113. The base64 encoding is adapted
  810. from RFC 1113, with one change: base64 eliminates the "*"
  811. mechanism for embedded clear text.
  812. A 65-character subset of US-ASCII is used, enabling 6 bits
  813. to be represented per printable character. (The extra 65th
  814. character, "=", is used to signify a special processing
  815. function.)
  816. NOTE: This subset has the important property that it is
  817. represented identically in all versions of ISO 646,
  818. including US ASCII, and all characters in the subset are
  819. also represented identically in all versions of EBCDIC.
  820. Other popular encodings, such as the encoding used by the
  821. UUENCODE utility and the base85 encoding specified as part
  822. of Level 2 PostScript, do not share these properties, and
  823. thus do not fulfill the portability requirements a binary
  824. transport encoding for mail must meet.
  825. The encoding process represents 24-bit groups of input bits
  826. as output strings of 4 encoded characters. Proceeding from
  827. left to right, a 24-bit input group is formed by
  828. concatenating 3 8-bit input groups. These 24 bits are then
  829. treated as 4 concatenated 6-bit groups, each of which is
  830. translated into a single digit in the base64 alphabet. When
  831. encoding a bit stream via the base64 encoding, the bit
  832. stream must be presumed to be ordered with the most-
  833. significant-bit first. That is, the first bit in the stream
  834. will be the high-order bit in the first byte, and the eighth
  835. bit will be the low-order bit in the first byte, and so on.
  836. Each 6-bit group is used as an index into an array of 64
  837. printable characters. The character referenced by the index
  838. is placed in the output string. These characters, identified
  839. in Table 1, below, are selected so as to be universally
  840. representable, and the set excludes characters with
  841. particular significance to SMTP (e.g., ".", "CR", "LF") and
  842. to the encapsulation boundaries defined in this document
  843. (e.g., "-").
  844. Borenstein & Freed [Page 17]
  845. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  846. Table 1: The Base64 Alphabet
  847. Value Encoding Value Encoding Value Encoding Value
  848. Encoding
  849. 0 A 17 R 34 i 51 z
  850. 1 B 18 S 35 j 52 0
  851. 2 C 19 T 36 k 53 1
  852. 3 D 20 U 37 l 54 2
  853. 4 E 21 V 38 m 55 3
  854. 5 F 22 W 39 n 56 4
  855. 6 G 23 X 40 o 57 5
  856. 7 H 24 Y 41 p 58 6
  857. 8 I 25 Z 42 q 59 7
  858. 9 J 26 a 43 r 60 8
  859. 10 K 27 b 44 s 61 9
  860. 11 L 28 c 45 t 62 +
  861. 12 M 29 d 46 u 63 /
  862. 13 N 30 e 47 v
  863. 14 O 31 f 48 w (pad) =
  864. 15 P 32 g 49 x
  865. 16 Q 33 h 50 y
  866. The output stream (encoded bytes) must be represented in
  867. lines of no more than 76 characters each. All line breaks
  868. or other characters not found in Table 1 must be ignored by
  869. decoding software. In base64 data, characters other than
  870. those in Table 1, line breaks, and other white space
  871. probably indicate a transmission error, about which a
  872. warning message or even a message rejection might be
  873. appropriate under some circumstances.
  874. Special processing is performed if fewer than 24 bits are
  875. available at the end of the data being encoded. A full
  876. encoding quantum is always completed at the end of a body.
  877. When fewer than 24 input bits are available in an input
  878. group, zero bits are added (on the right) to form an
  879. integral number of 6-bit groups. Output character positions
  880. which are not required to represent actual input data are
  881. set to the character "=". Since all base64 input is an
  882. integral number of octets, only the following cases can
  883. arise: (1) the final quantum of encoding input is an
  884. integral multiple of 24 bits; here, the final unit of
  885. encoded output will be an integral multiple of 4 characters
  886. with no "=" padding, (2) the final quantum of encoding input
  887. is exactly 8 bits; here, the final unit of encoded output
  888. will be two characters followed by two "=" padding
  889. characters, or (3) the final quantum of encoding input is
  890. exactly 16 bits; here, the final unit of encoded output will
  891. be three characters followed by one "=" padding character.
  892. Care must be taken to use the proper octets for line breaks
  893. if base64 encoding is applied directly to text material that
  894. has not been converted to canonical form. In particular,
  895. text line breaks should be converted into CRLF sequences
  896. Borenstein & Freed [Page 18]
  897. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  898. prior to base64 encoding. The important thing to note is
  899. that this may be done directly by the encoder rather than in
  900. a prior canonicalization step in some implementations.
  901. NOTE: There is no need to worry about quoting apparent
  902. encapsulation boundaries within base64-encoded parts of
  903. multipart entities because no hyphen characters are used in
  904. the base64 encoding.
  905. 6 Additional Optional Content- Header Fields
  906. 6.1 Optional Content-ID Header Field
  907. In constructing a high-level user agent, it may be desirable
  908. to allow one body to make reference to another.
  909. Accordingly, bodies may be labeled using the "Content-ID"
  910. header field, which is syntactically identical to the
  911. "Message-ID" header field:
  912. Content-ID := msg-id
  913. Like the Message-ID values, Content-ID values must be
  914. generated to be as unique as possible.
  915. 6.2 Optional Content-Description Header Field
  916. The ability to associate some descriptive information with a
  917. given body is often desirable. For example, it may be useful
  918. to mark an "image" body as "a picture of the Space Shuttle
  919. Endeavour." Such text may be placed in the Content-
  920. Description header field.
  921. Content-Description := *text
  922. The description is presumed to be given in the US-ASCII
  923. character set, although the mechanism specified in [RFC-
  924. 1342] may be used for non-US-ASCII Content-Description
  925. values.
  926. Borenstein & Freed [Page 19]
  927. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  928. 7 The Predefined Content-Type Values
  929. This document defines seven initial Content-Type values and
  930. an extension mechanism for private or experimental types.
  931. Further standard types must be defined by new published
  932. specifications. It is expected that most innovation in new
  933. types of mail will take place as subtypes of the seven types
  934. defined here. The most essential characteristics of the
  935. seven content-types are summarized in Appendix G.
  936. 7.1 The Text Content-Type
  937. The text Content-Type is intended for sending material which
  938. is principally textual in form. It is the default Content-
  939. Type. A "charset" parameter may be used to indicate the
  940. character set of the body text. The primary subtype of text
  941. is "plain". This indicates plain (unformatted) text. The
  942. default Content-Type for Internet mail is "text/plain;
  943. charset=us-ascii".
  944. Beyond plain text, there are many formats for representing
  945. what might be known as "extended text" -- text with embedded
  946. formatting and presentation information. An interesting
  947. characteristic of many such representations is that they are
  948. to some extent readable even without the software that
  949. interprets them. It is useful, then, to distinguish them,
  950. at the highest level, from such unreadable data as images,
  951. audio, or text represented in an unreadable form. In the
  952. absence of appropriate interpretation software, it is
  953. reasonable to show subtypes of text to the user, while it is
  954. not reasonable to do so with most nontextual data.
  955. Such formatted textual data should be represented using
  956. subtypes of text. Plausible subtypes of text are typically
  957. given by the common name of the representation format, e.g.,
  958. "text/richtext".
  959. 7.1.1 The charset parameter
  960. A critical parameter that may be specified in the Content-
  961. Type field for text data is the character set. This is
  962. specified with a "charset" parameter, as in:
  963. Content-type: text/plain; charset=us-ascii
  964. Unlike some other parameter values, the values of the
  965. charset parameter are NOT case sensitive. The default
  966. character set, which must be assumed in the absence of a
  967. charset parameter, is US-ASCII.
  968. An initial list of predefined character set names can be
  969. found at the end of this section. Additional character sets
  970. may be registered with IANA as described in Appendix F,
  971. although the standardization of their use requires the usual
  972. Borenstein & Freed [Page 20]
  973. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  974. IAB review and approval. Note that if the specified
  975. character set includes 8-bit data, a Content-Transfer-
  976. Encoding header field and a corresponding encoding on the
  977. data are required in order to transmit the body via some
  978. mail transfer protocols, such as SMTP.
  979. The default character set, US-ASCII, has been the subject of
  980. some confusion and ambiguity in the past. Not only were
  981. there some ambiguities in the definition, there have been
  982. wide variations in practice. In order to eliminate such
  983. ambiguity and variations in the future, it is strongly
  984. recommended that new user agents explicitly specify a
  985. character set via the Content-Type header field. "US-ASCII"
  986. does not indicate an arbitrary seven-bit character code, but
  987. specifies that the body uses character coding that uses the
  988. exact correspondence of codes to characters specified in
  989. ASCII. National use variations of ISO 646 [ISO-646] are NOT
  990. ASCII and their use in Internet mail is explicitly
  991. discouraged. The omission of the ISO 646 character set is
  992. deliberate in this regard. The character set name of "US-
  993. ASCII" explicitly refers to ANSI X3.4-1986 [US-ASCII] only.
  994. The character set name "ASCII" is reserved and must not be
  995. used for any purpose.
  996. NOTE: RFC 821 explicitly specifies "ASCII", and references
  997. an earlier version of the American Standard. Insofar as one
  998. of the purposes of specifying a Content-Type and character
  999. set is to permit the receiver to unambiguously determine how
  1000. the sender intended the coded message to be interpreted,
  1001. assuming anything other than "strict ASCII" as the default
  1002. would risk unintentional and incompatible changes to the
  1003. semantics of messages now being transmitted. This also
  1004. implies that messages containing characters coded according
  1005. to national variations on ISO 646, or using code-switching
  1006. procedures (e.g., those of ISO 2022), as well as 8-bit or
  1007. multiple octet character encodings MUST use an appropriate
  1008. character set specification to be consistent with this
  1009. specification.
  1010. The complete US-ASCII character set is listed in [US-ASCII].
  1011. Note that the control characters including DEL (0-31, 127)
  1012. have no defined meaning apart from the combination CRLF
  1013. (ASCII values 13 and 10) indicating a new line. Two of the
  1014. characters have de facto meanings in wide use: FF (12) often
  1015. means "start subsequent text on the beginning of a new
  1016. page"; and TAB or HT (9) often (though not always) means
  1017. "move the cursor to the next available column after the
  1018. current position where the column number is a multiple of 8
  1019. (counting the first column as column 0)." Apart from this,
  1020. any use of the control characters or DEL in a body must be
  1021. part of a private agreement between the sender and
  1022. recipient. Such private agreements are discouraged and
  1023. should be replaced by the other capabilities of this
  1024. document.
  1025. Borenstein & Freed [Page 21]
  1026. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  1027. NOTE: Beyond US-ASCII, an enormous proliferation of
  1028. character sets is possible. It is the opinion of the IETF
  1029. working group that a large number of character sets is NOT a
  1030. good thing. We would prefer to specify a single character
  1031. set that can be used universally for representing all of the
  1032. world's languages in electronic mail. Unfortunately,
  1033. existing practice in several communities seems to point to
  1034. the continued use of multiple character sets in the near
  1035. future. For this reason, we define names for a small number
  1036. of character sets for which a strong constituent base
  1037. exists. It is our hope that ISO 10646 or some other
  1038. effort will eventually define a single world character set
  1039. which can then be specified for use in Internet mail, but in
  1040. advance of that definition we cannot specify the use of
  1041. ISO 10646, Unicode, or any other character set whose
  1042. definition is, as of this writing, incomplete.
  1043. The defined charset values are:
  1044. US-ASCII -- as defined in [US-ASCII].
  1045. ISO-8859-X -- where "X" is to be replaced, as
  1046. necessary, for the parts of ISO-8859 [ISO-
  1047. 8859]. Note that the ISO 646 character sets
  1048. have deliberately been omitted in favor of
  1049. their 8859 replacements, which are the
  1050. designated character sets for Internet mail.
  1051. As of the publication of this document, the
  1052. legitimate values for "X" are the digits 1
  1053. through 9.
  1054. Note that the character set used, if anything other than
  1055. US-ASCII, must always be explicitly specified in the
  1056. Content-Type field.
  1057. No other character set name may be used in Internet mail
  1058. without the publication of a formal specification and its
  1059. registration with IANA as described in Appendix F, or by
  1060. private agreement, in which case the character set name must
  1061. begin with "X-".
  1062. Implementors are discouraged from defining new character
  1063. sets for mail use unless absolutely necessary.
  1064. The "charset" parameter has been defined primarily for the
  1065. purpose of textual data, and is described in this section
  1066. for that reason. However, it is conceivable that non-
  1067. textual data might also wish to specify a charset value for
  1068. some purpose, in which case the same syntax and values
  1069. should be used.
  1070. In general, mail-sending software should always use the
  1071. "lowest common denominator" character set possible. For
  1072. example, if a body contains only US-ASCII characters, it
  1073. Borenstein & Freed [Page 22]
  1074. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  1075. should be marked as being in the US-ASCII character set, not
  1076. ISO-8859-1, which, like all the ISO-8859 family of character
  1077. sets, is a superset of US-ASCII. More generally, if a
  1078. widely-used character set is a subset of another character
  1079. set, and a body contains only characters in the widely-used
  1080. subset, it should be labeled as being in that subset. This
  1081. will increase the chances that the recipient will be able to
  1082. view the mail correctly.
  1083. 7.1.2 The Text/plain subtype
  1084. The primary subtype of text is "plain". This indicates
  1085. plain (unformatted) text. The default Content-Type for
  1086. Internet mail, "text/plain; charset=us-ascii", describes
  1087. existing Internet practice, that is, it is the type of body
  1088. defined by RFC 822.
  1089. 7.1.3 The Text/richtext subtype
  1090. In order to promote the wider interoperability of simple
  1091. formatted text, this document defines an extremely simple
  1092. subtype of "text", the "richtext" subtype. This subtype was
  1093. designed to meet the following criteria:
  1094. 1. The syntax must be extremely simple to parse,
  1095. so that even teletype-oriented mail systems can
  1096. easily strip away the formatting information and
  1097. leave only the readable text.
  1098. 2. The syntax must be extensible to allow for new
  1099. formatting commands that are deemed essential.
  1100. 3. The capabilities must be extremely limited, to
  1101. ensure that it can represent no more than is
  1102. likely to be representable by the user's primary
  1103. word processor. While this limits what can be
  1104. sent, it increases the likelihood that what is
  1105. sent can be properly displayed.
  1106. 4. The syntax must be compatible with SGML, so
  1107. that, with an appropriate DTD (Document Type
  1108. Definition, the standard mechanism for defining a
  1109. document type using SGML), a general SGML parser
  1110. could be made to parse richtext. However, despite
  1111. this compatibility, the syntax should be far
  1112. simpler than full SGML, so that no SGML knowledge
  1113. is required in order to implement it.
  1114. The syntax of "richtext" is very simple. It is assumed, at
  1115. the top-level, to be in the US-ASCII character set, unless
  1116. of course a different charset parameter was specified in the
  1117. Content-type field. All characters represent themselves,
  1118. with the exception of the "<" character (ASCII 60), which is
  1119. used to mark the beginning of a formatting command.
  1120. Borenstein & Freed [Page 23]
  1121. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  1122. Formatting instructions consist of formatting commands
  1123. surrounded by angle brackets ("<>", ASCII 60 and 62). Each
  1124. formatting command may be no more than 40 characters in
  1125. length, all in US-ASCII, restricted to the alphanumeric and
  1126. hyphen ("-") characters. Formatting commands may be preceded
  1127. by a forward slash or solidus ("/", ASCII 47), making them
  1128. negations, and such negations must always exist to balance
  1129. the initial opening commands, except as noted below. Thus,
  1130. if the formatting command "<bold>" appears at some point,
  1131. there must later be a "</bold>" to balance it. There are
  1132. only three exceptions to this "balancing" rule: First, the
  1133. command "<lt>" is used to represent a literal "<" character.
  1134. Second, the command "<nl>" is used to represent a required
  1135. line break. (Otherwise, CRLFs in the data are treated as
  1136. equivalent to a single SPACE character.) Finally, the
  1137. command "<np>" is used to represent a page break. (NOTE:
  1138. The 40 character limit on formatting commands does not
  1139. include the "<", ">", or "/" characters that might be
  1140. attached to such commands.)
  1141. Initially defined formatting commands, not all of which will
  1142. be implemented by all richtext implementations, include:
  1143. Bold -- causes the subsequent text to be in a bold
  1144. font.
  1145. Italic -- causes the subsequent text to be in an italic
  1146. font.
  1147. Fixed -- causes the subsequent text to be in a fixed
  1148. width font.
  1149. Smaller -- causes the subsequent text to be in a
  1150. smaller font.
  1151. Bigger -- causes the subsequent text to be in a bigger
  1152. font.
  1153. Underline -- causes the subsequent text to be
  1154. underlined.
  1155. Center -- causes the subsequent text to be centered.
  1156. FlushLeft -- causes the subsequent text to be left
  1157. justified.
  1158. FlushRight -- causes the subsequent text to be right
  1159. justified.
  1160. Indent -- causes the subsequent text to be indented at
  1161. the left margin.
  1162. IndentRight -- causes the subsequent text to be
  1163. indented at the right margin.
  1164. Outdent -- causes the subsequent text to be outdented
  1165. at the left margin.
  1166. OutdentRight -- causes the subsequent text to be
  1167. outdented at the right margin.
  1168. SamePage -- causes the subsequent text to be grouped,
  1169. if possible, on one page.
  1170. Subscript -- causes the subsequent text to be
  1171. interpreted as a subscript.
  1172. Borenstein & Freed [Page 24]
  1173. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  1174. Superscript -- causes the subsequent text to be
  1175. interpreted as a superscript.
  1176. Heading -- causes the subsequent text to be interpreted
  1177. as a page heading.
  1178. Footing -- causes the subsequent text to be interpreted
  1179. as a page footing.
  1180. ISO-8859-X (for any value of X that is legal as a
  1181. "charset" parameter) -- causes the subsequent text
  1182. to be interpreted as text in the appropriate
  1183. character set.
  1184. US-ASCII -- causes the subsequent text to be
  1185. interpreted as text in the US-ASCII character set.
  1186. Excerpt -- causes the subsequent text to be interpreted
  1187. as a textual excerpt from another source.
  1188. Typically this will be displayed using indentation
  1189. and an alternate font, but such decisions are up
  1190. to the viewer.
  1191. Paragraph -- causes the subsequent text to be
  1192. interpreted as a single paragraph, with
  1193. appropriate paragraph breaks (typically blank
  1194. space) before and after.
  1195. Signature -- causes the subsequent text to be
  1196. interpreted as a "signature". Some systems may
  1197. wish to display signatures in a smaller font or
  1198. otherwise set them apart from the main text of the
  1199. message.
  1200. Comment -- causes the subsequent text to be interpreted
  1201. as a comment, and hence not shown to the reader.
  1202. No-op -- has no effect on the subsequent text.
  1203. lt -- <lt> is replaced by a literal "<" character. No
  1204. balancing </lt> is allowed.
  1205. nl -- <nl> causes a line break. No balancing </nl> is
  1206. allowed.
  1207. np -- <np> causes a page break. No balancing </np> is
  1208. allowed.
  1209. Each positive formatting command affects all subsequent text
  1210. until the matching negative formatting command. Such pairs
  1211. of formatting commands must be properly balanced and nested.
  1212. Thus, a proper way to describe text in bold italics is:
  1213. <bold><italic>the-text</italic></bold>
  1214. or, alternately,
  1215. <italic><bold>the-text</bold></italic>
  1216. but, in particular, the following is illegal
  1217. richtext:
  1218. <bold><italic>the-text</bold></italic>
  1219. NOTE: The nesting requirement for formatting commands
  1220. imposes a slightly higher burden upon the composers of
  1221. Borenstein & Freed [Page 25]
  1222. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  1223. richtext bodies, but potentially simplifies richtext
  1224. displayers by allowing them to be stack-based. The main
  1225. goal of richtext is to be simple enough to make multifont,
  1226. formatted email widely readable, so that those with the
  1227. capability of sending it will be able to do so with
  1228. confidence. Thus slightly increased complexity in the
  1229. composing software was deemed a reasonable tradeoff for
  1230. simplified reading software. Nonetheless, implementors of
  1231. richtext readers are encouraged to follow the general
  1232. Internet guidelines of being conservative in what you send
  1233. and liberal in what you accept. Those implementations that
  1234. can do so are encouraged to deal reasonably with improperly
  1235. nested richtext.
  1236. Implementations must regard any unrecognized formatting
  1237. command as equivalent to "No-op", thus facilitating future
  1238. extensions to "richtext". Private extensions may be defined
  1239. using formatting commands that begin with "X-", by analogy
  1240. to Internet mail header field names.
  1241. It is worth noting that no special behavior is required for
  1242. the TAB (HT) character. It is recommended, however, that, at
  1243. least when fixed-width fonts are in use, the common
  1244. semantics of the TAB (HT) character should be observed,
  1245. namely that it moves to the next column position that is a
  1246. multiple of 8. (In other words, if a TAB (HT) occurs in
  1247. column n, where the leftmost column is column 0, then that
  1248. TAB (HT) should be replaced by 8-(n mod 8) SPACE
  1249. characters.)
  1250. Richtext also differentiates between "hard" and "soft" line
  1251. breaks. A line break (CRLF) in the richtext data stream is
  1252. interpreted as a "soft" line break, one that is included
  1253. only for purposes of mail transport, and is to be treated as
  1254. white space by richtext interpreters. To include a "hard"
  1255. line break (one that must be displayed as such), the "<nl>"
  1256. or "<paragraph>" formatting constructs should be used. In
  1257. general, a soft line break should be treated as white space,
  1258. but when soft line breaks immediately follow a <nl> or a
  1259. </paragraph> tag they should be ignored rather than treated
  1260. as white space.
  1261. Putting all this together, the following "text/richtext"
  1262. body fragment:
  1263. <bold>Now</bold> is the time for
  1264. <italic>all</italic> good men
  1265. <smaller>(and <lt>women>)</smaller> to
  1266. <ignoreme></ignoreme> come
  1267. to the aid of their
  1268. <nl>
  1269. Borenstein & Freed [Page 26]
  1270. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  1271. beloved <nl><nl>country. <comment> Stupid
  1272. quote! </comment> -- the end
  1273. represents the following formatted text (which will, no
  1274. doubt, look cryptic in the text-only version of this
  1275. document):
  1276. Now is the time for all good men (and <women>) to
  1277. come to the aid of their
  1278. beloved
  1279. country. -- the end
  1280. Richtext conformance: A minimal richtext implementation is
  1281. one that simply converts "<lt>" to "<", converts CRLFs to
  1282. SPACE, converts <nl> to a newline according to local newline
  1283. convention, removes everything between a <comment> command
  1284. and the next balancing </comment> command, and removes all
  1285. other formatting commands (all text enclosed in angle
  1286. brackets).
  1287. NOTE ON THE RELATIONSHIP OF RICHTEXT TO SGML: Richtext is
  1288. decidedly not SGML, and must not be used to transport
  1289. arbitrary SGML documents. Those who wish to use SGML
  1290. document types as a mail transport format must define a new
  1291. text or application subtype, e.g., "text/sgml-dtd-whatever"
  1292. or "application/sgml-dtd-whatever", depending on the
  1293. perceived readability of the DTD in use. Richtext is
  1294. designed to be compatible with SGML, and specifically so
  1295. that it will be possible to define a richtext DTD if one is
  1296. needed. However, this does not imply that arbitrary SGML
  1297. can be called richtext, nor that richtext implementors have
  1298. any need to understand SGML; the description in this
  1299. document is a complete definition of richtext, which is far
  1300. simpler than complete SGML.
  1301. NOTE ON THE INTENDED USE OF RICHTEXT: It is recognized that
  1302. implementors of future mail systems will want rich text
  1303. functionality far beyond that currently defined for
  1304. richtext. The intent of richtext is to provide a common
  1305. format for expressing that functionality in a form in which
  1306. much of it, at least, will be understood by interoperating
  1307. software. Thus, in particular, software with a richer
  1308. notion of formatted text than richtext can still use
  1309. richtext as its basic representation, but can extend it with
  1310. new formatting commands and by hiding information specific
  1311. to that software system in richtext comments. As such
  1312. systems evolve, it is expected that the definition of
  1313. richtext will be further refined by future published
  1314. specifications, but richtext as defined here provides a
  1315. platform on which evolutionary refinements can be based.
  1316. IMPLEMENTATION NOTE: In some environments, it might be
  1317. impossible to combine certain richtext formatting commands,
  1318. Borenstein & Freed [Page 27]
  1319. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  1320. whereas in others they might be combined easily. For
  1321. example, the combination of <bold> and <italic> might
  1322. produce bold italics on systems that support such fonts, but
  1323. there exist systems that can make text bold or italicized,
  1324. but not both. In such cases, the most recently issued
  1325. recognized formatting command should be preferred.
  1326. One of the major goals in the design of richtext was to make
  1327. it so simple that even text-only mailers will implement
  1328. richtext-to-plain-text translators, thus increasing the
  1329. likelihood that multifont text will become "safe" to use
  1330. very widely. To demonstrate this simplicity, an extremely
  1331. simple 35-line C program that converts richtext input into
  1332. plain text output is included in Appendix D.
  1333. Borenstein & Freed [Page 28]
  1334. RFC 1341          MIME: Multipurpose Internet Mail Extensions          June 1992
  1335. 7.2 The Multipart Content-Type
  1336. In the case of multiple part messages, in which one or more
  1337. different sets of data are combined in a single body, a
  1338. "multipart" Content-Type field must appear in the entity's
  1339. header. The body must then contain one or more "body parts,"
  1340. each preceded by an encapsulation boundary, and the last one
  1341. followed by a closing boundary. Each part starts with an
  1342. encapsulation boundary, and then contains a body part
  1343. consisting of a header area, a blank line, and a body area.
  1344. Thus a body part is similar to an RFC 822 message in syntax,
  1345. but different in meaning.
  1346. A body part is NOT to be interpreted as actually being an
  1347. RFC 822 message. To begin with, NO header fields are
  1348. actually required in body parts. A body part that starts
  1349. with a blank line, therefore, is allowed and is a body part
  1350. for which all default values are to be assumed. In such a
  1351. case, the absence of a Content-Type header field implies
  1352. that the encapsulation is plain US-ASCII text. The only
  1353. header fields that have defined meaning for body parts are
  1354. those the names of which begin with "Content-". All other
  1355. header fields are generally to be ignored in body parts.
  1356. Although they should generally be retained in mail
  1357. processing, they may be discarded by gateways if necessary.
  1358. Such other fields are permitted to appear in body parts but
  1359. should not be depended on. "X-" fields may be created for
  1360. experimental or private purposes, with the recognition that
  1361. the information they contain may be lost at some gateways.
  1362. The distinction between an RFC 822 message and a body part
  1363. is subtle, but important. A gateway between Internet and
  1364. X.400 mail, for example, must be able to tell the difference
  1365. between a body part that contains an image and a body part
  1366. that contains an encapsulated message, the body of which is
  1367. an image. In order to represent the latter, the body part
  1368. must have "Content-Type: message", and its body (after the
  1369. blank line) must be the encapsulated message, with its own
  1370. "Content-Type: image" header field. The use of similar
  1371. syntax facilitates the conversion of messages to body parts,
  1372. and vice versa, but the distinction between the two must be
  1373. understood by implementors. (For the special case in which
  1374. all parts actually are messages, a "digest" subtype is also
  1375. defined.)
  1376. As stated previously, each body part is preceded by an
  1377. encapsulation boundary. The encapsulation boundary MUST NOT
  1378. appear inside any of the encapsulated parts. Thus, it is
  1379. crucial that the composing agent be able to choose and
  1380. specify the unique boundary that will separate the parts.
  1381. All present and future subtypes of the "multipart" type must
  1382. use an identical syntax. Subtypes may differ in their
  1383. semantics, and may impose additional restrictions on syntax,
  1384. Borenstein & Freed [Page 29]
  1385. RFC 1341          MIME: Multipurpose Internet Mail Extensions          June 1992
  1386. but must conform to the required syntax for the multipart
  1387. type. This requirement ensures that all conformant user
  1388. agents will at least be able to recognize and separate the
  1389. parts of any multipart entity, even of an unrecognized
  1390. subtype.
  1391. As stated in the definition of the Content-Transfer-Encoding
  1392. field, no encoding other than "7bit", "8bit", or "binary" is
  1393. permitted for entities of type "multipart". The multipart
  1394. delimiters and header fields are always 7-bit ASCII in any
  1395. case, and data within the body parts can be encoded on a
  1396. part-by-part basis, with Content-Transfer-Encoding fields
  1397. for each appropriate body part.
  1398. Mail gateways, relays, and other mail handling agents are
  1399. commonly known to alter the top-level header of an RFC 822
  1400. message. In particular, they frequently add, remove, or
  1401. reorder header fields. Such alterations are explicitly
  1402. forbidden for the body part headers embedded in the bodies
  1403. of messages of type "multipart."
  1404. 7.2.1 Multipart: The common syntax
  1405. All subtypes of "multipart" share a common syntax, defined
  1406. in this section. A simple example of a multipart message
  1407. also appears in this section. An example of a more complex
  1408. multipart message is given in Appendix C.
  1409. The Content-Type field for multipart entities requires one
  1410. parameter, "boundary", which is used to specify the
  1411. encapsulation boundary. The encapsulation boundary is
  1412. defined as a line consisting entirely of two hyphen
  1413. characters ("-", decimal code 45) followed by the boundary
  1414. parameter value from the Content-Type header field.
  1415. NOTE: The hyphens are for rough compatibility with the
  1416. earlier RFC 934 method of message encapsulation, and for
  1417. ease of searching for the boundaries in some
  1418. implementations. However, it should be noted that multipart
  1419. messages are NOT completely compatible with RFC 934
  1420. encapsulations; in particular, they do not obey RFC 934
  1421. quoting conventions for embedded lines that begin with
  1422. hyphens. This mechanism was chosen over the RFC 934
  1423. mechanism because the latter causes lines to grow with each
  1424. level of quoting. The combination of this growth with the
  1425. fact that SMTP implementations sometimes wrap long lines
  1426. made the RFC 934 mechanism unsuitable for use in the event
  1427. that deeply-nested multipart structuring is ever desired.
  1428. Thus, a typical multipart Content-Type header field might
  1429. look like this:
  1430. Content-Type: multipart/mixed;
  1431. Borenstein & Freed [Page 30]
  1432. RFC 1341          MIME: Multipurpose Internet Mail Extensions          June 1992
  1433. boundary=gc0p4Jq0M2Yt08jU534c0p
  1434. This indicates that the entity consists of several parts,
  1435. each itself with a structure that is syntactically identical
  1436. to an RFC 822 message, except that the header area might be
  1437. completely empty, and that the parts are each preceded by
  1438. the line
  1439. --gc0p4Jq0M2Yt08jU534c0p
  1440. Note that the encapsulation boundary must occur at the
  1441. beginning of a line, i.e., following a CRLF, and that that
  1442. initial CRLF is considered to be part of the encapsulation
  1443. boundary rather than part of the preceding part. The
  1444. boundary must be followed immediately either by another CRLF
  1445. and the header fields for the next part, or by two CRLFs, in
  1446. which case there are no header fields for the next part (and
  1447. it is therefore assumed to be of Content-Type text/plain).
  1448. NOTE: The CRLF preceding the encapsulation line is
  1449. considered part of the boundary so that it is possible to
  1450. have a part that does not end with a CRLF (line break).
  1451. Body parts that must be considered to end with line breaks,
  1452. therefore, should have two CRLFs preceding the encapsulation
  1453. line, the first of which is part of the preceding body part,
  1454. and the second of which is part of the encapsulation
  1455. boundary.
  1456. The requirement that the encapsulation boundary begins with
  1457. a CRLF implies that the body of a multipart entity must
  1458. itself begin with a CRLF before the first encapsulation line
  1459. -- that is, if the "preamble" area is not used, the entity
  1460. headers must be followed by TWO CRLFs. This is indeed how
  1461. such entities should be composed. A tolerant mail reading
  1462. program, however, may interpret a body of type multipart
  1463. that begins with an encapsulation line NOT initiated by a
  1464. CRLF as also being an encapsulation boundary, but a
  1465. compliant mail sending program must not generate such
  1466. entities.
  1467. Encapsulation boundaries must not appear within the
  1468. encapsulations, and must be no longer than 70 characters,
  1469. not counting the two leading hyphens.
  1470. The encapsulation boundary following the last body part is a
  1471. distinguished delimiter that indicates that no further body
  1472. parts will follow. Such a delimiter is identical to the
  1473. previous delimiters, with the addition of two more hyphens
  1474. at the end of the line:
  1475. --gc0p4Jq0M2Yt08jU534c0p--
  1476. There appears to be room for additional information prior to
  1477. the first encapsulation boundary and following the final
  1478. Borenstein & Freed [Page 31]
  1479. RFC 1341          MIME: Multipurpose Internet Mail Extensions          June 1992
  1480. boundary. These areas should generally be left blank, and
  1481. implementations should ignore anything that appears before
  1482. the first boundary or after the last one.
  1483. NOTE: These "preamble" and "epilogue" areas are not used
  1484. because of the lack of proper typing of these parts and the
  1485. lack of clear semantics for handling these areas at
  1486. gateways, particularly X.400 gateways.
  1487. NOTE: Because encapsulation boundaries must not appear in
  1488. the body parts being encapsulated, a user agent must
  1489. exercise care to choose a unique boundary. The boundary in
  1490. the example above could have been the result of an algorithm
  1491. designed to produce boundaries with a very low probability
  1492. of already existing in the data to be encapsulated without
  1493. having to prescan the data. Alternate algorithms might
  1494. result in more 'readable' boundaries for a recipient with an
  1495. old user agent, but would require more attention to the
  1496. possibility that the boundary might appear in the
  1497. encapsulated part. The simplest boundary possible is
  1498. something like "---", with a closing boundary of "-----".
  1499. As a very simple example, the following multipart message
  1500. has two parts, both of them plain text, one of them
  1501. explicitly typed and one of them implicitly typed:
  1502. From: Nathaniel Borenstein <nsb@bellcore.com>
  1503. To: Ned Freed <ned@innosoft.com>
  1504. Subject: Sample message
  1505. MIME-Version: 1.0
  1506. Content-type: multipart/mixed;
  1507.      boundary="simple boundary"
  1508. This is the preamble. It is to be ignored, though it
  1509. is a handy place for mail composers to include an
  1510. explanatory note to non-MIME compliant readers.
  1511. --simple boundary
  1512. This is implicitly typed plain ASCII text.
  1513. It does NOT end with a linebreak.
  1514. --simple boundary
  1515. Content-type: text/plain; charset=us-ascii
  1516. This is explicitly typed plain ASCII text.
  1517. It DOES end with a linebreak.
  1518. --simple boundary--
  1519. This is the epilogue. It is also to be ignored.
  1520. The use of a Content-Type of multipart in a body part within
  1521. another multipart entity is explicitly allowed. In such
  1522. cases, for obvious reasons, care must be taken to ensure
  1523. that each nested multipart entity uses a different
  1524. boundary delimiter. See Appendix C for an example of nested
  1525. Borenstein & Freed [Page 32]
  1526. RFC 1341          MIME: Multipurpose Internet Mail Extensions          June 1992
  1527. multipart entities.
  1528. The use of the multipart Content-Type with only a single
  1529. body part may be useful in certain contexts, and is
  1530. explicitly permitted.
  1531. The only mandatory parameter for the multipart Content-Type
  1532. is the boundary parameter, which consists of 1 to 70
  1533. characters from a set of characters known to be very robust
  1534. through email gateways, and NOT ending with white space.
  1535. (If a boundary appears to end with white space, the white
  1536. space must be presumed to have been added by a gateway, and
  1537. should be deleted.) It is formally specified by the
  1538. following BNF:
  1539. boundary := 0*69<bchars> bcharsnospace
  1540. bchars := bcharsnospace / " "
  1541. bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / "+" /
  1542. "_"
  1543. / "," / "-" / "." / "/" / ":" / "=" / "?"
  1544. Overall, the body of a multipart entity may be specified as
  1545. follows:
  1546. multipart-body := preamble 1*encapsulation
  1547. close-delimiter epilogue
  1548. encapsulation := delimiter CRLF body-part
  1549. delimiter := CRLF "--" boundary ; taken from Content-Type
  1550. field.
  1551. ; when content-type is
  1552. multipart
  1553. ; There must be no space
  1554. ; between "--" and boundary.
  1555. close-delimiter := delimiter "--" ; Again, no space before
  1556. "--"
  1557. preamble := *text ; to be ignored upon
  1558. receipt.
  1559. epilogue := *text ; to be ignored upon
  1560. receipt.
  1561. body-part := <"message" as defined in RFC 822,
  1562. with all header fields optional, and with the
  1563. specified delimiter not occurring anywhere in
  1564. the message body, either on a line by itself
  1565. or as a substring anywhere. Note that the
  1566. Borenstein & Freed [Page 33]
  1567. RFC 1341          MIME: Multipurpose Internet Mail Extensions          June 1992
  1568. semantics of a part differ from the semantics
  1569. of a message, as described in the text.>
  1570. NOTE: Conspicuously missing from the multipart type is a
  1571. notion of structured, related body parts. In general, it
  1572. seems premature to try to standardize interpart structure
  1573. yet. It is recommended that those wishing to provide a more
  1574. structured or integrated multipart messaging facility should
  1575. define a subtype of multipart that is syntactically
  1576. identical, but that always expects the inclusion of a
  1577. distinguished part that can be used to specify the structure
  1578. and integration of the other parts, probably referring to
  1579. them by their Content-ID field. If this approach is used,
  1580. other implementations will not recognize the new subtype,
  1581. but will treat it as the primary subtype (multipart/mixed)
  1582. and will thus be able to show the user the parts that are
  1583. recognized.
  1584. 7.2.2 The Multipart/mixed (primary) subtype
  1585. The primary subtype for multipart, "mixed", is intended for
  1586. use when the body parts are independent and intended to be
  1587. displayed serially. Any multipart subtypes that an
  1588. implementation does not recognize should be treated as being
  1589. of subtype "mixed".
  1590. 7.2.3 The Multipart/alternative subtype
  1591. The multipart/alternative type is syntactically identical to
  1592. multipart/mixed, but the semantics are different. In
  1593. particular, each of the parts is an "alternative" version of
  1594. the same information. User agents should recognize that the
  1595. content of the various parts is interchangeable. The user
  1596. agent should either choose the "best" type based on the
  1597. user's environment and preferences, or offer the user the
  1598. available alternatives. In general, choosing the best type
  1599. means displaying only the LAST part that can be displayed.
  1600. This may be used, for example, to send mail in a fancy text
  1601. format in such a way that it can easily be displayed
  1602. anywhere:
  1603. From: Nathaniel Borenstein <nsb@bellcore.com>
  1604. To: Ned Freed <ned@innosoft.com>
  1605. Subject: Formatted text mail
  1606. MIME-Version: 1.0
  1607. Content-Type: multipart/alternative; boundary=boundary42
  1608. --boundary42
  1609. Content-Type: text/plain; charset=us-ascii
  1610. ...plain text version of message goes here....
  1611. Borenstein & Freed [Page 34]
  1612. RFC 1341          MIME: Multipurpose Internet Mail Extensions          June 1992
  1613. --boundary42
  1614. Content-Type: text/richtext
  1615. .... richtext version of same message goes here ...
  1616. --boundary42
  1617. Content-Type: text/x-whatever
  1618. .... fanciest formatted version of same message goes here
  1619. ...
  1620. --boundary42--
  1621. In this example, users whose mail system understood the
  1622. "text/x-whatever" format would see only the fancy version,
  1623. while other users would see only the richtext or plain text
  1624. version, depending on the capabilities of their system.
  1625. In general, user agents that compose multipart/alternative
  1626. entities should place the body parts in increasing order of
  1627. preference, that is, with the preferred format last. For
  1628. fancy text, the sending user agent should put the plainest
  1629. format first and the richest format last. Receiving user
  1630. agents should pick and display the last format they are
  1631. capable of displaying. In the case where one of the
  1632. alternatives is itself of type "multipart" and contains
  1633. unrecognized sub-parts, the user agent may choose either to
  1634. show that alternative, an earlier alternative, or both.
  1635. NOTE: From an implementor's perspective, it might seem more
  1636. sensible to reverse this ordering, and have the plainest
  1637. alternative last. However, placing the plainest alternative
  1638. first is the friendliest possible option when
  1639. multipart/alternative entities are viewed using a non-MIME-
  1640. compliant mail reader. While this approach does impose some
  1641. burden on compliant mail readers, interoperability with
  1642. older mail readers was deemed to be more important in this
  1643. case.
  1644. It may be the case that some user agents, if they can
  1645. recognize more than one of the formats, will prefer to offer
  1646. the user the choice of which format to view. This makes
  1647. sense, for example, if mail includes both a nicely-formatted
  1648. image version and an easily-edited text version. What is
  1649. most critical, however, is that the user not automatically
  1650. be shown multiple versions of the same data. Either the
  1651. user should be shown the last recognized version or should
  1652. explicitly be given the choice.
  1653. Borenstein & Freed [Page 35]
  1654. RFC 1341          MIME: Multipurpose Internet Mail Extensions          June 1992
  1655. 7.2.4 The Multipart/digest subtype
  1656. This document defines a "digest" subtype of the multipart
  1657. Content-Type. This type is syntactically identical to
  1658. multipart/mixed, but the semantics are different. In
  1659. particular, in a digest, the default Content-Type value for
  1660. a body part is changed from "text/plain" to
  1661. "message/rfc822". This is done to allow a more readable
  1662. digest format that is largely compatible (except for the
  1663. quoting convention) with RFC 934.
  1664. A digest in this format might, then, look something like
  1665. this:
  1666. From: Moderator-Address
  1667. MIME-Version: 1.0
  1668. Subject: Internet Digest, volume 42
  1669. Content-Type: multipart/digest;
  1670. boundary="---- next message ----"
  1671. ------ next message ----
  1672. From: someone-else
  1673. Subject: my opinion
  1674. ...body goes here ...
  1675. ------ next message ----
  1676. From: someone-else-again
  1677. Subject: my different opinion
  1678. ... another body goes here...
  1679. ------ next message ------
  1680. 7.2.5 The Multipart/parallel subtype
  1681. This document defines a "parallel" subtype of the multipart
  1682. Content-Type. This type is syntactically identical to
  1683. multipart/mixed, but the semantics are different. In
  1684. particular, in a parallel entity, all of the parts are
  1685. intended to be presented in parallel, i.e., simultaneously,
  1686. on hardware and software that are capable of doing so.
  1687. Composing agents should be aware that many mail readers will
  1688. lack this capability and will show the parts serially in any
  1689. event.
  1690. Borenstein & Freed [Page 36]
  1691. RFC 1341          MIME: Multipurpose Internet Mail Extensions          June 1992
  1692. 7.3 The Message Content-Type
  1693. It is frequently desirable, in sending mail, to encapsulate
  1694. another mail message. For this common operation, a special
  1695. Content-Type, "message", is defined. The primary subtype,
  1696. message/rfc822, has no required parameters in the Content-
  1697. Type field. Additional subtypes, "partial" and "External-
  1698. body", do have required parameters. These subtypes are
  1699. explained below.
  1700. NOTE: It has been suggested that subtypes of message might
  1701. be defined for forwarded or rejected messages. However,
  1702. forwarded and rejected messages can be handled as multipart
  1703. messages in which the first part contains any control or
  1704. descriptive information, and a second part, of type
  1705. message/rfc822, is the forwarded or rejected message.
  1706. Composing rejection and forwarding messages in this manner
  1707. will preserve the type information on the original message
  1708. and allow it to be correctly presented to the recipient, and
  1709. hence is strongly encouraged.
  1710. As stated in the definition of the Content-Transfer-Encoding
  1711. field, no encoding other than "7bit", "8bit", or "binary" is
  1712. permitted for messages or parts of type "message". The
  1713. message header fields are always US-ASCII in any case, and
  1714. data within the body can still be encoded, in which case the
  1715. Content-Transfer-Encoding header field in the encapsulated
  1716. message will reflect this. Non-ASCII text in the headers of
  1717. an encapsulated message can be specified using the
  1718. mechanisms described in [RFC-1342].
  1719. Mail gateways, relays, and other mail handling agents are
  1720. commonly known to alter the top-level header of an RFC 822
  1721. message. In particular, they frequently add, remove, or
  1722. reorder header fields. Such alterations are explicitly
  1723. forbidden for the encapsulated headers embedded in the
  1724. bodies of messages of type "message."
  1725. 7.3.1 The Message/rfc822 (primary) subtype
  1726. A Content-Type of "message/rfc822" indicates that the body
  1727. contains an encapsulated message, with the syntax of an RFC
  1728. 822 message.
  1729. 7.3.2 The Message/Partial subtype
  1730. A subtype of message, "partial", is defined in order to
  1731. allow large objects to be delivered as several separate
  1732. pieces of mail and automatically reassembled by the
  1733. receiving user agent. (The concept is similar to IP
  1734. fragmentation/reassembly in the basic Internet Protocols.)
  1735. This mechanism can be used when intermediate transport
  1736. agents limit the size of individual messages that can be
  1737. sent. Content-Type "message/partial" thus indicates that
  1738. Borenstein & Freed [Page 37]
  1739. RFC 1341          MIME: Multipurpose Internet Mail Extensions          June 1992
  1740. the body contains a fragment of a larger message.
  1741. Three parameters must be specified in the Content-Type field
  1742. of type message/partial: The first, "id", is a unique
  1743. identifier, as close to a world-unique identifier as
  1744. possible, to be used to match the parts together. (In
  1745. general, the identifier is essentially a message-id; if
  1746. placed in double quotes, it can be any message-id, in
  1747. accordance with the BNF for "parameter" given earlier in
  1748. this specification.) The second, "number", an integer, is
  1749. the part number, which indicates where this part fits into
  1750. the sequence of fragments. The third, "total", another
  1751. integer, is the total number of parts. This third subfield
  1752. is required on the final part, and is optional on the
  1753. earlier parts. Note also that these parameters may be given
  1754. in any order.
  1755. Thus, part 2 of a 3-part message may have either of the
  1756. following header fields:
  1757. Content-Type: Message/Partial;
  1758. number=2; total=3;
  1759. id="oc=jpbe0M2Yt4s@thumper.bellcore.com";
  1760. Content-Type: Message/Partial;
  1761. id="oc=jpbe0M2Yt4s@thumper.bellcore.com";
  1762. number=2
  1763. But part 3 MUST specify the total number of parts:
  1764. Content-Type: Message/Partial;
  1765. number=3; total=3;
  1766. id="oc=jpbe0M2Yt4s@thumper.bellcore.com";
  1767. Note that part numbering begins with 1, not 0.
  1768. When the parts of a message broken up in this manner are put
  1769. together, the result is a complete RFC 822 format message,
  1770. which may have its own Content-Type header field, and thus
  1771. may contain any other data type.
  1772. Message fragmentation and reassembly: The semantics of a
  1773. reassembled partial message must be those of the "inner"
  1774. message, rather than of a message containing the inner
  1775. message. This makes it possible, for example, to send a
  1776. large audio message as several partial messages, and still
  1777. have it appear to the recipient as a simple audio message
  1778. rather than as an encapsulated message containing an audio
  1779. message. That is, the encapsulation of the message is
  1780. considered to be "transparent".
  1781. When generating and reassembling the parts of a
  1782. message/partial message, the headers of the encapsulated
  1783. message must be merged with the headers of the enclosing
  1784. Borenstein & Freed [Page 38]
  1785. RFC 1341          MIME: Multipurpose Internet Mail Extensions          June 1992
  1786. entities. In this process the following rules must be
  1787. observed:
  1788. (1) All of the headers from the initial enclosing
  1789. entity (part one), except those that start with
  1790. "Content-" and "Message-ID", must be copied, in
  1791. order, to the new message.
  1792. (2) Only those headers in the enclosed message
  1793. which start with "Content-" and "Message-ID" must
  1794. be appended, in order, to the headers of the new
  1795. message. Any headers in the enclosed message
  1796. which do not start with "Content-" (except for
  1797. "Message-ID") will be ignored.
  1798. (3) All of the headers from the second and any
  1799. subsequent messages will be ignored.
  1800. For example, if an audio message is broken into two parts,
  1801. the first part might look something like this:
  1802. X-Weird-Header-1: Foo
  1803. From: Bill@host.com
  1804. To: joe@otherhost.com
  1805. Subject: Audio mail
  1806. Message-ID: id1@host.com
  1807. MIME-Version: 1.0
  1808. Content-type: message/partial;
  1809. id="ABC@host.com";
  1810. number=1; total=2
  1811. X-Weird-Header-1: Bar
  1812. X-Weird-Header-2: Hello
  1813. Message-ID: anotherid@foo.com
  1814. Content-type: audio/basic
  1815. Content-transfer-encoding: base64
  1816. ... first half of encoded audio data goes here...
  1817. and the second half might look something like this:
  1818. From: Bill@host.com
  1819. To: joe@otherhost.com
  1820. Subject: Audio mail
  1821. MIME-Version: 1.0
  1822. Message-ID: id2@host.com
  1823. Content-type: message/partial;
  1824. id="ABC@host.com"; number=2; total=2
  1825. ... second half of encoded audio data goes here...
  1826. Then, when the fragmented message is reassembled, the
  1827. resulting message to be displayed to the user should look
  1828. something like this:
  1829. Borenstein & Freed [Page 39]
  1830. RFC 1341          MIME: Multipurpose Internet Mail Extensions          June 1992
  1831. X-Weird-Header-1: Foo
  1832. From: Bill@host.com
  1833. To: joe@otherhost.com
  1834. Subject: Audio mail
  1835. Message-ID: anotherid@foo.com
  1836. MIME-Version: 1.0
  1837. Content-type: audio/basic
  1838. Content-transfer-encoding: base64
  1839. ... first half of encoded audio data goes here...
  1840. ... second half of encoded audio data goes here...
  1841. It should be noted that, because some message transfer
  1842. agents may choose to automatically fragment large messages,
  1843. and because such agents may use different fragmentation
  1844. thresholds, it is possible that the pieces of a partial
  1845. message, upon reassembly, may prove themselves to comprise a
  1846. partial message. This is explicitly permitted.
  1847. It should also be noted that the inclusion of a "References"
  1848. field in the headers of the second and subsequent pieces of
  1849. a fragmented message that references the Message-Id on the
  1850. previous piece may be of benefit to mail readers that
  1851. understand and track references. However, the generation of
  1852. such "References" fields is entirely optional.
  1853. 7.3.3 The Message/External-Body subtype
  1854. The external-body subtype indicates that the actual body
  1855. data are not included, but merely referenced. In this case,
  1856. the parameters describe a mechanism for accessing the
  1857. external data.
  1858. When a message body or body part is of type
  1859. "message/external-body", it consists of a header, two
  1860. consecutive CRLFs, and the message header for the
  1861. encapsulated message. If another pair of consecutive CRLFs
  1862. appears, this of course ends the message header for the
  1863. encapsulated message. However, since the encapsulated
  1864. message's body is itself external, it does NOT appear in the
  1865. area that follows. For example, consider the following
  1866. message:
  1867. Content-type: message/external-body;
  1868.      access-type=local-file;
  1869.      name="/u/nsb/Me.gif"
  1870. Content-type: image/gif
  1871. THIS IS NOT REALLY THE BODY!
  1872. The area at the end, which might be called the "phantom
  1873. body", is ignored for most external-body messages. However,
  1874. it may be used to contain auxiliary information for some
  1875. Borenstein & Freed [Page 40]
  1876. RFC 1341          MIME: Multipurpose Internet Mail Extensions          June 1992
  1877. such messages, as indeed it is when the access-type is
  1878. "mail-server". Of the access-types defined by this
  1879. document, the phantom body is used only when the access-type
  1880. is "mail-server". In all other cases, the phantom body is
  1881. ignored.
  1882. The only always-mandatory parameter for message/external-
  1883. body is "access-type"; all of the other parameters may be
  1884. mandatory or optional depending on the value of access-type.
  1885. ACCESS-TYPE -- One or more case-insensitive words,
  1886. comma-separated, indicating supported access
  1887. mechanisms by which the file or data may be
  1888. obtained. Values include, but are not limited to,
  1889. "FTP", "ANON-FTP", "TFTP", "AFS", "LOCAL-FILE",
  1890. and "MAIL-SERVER". Future values, except for
  1891. experimental values beginning with "X-", must be
  1892. registered with IANA, as described in Appendix F.
  1893. In addition, the following three parameters are optional for
  1894. ALL access-types:
  1895. EXPIRATION -- The date (in the RFC 822 "date-time"
  1896. syntax, as extended by RFC 1123 to permit 4 digits
  1897. in the date field) after which the existence of
  1898. the external data is not guaranteed.
  1899. SIZE -- The size (in octets) of the data. The
  1900. intent of this parameter is to help the recipient
  1901. decide whether or not to expend the necessary
  1902. resources to retrieve the external data.
  1903. PERMISSION -- A field that indicates whether or
  1904. not it is expected that clients might also attempt
  1905. to overwrite the data. By default, or if
  1906. permission is "read", the assumption is that they
  1907. are not, and that if the data is retrieved once,
  1908. it is never needed again. If PERMISSION is "read-
  1909. write", this assumption is invalid, and any local
  1910. copy must be considered no more than a cache.
  1911. "Read" and "Read-write" are the only defined
  1912. values of permission.
  1913. The precise semantics of the access-types defined here are
  1914. described in the sections that follow.
  1915. 7.3.3.1 The "ftp" and "tftp" access-types
  1916. An access-type of FTP or TFTP indicates that the message
  1917. body is accessible as a file using the FTP [RFC-959] or TFTP
  1918. [RFC-783] protocols, respectively. For these access-types,
  1919. the following additional parameters are mandatory:
  1920. Borenstein & Freed [Page 41]
  1921. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  1922. NAME -- The name of the file that contains the
  1923. actual body data.
  1924. SITE -- A machine from which the file may be
  1925. obtained, using the given protocol.
  1926. Before the data is retrieved, using these protocols, the
  1927. user will generally need to be asked to provide a login id
  1928. and a password for the machine named by the site parameter.
  1929. In addition, the following optional parameters may also
  1930. appear when the access-type is FTP or ANON-FTP:
  1931. DIRECTORY -- A directory from which the data named
  1932. by NAME should be retrieved.
  1933. MODE -- A transfer mode for retrieving the
  1934. information, e.g. "image".
  1935. 7.3.3.2 The "anon-ftp" access-type
  1936. The "anon-ftp" access-type is identical to the "ftp" access
  1937. type, except that the user need not be asked to provide a
  1938. name and password for the specified site. Instead, the ftp
  1939. protocol will be used with login "anonymous" and a password
  1940. that corresponds to the user's email address.
  1941. 7.3.3.3 The "local-file" and "afs" access-types
  1942. An access-type of "local-file" indicates that the actual
  1943. body is accessible as a file on the local machine. An
  1944. access-type of "afs" indicates that the file is accessible
  1945. via the global AFS file system. In both cases, only a
  1946. single parameter is required:
  1947. NAME -- The name of the file that contains the
  1948. actual body data.
  1949. The following optional parameter may be used to describe the
  1950. locality of reference for the data, that is, the site or
  1951. sites at which the file is expected to be visible:
  1952. SITE -- A domain specifier for a machine or set of
  1953. machines that are known to have access to the data
  1954. file. Asterisks may be used for wildcard matching
  1955. to a part of a domain name, such as
  1956. "*.bellcore.com", to indicate a set of machines on
  1957. which the data should be directly visible, while a
  1958. single asterisk may be used to indicate a file
  1959. that is expected to be universally available,
  1960. e.g., via a global file system.
  1961. 7.3.3.4 The "mail-server" access-type
  1962. Borenstein & Freed [Page 42]
  1963. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  1964. The "mail-server" access-type indicates that the actual body
  1965. is available from a mail server. The mandatory parameter
  1966. for this access-type is:
  1967. SERVER -- The email address of the mail server
  1968. from which the actual body data can be obtained.
  1969. Because mail servers accept a variety of syntax, some of
  1970. which is multiline, the full command to be sent to a mail
  1971. server is not included as a parameter on the content-type
  1972. line. Instead, it may be provided as the "phantom body"
  1973. when the content-type is message/external-body and the
  1974. access-type is mail-server.
  1975. Note that MIME does not define a mail server syntax.
  1976. Rather, it allows the inclusion of arbitrary mail server
  1977. commands in the phantom body. Implementations should
  1978. include the phantom body in the body of the message they send
  1979. to the mail server address to retrieve the relevant data.
  1980. Borenstein & Freed [Page 43]
  1981. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  1982. 7.3.3.5 Examples and Further Explanations
  1983. With the emerging possibility of very wide-area file
  1984. systems, it becomes very hard to know in advance the set of
  1985. machines where a file will and will not be accessible
  1986. directly from the file system. Therefore it may make sense
  1987. to provide both a file name, to be tried directly, and the
  1988. name of one or more sites from which the file is known to be
  1989. accessible. An implementation can try to retrieve remote
  1990. files using FTP or any other protocol, using anonymous file
  1991. retrieval or prompting the user for the necessary name and
  1992. password. If an external body is accessible via multiple
  1993. mechanisms, the sender may include multiple parts of type
  1994. message/external-body within an entity of type
  1995. multipart/alternative.
  1996. However, the external-body mechanism is not intended to be
  1997. limited to file retrieval, as shown by the mail-server
  1998. access-type. Beyond this, one can imagine, for example,
  1999. using a video server for external references to video clips.
  2000. If an entity is of type "message/external-body", then the
  2001. body of the entity will contain the header fields of the
  2002. encapsulated message. The body itself is to be found in the
  2003. external location. This means that if the body of the
  2004. "message/external-body" message contains two consecutive
  2005. CRLFs, everything after those pairs is NOT part of the
  2006. message itself. For most message/external-body messages,
  2007. this trailing area must simply be ignored. However, it is a
  2008. convenient place for additional data that cannot be included
  2009. in the content-type header field. In particular, if the
  2010. "access-type" value is "mail-server", then the trailing area
  2011. must contain commands to be sent to the mail server at the
  2012. address given by the value of the SERVER parameter, as
  2013. described in Section 7.3.3.4.
  2014. The embedded message header fields which appear in the body
  2015. of the message/external-body data can be used to declare the
  2016. Content-type of the external body. Thus a complete
  2017. message/external-body message, referring to a document in
  2018. PostScript format, might look like this:
  2019. From: Whomever
  2020. Subject: whatever
  2021. MIME-Version: 1.0
  2022. Message-ID: id1@host.com
  2023. Content-Type: multipart/alternative; boundary=42
  2024. --42
  2025. Content-Type: message/external-body;
  2026. name="BodyFormats.ps";
  2027. Borenstein & Freed [Page 44]
  2028. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  2029. site="thumper.bellcore.com";
  2030. access-type=ANON-FTP;
  2031. directory="pub";
  2032. mode="image";
  2033. expiration="Fri, 14 Jun 1991 19:13:14 -0400 (EDT)"
  2034. Content-type: application/postscript
  2035. --42
  2036. Content-Type: message/external-body;
  2037. name="/u/nsb/writing/rfcs/RFC-XXXX.ps";
  2038. site="thumper.bellcore.com";
  2039. access-type=AFS;
  2040. expiration="Fri, 14 Jun 1991 19:13:14 -0400 (EDT)"
  2041. Content-type: application/postscript
  2042. --42
  2043. Content-Type: message/external-body;
  2044. access-type=mail-server;
  2045. server="listserv@bogus.bitnet";
  2046. expiration="Fri, 14 Jun 1991 19:13:14 -0400 (EDT)"
  2047. Content-type: application/postscript
  2048. get rfc-xxxx doc
  2049. --42--
  2050. Like the message/partial type, the message/external-body
  2051. type is intended to be transparent, that is, to convey the
  2052. data type in the external body rather than to convey a
  2053. message with a body of that type. Thus the headers on the
  2054. outer and inner parts must be merged using the same rules as
  2055. for message/partial. In particular, this means that the
  2056. Content-type header is overridden, but the From and Subject
  2057. headers are preserved.
  2058. Note that since the external bodies are not transported as
  2059. mail, they need not conform to the 7-bit and line length
  2060. requirements, but might in fact be binary files. Thus a
  2061. Content-Transfer-Encoding is not generally necessary, though
  2062. it is permitted.
  2063. Note that the body of a message of type "message/external-
  2064. body" is governed by the basic syntax for an RFC 822
  2065. message. In particular, anything before the first
  2066. consecutive pair of CRLFs is header information, while
  2067. anything after it is body information, which is ignored for
  2068. most access-types.
  2069. Borenstein & Freed [Page 45]
  2070. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  2071. 7.4 The Application Content-Type
  2072. The "application" Content-Type is to be used for data which
  2073. do not fit in any of the other categories, and particularly
  2074. for data to be processed by mail-based uses of application
  2075. programs. This is information which must be processed by an
  2076. application before it is viewable or usable to a user.
  2077. Expected uses for Content-Type application include mail-
  2078. based file transfer, spreadsheets, data for mail-based
  2079. scheduling systems, and languages for "active"
  2080. (computational) email. (The latter, in particular, can pose
  2081. security problems which should be understood by
  2082. implementors, and are considered in detail in the discussion
  2083. of the application/PostScript content-type.)
  2084. For example, a meeting scheduler might define a standard
  2085. representation for information about proposed meeting dates.
  2086. An intelligent user agent would use this information to
  2087. conduct a dialog with the user, and might then send further
  2088. mail based on that dialog. More generally, there have been
  2089. several "active" messaging languages developed in which
  2090. programs in a suitably specialized language are sent through
  2091. the mail and automatically run in the recipient's
  2092. environment.
  2093. Such applications may be defined as subtypes of the
  2094. "application" Content-Type. This document defines three
  2095. subtypes: octet-stream, ODA, and PostScript.
  2096. In general, the subtype of application will often be the
  2097. name of the application for which the data are intended.
  2098. This does not mean, however, that any application program
  2099. name may be used freely as a subtype of application. Such
  2100. usages must be registered with IANA, as described in
  2101. Appendix F.
  2102. 7.4.1 The Application/Octet-Stream (primary) subtype
  2103. The primary subtype of application, "octet-stream", may be
  2104. used to indicate that a body contains binary data. The set
  2105. of possible parameters includes, but is not limited to:
  2106. NAME -- a suggested name for the binary data if
  2107. stored as a file.
  2108. TYPE -- the general type or category of binary
  2109. data. This is intended as information for the
  2110. human recipient rather than for any automatic
  2111. processing.
  2112. CONVERSIONS -- the set of operations that have
  2113. been performed on the data before putting it in
  2114. the mail (and before any Content-Transfer-Encoding
  2115. that might have been applied). If multiple
  2116. Borenstein & Freed [Page 46]
  2117. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  2118. conversions have occurred, they must be separated
  2119. by commas and specified in the order they were
  2120. applied -- that is, the leftmost conversion must
  2121. have occurred first, and conversions are undone
  2122. from right to left. Note that NO conversion
  2123. values are defined by this document. Any
  2124. conversion values that do not begin with "X-"
  2125. must be preceded by a published specification and
  2126. by registration with IANA, as described in
  2127. Appendix F.
  2128. PADDING -- the number of bits of padding that were
  2129. appended to the bitstream comprising the actual
  2130. contents to produce the enclosed byte-oriented
  2131. data. This is useful for enclosing a bitstream in
  2132. a body when the total number of bits is not a
  2133. multiple of the byte size.
  2134. The values for these attributes are left undefined at
  2135. present, but may require specification in the future. An
  2136. example of a common (though UNIX-specific) usage might be:
  2137. Content-Type: application/octet-stream;
  2138. name=foo.tar.Z; type=tar;
  2139. conversions="x-encrypt,x-compress"
  2140. However, it should be noted that the use of such conversions
  2141. is explicitly discouraged due to a lack of portability and
  2142. standardization. The use of uuencode is particularly
  2143. discouraged, in favor of the Content-Transfer-Encoding
  2144. mechanism, which is both more standardized and more portable
  2145. across mail boundaries.
  2146. The recommended action for an implementation that receives
  2147. application/octet-stream mail is to simply offer to put the
  2148. data in a file, with any Content-Transfer-Encoding undone,
  2149. or perhaps to use it as input to a user-specified process.
  2150. To reduce the danger of transmitting rogue programs through
  2151. the mail, it is strongly recommended that implementations
  2152. NOT implement a path-search mechanism whereby an arbitrary
  2153. program named in the Content-Type parameter (e.g., an
  2154. "interpreter=" parameter) is found and executed using the
  2155. mail body as input.
  2156. 7.4.2 The Application/PostScript subtype
  2157. A Content-Type of "application/postscript" indicates a
  2158. PostScript program. The language is defined in
  2159. [POSTSCRIPT]. It is recommended that PostScript sent
  2160. through email use the PostScript document structuring
  2161. conventions, correctly, if at all possible.
  2162. Borenstein & Freed [Page 47]
  2163. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  2164. The execution of general-purpose PostScript interpreters
  2165. entails serious security risks, and implementors are
  2166. discouraged from simply sending PostScript email bodies to
  2167. "off-the-shelf" interpreters. While it is usually safe to
  2168. send PostScript to a printer, where the potential for harm
  2169. is greatly constrained, implementors should consider all of
  2170. the following before they add interactive display of
  2171. PostScript bodies to their mail readers.
  2172. The remainder of this section outlines some, though probably
  2173. not all, of the possible problems with sending PostScript
  2174. through the mail.
  2175. Dangerous operations in the PostScript language include, but
  2176. may not be limited to, the PostScript operators deletefile,
  2177. renamefile, filenameforall, and file. File is only
  2178. dangerous when applied to something other than standard
  2179. input or output. Implementations may also define additional
  2180. nonstandard file operators; these may also pose a threat to
  2181. security. Filenameforall, the wildcard file search
  2182. operator, may appear at first glance to be harmless. Note,
  2183. however, that this operator has the potential to reveal
  2184. information about what files the recipient has access to,
  2185. and this information may itself be sensitive. Message
  2186. senders should avoid the use of potentially dangerous file
  2187. operators, since these operators are quite likely to be
  2188. unavailable in secure PostScript implementations. Message-
  2189. receiving and -displaying software should either completely
  2190. disable all potentially dangerous file operators or take
  2191. special care not to delegate any special authority to their
  2192. operation. These operators should be viewed as being done by
  2193. an outside agency when interpreting PostScript documents.
  2194. Such disabling and/or checking should be done completely
  2195. outside of the reach of the PostScript language itself; care
  2196. should be taken to ensure that no method exists for
  2197. reenabling full-function versions of these operators.
  2198. The PostScript language provides facilities for exiting the
  2199. normal interpreter, or server, loop. Changes made in this
  2200. "outer" environment are customarily retained across
  2201. documents, and may in some cases be retained semipermanently
  2202. in nonvolatile memory. The operators associated with exiting
  2203. the interpreter loop have the potential to interfere with
  2204. subsequent document processing. As such, their unrestrained
  2205. use constitutes a threat of service denial. PostScript
  2206. operators that exit the interpreter loop include, but may
  2207. not be limited to, the exitserver and startjob operators.
  2208. Message-sending software should not generate PostScript that
  2209. depends on exiting the interpreter loop to operate. The
  2210. ability to exit will probably be unavailable in secure
  2211. PostScript implementations. Message-receiving and
  2212. -displaying software should, if possible, disable the
  2213. ability to make retained changes to the PostScript
  2214. environment. Eliminate the startjob and exitserver commands.
  2215. Borenstein & Freed [Page 48]
  2216. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  2217. If these commands cannot be eliminated, at least set the
  2218. password associated with them to a hard-to-guess value.
  2219. PostScript provides operators for setting system-wide and
  2220. device-specific parameters. These parameter settings may be
  2221. retained across jobs and may potentially pose a threat to
  2222. the correct operation of the interpreter. The PostScript
  2223. operators that set system and device parameters include, but
  2224. may not be limited to, the setsystemparams and setdevparams
  2225. operators. Message-sending software should not generate
  2226. PostScript that depends on the setting of system or device
  2227. parameters to operate correctly. The ability to set these
  2228. parameters will probably be unavailable in secure PostScript
  2229. implementations. Message-receiving and -displaying software
  2230. should, if possible, disable the ability to change system
  2231. and device parameters. If these operators cannot be
  2232. disabled, at least set the password associated with them to
  2233. a hard-to-guess value.
  2234. Some PostScript implementations provide nonstandard
  2235. facilities for the direct loading and execution of machine
  2236. code. Such facilities are quite obviously open to
  2237. substantial abuse. Message-sending software should not
  2238. make use of such features. Besides being totally hardware-
  2239. specific, they are also likely to be unavailable in secure
  2240. implementations of PostScript. Message-receiving and
  2241. -displaying software should not allow such operators to be
  2242. used if they exist.
  2243. PostScript is an extensible language, and many, if not most,
  2244. implementations of it provide a number of their own
  2245. extensions. This document does not deal with such extensions
  2246. explicitly since they constitute an unknown factor.
  2247. Message-sending software should not make use of nonstandard
  2248. extensions; they are likely to be missing from some
  2249. implementations. Message-receiving and -displaying software
  2250. should make sure that any nonstandard PostScript operators
  2251. are secure and don't present any kind of threat.
  2252. It is possible to write PostScript that consumes huge
  2253. amounts of various system resources. It is also possible to
  2254. write PostScript programs that loop infinitely. Both types
  2255. of programs have the potential to cause damage if sent to
  2256. unsuspecting recipients. Message-sending software should
  2257. avoid the construction and dissemination of such programs,
  2258. which is antisocial. Message-receiving and -displaying
  2259. software should provide appropriate mechanisms to abort
  2260. processing of a document after a reasonable amount of time
  2261. has elapsed. In addition, PostScript interpreters should be
  2262. limited to the consumption of only a reasonable amount of
  2263. any given system resource.
  2264. Finally, bugs may exist in some PostScript interpreters
  2265. which could possibly be exploited to gain unauthorized
  2266. Borenstein & Freed [Page 49]
  2267. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  2268. access to a recipient's system. Beyond noting this
  2269. possibility, there is no specific action to take to prevent
  2270. it, other than the timely correction of such bugs if any
  2271. are found.
  2272. 7.4.3 The Application/ODA subtype
  2273. The "ODA" subtype of application is used to indicate that a
  2274. body contains information encoded according to the Office
  2275. Document Architecture [ODA] standards, using the ODIF
  2276. representation format. For application/oda, the Content-
  2277. Type line should also specify an attribute/value pair that
  2278. indicates the document application profile (DAP), using the
  2279. key word "profile". Thus an appropriate header field might
  2280. look like this:
  2281. Content-Type: application/oda; profile=Q112
  2282. Consult the ODA standard [ODA] for further information.
  2283. Borenstein & Freed [Page 50]
  2284. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  2285. 7.5 The Image Content-Type
  2286. A Content-Type of "image" indicates that the body contains an
  2287. image. The subtype names the specific image format. These
  2288. names are case insensitive. Two initial subtypes are "jpeg"
  2289. for the JPEG format, JFIF encoding, and "gif" for GIF format
  2290. [GIF].
  2291. The list of image subtypes given here is neither exclusive
  2292. nor exhaustive, and is expected to grow as more types are
  2293. registered with IANA, as described in Appendix F.
  2294. 7.6 The Audio Content-Type
  2295. A Content-Type of "audio" indicates that the body contains
  2296. audio data. Although there is not yet a consensus on an
  2297. "ideal" audio format for use with computers, there is a
  2298. pressing need for a format capable of providing
  2299. interoperable behavior.
  2300. The initial subtype of "basic" is specified to meet this
  2301. requirement by providing an absolutely minimal lowest common
  2302. denominator audio format. It is expected that richer
  2303. formats for higher quality and/or lower bandwidth audio will
  2304. be defined by a later document.
  2305. The content of the "audio/basic" subtype is audio encoded
  2306. using 8-bit ISDN u-law [PCM]. When this subtype is present,
  2307. a sample rate of 8000 Hz and a single channel are assumed.
  2308. 7.7 The Video Content-Type
  2309. A Content-Type of "video" indicates that the body contains a
  2310. time-varying-picture image, possibly with color and
  2311. coordinated sound. The term "video" is used extremely
  2312. generically, rather than with reference to any particular
  2313. technology or format, and is not meant to preclude subtypes
  2314. such as animated drawings encoded compactly. The subtype
  2315. "mpeg" refers to video coded according to the MPEG standard
  2316. [MPEG].
  2317. Note that although in general this document strongly
  2318. discourages the mixing of multiple media in a single body,
  2319. it is recognized that many so-called "video" formats include
  2320. a representation for synchronized audio, and this is
  2321. explicitly permitted for subtypes of "video".
  2322. 7.8 Experimental Content-Type Values
  2323. A Content-Type value beginning with the characters "X-" is a
  2324. private value, to be used by consenting mail systems by
  2325. mutual agreement. Any format without a rigorous and public
  2326. definition must be named with an "X-" prefix, and publicly
  2327. specified values shall never begin with "X-". (Older
  2328. Borenstein & Freed [Page 51]
  2329. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  2330. versions of the widely-used Andrew system use the "X-BE2"
  2331. name, so new systems should probably choose a different
  2332. name.)
  2333. In general, the use of "X-" top-level types is strongly
  2334. discouraged. Implementors should invent subtypes of the
  2335. existing types whenever possible. The invention of new
  2336. types is intended to be restricted primarily to the
  2337. development of new media types for email, such as digital
  2338. odors or holography, and not for new data formats in
  2339. general. In many cases, a subtype of application will be
  2340. more appropriate than a new top-level type.
  2341. Borenstein & Freed [Page 52]
  2342. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  2343. Summary
  2344. Using the MIME-Version, Content-Type, and Content-Transfer-
  2345. Encoding header fields, it is possible to include, in a
  2346. standardized way, arbitrary types of data objects with RFC
  2347. 822 conformant mail messages. No restrictions imposed by
  2348. either RFC 821 or RFC 822 are violated, and care has been
  2349. taken to avoid problems caused by additional restrictions
  2350. imposed by the characteristics of some Internet mail
  2351. transport mechanisms (see Appendix B). The "multipart" and
  2352. "message" Content-Types allow mixing and hierarchical
  2353. structuring of objects of different types in a single
  2354. message. Further Content-Types provide a standardized
  2355. mechanism for tagging messages or body parts as audio,
  2356. image, or several other kinds of data. A distinguished
  2357. parameter syntax allows further specification of data format
  2358. details, particularly the specification of alternate
  2359. character sets. Additional optional header fields provide
  2360. mechanisms for certain extensions deemed desirable by many
  2361. implementors. Finally, a number of useful Content-Types are
  2362. defined for general use by consenting user agents, notably
  2363. text/richtext, message/partial, and message/external-body.
  2364. Borenstein & Freed [Page 53]
  2365. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  2366. Acknowledgements
  2367. This document is the result of the collective effort of a
  2368. large number of people, at several IETF meetings, on the
  2369. IETF-SMTP and IETF-822 mailing lists, and elsewhere.
  2370. Although any enumeration seems doomed to suffer from
  2371. egregious omissions, the following are among the many
  2372. contributors to this effort:
  2373. Harald Tveit Alvestrand Timo Lehtinen
  2374. Randall Atkinson John R. MacMillan
  2375. Philippe Brandon Rick McGowan
  2376. Kevin Carosso Leo Mclaughlin
  2377. Uhhyung Choi Goli Montaser-Kohsari
  2378. Cristian Constantinof Keith Moore
  2379. Mark Crispin Tom Moore
  2380. Dave Crocker Erik Naggum
  2381. Terry Crowley Mark Needleman
  2382. Walt Daniels John Noerenberg
  2383. Frank Dawson Mats Ohrman
  2384. Hitoshi Doi Julian Onions
  2385. Kevin Donnelly Michael Patton
  2386. Keith Edwards David J. Pepper
  2387. Chris Eich Blake C. Ramsdell
  2388. Johnny Eriksson Luc Rooijakkers
  2389. Craig Everhart Marshall T. Rose
  2390. Patrik Faeltstroem Jonathan Rosenberg
  2391. Erik E. Fair Jan Rynning
  2392. Roger Fajman Harri Salminen
  2393. Alain Fontaine Michael Sanderson
  2394. James M. Galvin Masahiro Sekiguchi
  2395. Philip Gladstone Mark Sherman
  2396. Thomas Gordon Keld Simonsen
  2397. Phill Gross Bob Smart
  2398. James Hamilton Peter Speck
  2399. Steve Hardcastle-Kille Henry Spencer
  2400. David Herron Einar Stefferud
  2401. Bruce Howard Michael Stein
  2402. Bill Janssen Klaus Steinberger
  2403. Olle Jaernefors Peter Svanberg
  2404. Risto Kankkunen James Thompson
  2405. Phil Karn Steve Uhler
  2406. Alan Katz Stuart Vance
  2407. Tim Kehres Erik van der Poel
  2408. Neil Katin Guido van Rossum
  2409. Kyuho Kim Peter Vanderbilt
  2410. Anders Klemets Greg Vaudreuil
  2411. John Klensin Ed Vielmetti
  2412. Valdis Kletniek Ryan Waldron
  2413. Jim Knowles Wally Wedel
  2414. Stev Knowles Sven-Ove Westberg
  2415. Bob Kummerfeld Brian Wideen
  2416. Borenstein & Freed [Page 54]
  2417. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  2418. Pekka Kytolaakso John Wobus
  2419. Stellan Lagerstrom Glenn Wright
  2420. Vincent Lau Rayan Zachariassen
  2421. Donald Lindsay David Zimmerman
  2422. The authors apologize for any omissions from this list,
  2423. which are certainly unintentional.
  2424. Borenstein & Freed [Page 55]
  2425. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  2426. Appendix A -- Minimal MIME-Conformance
  2427. The mechanisms described in this document are open-ended.
  2428. It is definitely not expected that all implementations will
  2429. support all of the Content-Types described, nor that they
  2430. will all share the same extensions. In order to promote
  2431. interoperability, however, it is useful to define the
  2432. concept of "MIME-conformance" to define a certain level of
  2433. implementation that allows the useful interworking of
  2434. messages with content that differs from US ASCII text. In
  2435. this section, we specify the requirements for such
  2436. conformance.
  2437. A mail user agent that is MIME-conformant MUST:
  2438. 1. Always generate a "MIME-Version: 1.0" header
  2439. field.
  2440. 2. Recognize the Content-Transfer-Encoding header
  2441. field, and decode all received data encoded with
  2442. either the quoted-printable or base64
  2443. encodings. Encode any data sent that is
  2444. not in seven-bit mail-ready representation using
  2445. one of these transformations and include the
  2446. appropriate Content-Transfer-Encoding header
  2447. field, unless the underlying transport mechanism
  2448. supports non-seven-bit data, as SMTP does not.
  2449. 3. Recognize and interpret the Content-Type
  2450. header field, and avoid showing users raw data
  2451. with a Content-Type field other than text. Be
  2452. able to send at least text/plain messages, with
  2453. the character set specified as a parameter if it
  2454. is not US-ASCII.
  2455. 4. Explicitly handle the following Content-Type
  2456. values, to at least the following extents:
  2457. Text:
  2458. -- Recognize and display "text" mail
  2459. with the character set "US-ASCII."
  2460. -- Recognize other character sets at
  2461. least to the extent of being able
  2462. to inform the user about what
  2463. character set the message uses.
  2464. -- Recognize the "ISO-8859-*" character
  2465. sets to the extent of being able to
  2466. display those characters that are
  2467. common to ISO-8859-* and US-ASCII,
  2468. namely all characters represented
  2469. by octet values 0-127.
  2470. -- For unrecognized subtypes, show or
  2471. offer to show the user the "raw"
  2472. version of the data. An ability at
  2473. Borenstein & Freed [Page 56]
  2474. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  2475. least to convert "text/richtext" to
  2476. plain text, as shown in Appendix D,
  2477. is encouraged, but not required for
  2478. conformance.
  2479. Message:
  2480. -- Recognize and display at least the
  2481. primary (822) encapsulation.
  2482. Multipart:
  2483. -- Recognize the primary (mixed)
  2484. subtype. Display all relevant
  2485. information on the message level
  2486. and the body part header level and
  2487. then display or offer to display
  2488. each of the body parts
  2489. individually.
  2490. -- Recognize the "alternative" subtype,
  2491. and avoid showing the user
  2492. redundant parts of
  2493. multipart/alternative mail.
  2494. -- Treat any unrecognized subtypes as if
  2495. they were "mixed".
  2496. Application:
  2497. -- Offer the ability to remove either of
  2498. the two types of Content-Transfer-
  2499. Encoding defined in this document
  2500. and put the resulting information
  2501. in a user file.
  2502. 5. Upon encountering any unrecognized Content-
  2503. Type, an implementation must treat it as if it had
  2504. a Content-Type of "application/octet-stream" with
  2505. no parameter sub-arguments. How such data are
  2506. handled is up to an implementation, but likely
  2507. options for handling such unrecognized data
  2508. include offering the user to write it into a file
  2509. (decoded from its mail transport format) or
  2510. offering the user to name a program to which the
  2511. decoded data should be passed as input.
  2512. Unrecognized predefined types, which in a MIME-
  2513. conformant mailer might still include audio,
  2514. image, or video, should also be treated in this
  2515. way.
  2516. A user agent that meets the above conditions is said to be
  2517. MIME-conformant. The meaning of this phrase is that it is
  2518. assumed to be "safe" to send virtually any kind of
  2519. properly-marked data to users of such mail systems, because
  2520. such systems will at least be able to treat the data as
  2521. undifferentiated binary, and will not simply splash it onto
  2522. the screen of unsuspecting users. There is another sense
  2523. in which it is always "safe" to send data in a format that
  2524. is MIME-conformant, which is that such data will not break
  2525. or be broken by any known systems that are conformant with
  2526. RFC 821 and RFC 822. User agents that are MIME-conformant
  2527. Borenstein & Freed [Page 57]
  2528. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  2529. have the additional guarantee that the user will not be
  2530. shown data that were never intended to be viewed as text.
  2531. Borenstein & Freed [Page 58]
  2532. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  2533. Appendix B -- General Guidelines For Sending Email Data
  2534. Internet email is not a perfect, homogeneous system. Mail
  2535. may become corrupted at several stages in its travel to a
  2536. final destination. Specifically, email sent throughout the
  2537. Internet may travel across many networking technologies.
  2538. Many networking and mail technologies do not support the
  2539. full functionality possible in the SMTP transport
  2540. environment. Mail traversing these systems is likely to be
  2541. modified in such a way that it can be transported.
  2542. There exist many widely-deployed non-conformant MTAs in the
  2543. Internet. These MTAs, speaking the SMTP protocol, alter
  2544. messages on the fly to take advantage of the internal data
  2545. structure of the hosts they are implemented on, or are just
  2546. plain broken.
  2547. The following guidelines may be useful to anyone devising a
  2548. data format (Content-Type) that will survive the widest
  2549. range of networking technologies and known broken MTAs
  2550. unscathed. Note that anything encoded in the base64
  2551. encoding will satisfy these rules, but that some well-known
  2552. mechanisms, notably the UNIX uuencode facility, will not.
  2553. Note also that anything encoded in the Quoted-Printable
  2554. encoding will survive most gateways intact, but possibly not
  2555. some gateways to systems that use the EBCDIC character set.
  2556. (1) Under some circumstances the encoding used for
  2557. data may change as part of normal gateway or user
  2558. agent operation. In particular, conversion from
  2559. base64 to quoted-printable and vice versa may be
  2560. necessary. This may result in the confusion of
  2561. CRLF sequences with line breaks in text body
  2562. parts. As such, the persistence of CRLF as
  2563. something other than a line break should not be
  2564. relied on.
  2565. (2) Many systems may elect to represent and store
  2566. text data using local newline conventions. Local
  2567. newline conventions may not match the RFC822 CRLF
  2568. convention -- systems are known that use plain CR,
  2569. plain LF, CRLF, or counted records. The result is
  2570. that isolated CR and LF characters are not well
  2571. tolerated in general; they may be lost or
  2572. converted to delimiters on some systems, and hence
  2573. should not be relied on.
  2574. (3) TAB (HT) characters may be misinterpreted or
  2575. may be automatically converted to variable numbers
  2576. of spaces. This is unavoidable in some
  2577. environments, notably those not based on the ASCII
  2578. character set. Such conversion is STRONGLY
  2579. DISCOURAGED, but it may occur, and mail formats
  2580. should not rely on the persistence of TAB (HT)
  2581. Borenstein & Freed [Page 59]
  2582. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  2583. characters.
  2584. (4) Lines longer than 76 characters may be wrapped
  2585. or truncated in some environments. Line wrapping
  2586. and line truncation are STRONGLY DISCOURAGED, but
  2587. unavoidable in some cases. Applications which
  2588. require long lines should somehow differentiate
  2589. between soft and hard line breaks. (A simple way
  2590. to do this is to use the quoted-printable
  2591. encoding.)
  2592. (5) Trailing "white space" characters (SPACE, TAB
  2593. (HT)) on a line may be discarded by some transport
  2594. agents, while other transport agents may pad lines
  2595. with these characters so that all lines in a mail
  2596. file are of equal length. The persistence of
  2597. trailing white space, therefore, should not be
  2598. relied on.
  2599. (6) Many mail domains use variations on the ASCII
  2600. character set, or use character sets such as
  2601. EBCDIC which contain most but not all of the US-
  2602. ASCII characters. The correct translation of
  2603. characters not in the "invariant" set cannot be
  2604. depended on across character converting gateways.
  2605. For example, this situation is a problem when
  2606. sending uuencoded information across BITNET, an
  2607. EBCDIC system. Similar problems can occur without
  2608. crossing a gateway, since many Internet hosts use
  2609. character sets other than ASCII internally. The
  2610. definition of Printable Strings in X.400 adds
  2611. further restrictions in certain special cases. In
  2612. particular, the only characters that are known to
  2613. be consistent across all gateways are the 73
  2614. characters that correspond to the upper and lower
  2615. case letters A-Z and a-z, the 10 digits 0-9, and
  2616. the following eleven special characters:
  2617. "'" (ASCII code 39)
  2618. "(" (ASCII code 40)
  2619. ")" (ASCII code 41)
  2620. "+" (ASCII code 43)
  2621. "," (ASCII code 44)
  2622. "-" (ASCII code 45)
  2623. "." (ASCII code 46)
  2624. "/" (ASCII code 47)
  2625. ":" (ASCII code 58)
  2626. "=" (ASCII code 61)
  2627. "?" (ASCII code 63)
  2628. A maximally portable mail representation, such as
  2629. the base64 encoding, will confine itself to
  2630. relatively short lines of text in which the only
  2631. meaningful characters are taken from this set of
  2632. Borenstein & Freed [Page 60]
  2633. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  2634. 73 characters.
  2635. Please note that the above list is NOT a list of recommended
  2636. practices for MTAs. RFC 821 MTAs are prohibited from
  2637. altering the character of white space or wrapping long
  2638. lines. These BAD and illegal practices are known to occur
  2639. on established networks, and implementations should be robust
  2640. in dealing with the bad effects they can cause.
  2641. Borenstein & Freed [Page 61]
  2642. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  2643. Appendix C -- A Complex Multipart Example
  2644. What follows is the outline of a complex multipart message.
  2645. This message has five parts to be displayed serially: two
  2646. introductory plain text parts, an embedded multipart
  2647. message, a richtext part, and a closing encapsulated text
  2648. message in a non-ASCII character set. The embedded
  2649. multipart message has two parts to be displayed in parallel,
  2650. a picture and an audio fragment.
  2651. MIME-Version: 1.0
  2652. From: Nathaniel Borenstein <nsb@bellcore.com>
  2653. Subject: A multipart example
  2654. Content-Type: multipart/mixed;
  2655. boundary=unique-boundary-1
  2656. This is the preamble area of a multipart message.
  2657. Mail readers that understand multipart format
  2658. should ignore this preamble.
  2659. If you are reading this text, you might want to
  2660. consider changing to a mail reader that understands
  2661. how to properly display multipart messages.
  2662. --unique-boundary-1
  2663. ...Some text appears here...
  2664. [Note that the preceding blank line means
  2665. no header fields were given and this is text,
  2666. with charset US ASCII. It could have been
  2667. done with explicit typing as in the next part.]
  2668. --unique-boundary-1
  2669. Content-type: text/plain; charset=US-ASCII
  2670. This could have been part of the previous part,
  2671. but illustrates explicit versus implicit
  2672. typing of body parts.
  2673. --unique-boundary-1
  2674. Content-Type: multipart/parallel;
  2675. boundary=unique-boundary-2
  2676. --unique-boundary-2
  2677. Content-Type: audio/basic
  2678. Content-Transfer-Encoding: base64
  2679. ... base64-encoded 8000 Hz single-channel
  2680. u-law-format audio data goes here....
  2681. --unique-boundary-2
  2682. Content-Type: image/gif
  2683. Content-Transfer-Encoding: Base64
  2684. Borenstein & Freed [Page 62]
  2685. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  2686. ... base64-encoded image data goes here....
  2687. --unique-boundary-2--
  2688. --unique-boundary-1
  2689. Content-type: text/richtext
  2690. This is <bold><italic>richtext.</italic></bold>
  2691. <nl><nl>Isn't it
  2692. <bigger><bigger>cool?</bigger></bigger>
  2693. --unique-boundary-1
  2694. Content-Type: message/rfc822
  2695. From: (name in US-ASCII)
  2696. Subject: (subject in US-ASCII)
  2697. Content-Type: Text/plain; charset=ISO-8859-1
  2698. Content-Transfer-Encoding: Quoted-printable
  2699. ... Additional text in ISO-8859-1 goes here ...
  2700. --unique-boundary-1--
  2701. Borenstein & Freed [Page 63]
  2702. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  2703. Appendix D -- A Simple Richtext-to-Text Translator in C
  2704. One of the major goals in the design of the richtext subtype
  2705. of the text Content-Type is to make formatted text so simple
  2706. that even text-only mailers will implement richtext-to-
  2707. plain-text translators, thus increasing the likelihood that
  2708. multifont text will become "safe" to use very widely. To
  2709. demonstrate this simplicity, what follows is an extremely
  2710. simple 44-line C program that converts richtext input into
  2711. plain text output:
  2712. #include <stdio.h>
  2713. #include <ctype.h>
  2714. main() {
  2715. int c, i;
  2716. char token[50];
  2717. while((c = getc(stdin)) != EOF) {
  2718. if (c == '<') {
  2719. for (i=0; (i<49 && (c = getc(stdin)) != '>'
  2720. && c != EOF); ++i) {
  2721. token[i] = isupper(c) ? tolower(c) : c;
  2722. }
  2723. if (c == EOF) break;
  2724. if (c != '>') while ((c = getc(stdin)) !=
  2725. '>'
  2726. && c != EOF) {;}
  2727. if (c == EOF) break;
  2728. token[i] = '\0';
  2729. if (!strcmp(token, "lt")) {
  2730. putc('<', stdout);
  2731. } else if (!strcmp(token, "nl")) {
  2732. putc('\n', stdout);
  2733. } else if (!strcmp(token, "/paragraph")) {
  2734. fputs("\n\n", stdout);
  2735. } else if (!strcmp(token, "comment")) {
  2736. int commct=1;
  2737. while (commct > 0) {
  2738. while ((c = getc(stdin)) != '<'
  2739. && c != EOF) ;
  2740. if (c == EOF) break;
  2741. for (i=0; (c = getc(stdin)) != '>'
  2742. && c != EOF; ++i) {
  2743. token[i] = isupper(c) ?
  2744. tolower(c) : c;
  2745. }
  2746. if (c== EOF) break;
  2747. token[i] = NULL;
  2748. if (!strcmp(token, "/comment")) --
  2749. commct;
  2750. if (!strcmp(token, "comment"))
  2751. ++commct;
  2752. Borenstein & Freed [Page 64]
  2753. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  2754. }
  2755. } /* Ignore all other tokens */
  2756. } else if (c != '\n') putc(c, stdout);
  2757. }
  2758. putc('\n', stdout); /* for good measure */
  2759. }
  2760. It should be noted that one can do considerably better than
  2761. this in displaying richtext data on a dumb terminal. In
  2762. particular, one can replace font information such as "bold"
  2763. with textual emphasis (like *this* or _T_H_I_S_). One can
  2764. also properly handle the richtext formatting commands
  2765. regarding indentation, justification, and others. However,
  2766. the above program is all that is necessary in order to
  2767. present richtext on a dumb terminal.
  2768. Borenstein & Freed [Page 65]
  2769. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  2770. Appendix E -- Collected Grammar
  2771. This appendix contains the complete BNF grammar for all the
  2772. syntax specified by this document.
  2773. By itself, however, this grammar is incomplete. It refers
  2774. to several entities that are defined by RFC 822. Rather
  2775. than reproduce those definitions here, and risk
  2776. unintentional differences between the two, this document
  2777. simply refers the reader to RFC 822 for the remaining
  2778. definitions. Wherever a term is undefined, it refers to the
  2779. RFC 822 definition.
  2780. attribute := token
  2781. body-part := <"message" as defined in RFC 822,
  2782. with all header fields optional, and with the
  2783. specified delimiter not occurring anywhere in
  2784. the message body, either on a line by itself
  2785. or as a substring anywhere.>
  2786. boundary := 0*69<bchars> bcharsnospace
  2787. bchars := bcharsnospace / " "
  2788. bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / "+" /
  2789. "_"
  2790. / "," / "-" / "." / "/" / ":" / "=" / "?"
  2791. close-delimiter := delimiter "--"
  2792. Content-Description := *text
  2793. Content-ID := msg-id
  2794. Content-Transfer-Encoding := "BASE64" /
  2795. "QUOTED-PRINTABLE" /
  2796. "8BIT" / "7BIT" /
  2797. "BINARY" / x-token
  2798. Content-Type := type "/" subtype *[";" parameter]
  2799. delimiter := CRLF "--" boundary ; taken from Content-Type
  2800. ; field when content-type is
  2801. ; multipart.
  2802. ;
  2803. ; There should be no space
  2804. ; between "--" and boundary.
  2805. encapsulation := delimiter CRLF body-part
  2806. epilogue := *text ; to be ignored upon
  2807. ; receipt.
  2808. Borenstein & Freed [Page 66]
  2809. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  2810. MIME-Version := 1*text
  2811. multipart-body := preamble 1*encapsulation close-delimiter
  2812. epilogue
  2813. parameter := attribute "=" value
  2814. preamble := *text ; to be ignored upon
  2815. ; receipt.
  2816. subtype := token
  2817. token := 1*<any CHAR except SPACE, CTLs, or tspecials>
  2818. tspecials := "(" / ")" / "<" / ">" / "@" ; Must be in
  2819. / "," / ";" / ":" / "\" / <"> ; quoted-string,
  2820. / "/" / "[" / "]" / "?" / "." ; to use within
  2821. / "=" ; parameter values
  2822. type := "application" / "audio" ; case-insensitive
  2824. / "image" / "message"
  2825. / "multipart" / "text"
  2826. / "video" / x-token
  2827. value := token / quoted-string
  2828. x-token := <The two characters "X-" followed, with no
  2829. intervening white space, by any token>
  2830. Borenstein & Freed [Page 67]
  2831. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  2832. Appendix F -- IANA Registration Procedures
  2833. MIME has been carefully designed to have extensible
  2834. mechanisms, and it is expected that the set of content-
  2835. type/subtype pairs and their associated parameters will grow
  2836. significantly with time. Several other MIME fields, notably
  2837. character set names, access-type parameters for the
  2838. message/external-body type, conversions parameters for the
  2839. application type, and possibly even Content-Transfer-
  2840. Encoding values, are likely to have new values defined over
  2841. time. In order to ensure that the set of such values is
  2842. developed in an orderly, well-specified, and public manner,
  2843. MIME defines a registration process which uses the Internet
  2844. Assigned Numbers Authority (IANA) as a central registry for
  2845. such values.
  2846. In general, parameters in the content-type header field are
  2847. used to convey supplemental information for various content
  2848. types, and their use is defined when the content-type and
  2849. subtype are defined. New parameters should not be defined
  2850. as a way to introduce new functionality.
  2851. In order to simplify and standardize the registration
  2852. process, this appendix gives templates for the registration
  2853. of new values with IANA. Each of these is given in the form
  2854. of an email message template, to be filled in by the
  2855. registering party.
  2856. F.1 Registration of New Content-type/subtype Values
  2857. Note that MIME is generally expected to be extended by
  2858. subtypes. If a new fundamental top-level type is needed,
  2859. its specification should be published as an RFC or
  2860. submitted in a form suitable to become an RFC, and be
  2861. subject to the Internet standards process.
  2862. To: IANA@isi.edu
  2863. Subject: Registration of new MIME content-type/subtype
  2864. MIME type name:
  2865. (If the above is not an existing top-level MIME type,
  2866. please explain why an existing type cannot be used.)
  2867. MIME subtype name:
  2868. Required parameters:
  2869. Optional parameters:
  2870. Encoding considerations:
  2871. Security considerations:
  2872. Borenstein & Freed [Page 68]
  2873. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  2874. Published specification:
  2875. (The published specification must be an Internet RFC or
  2876. RFC-to-be if a new top-level type is being defined, and
  2877. must be a publicly available specification in any
  2878. case.)
  2879. Person & email address to contact for further
  2880. information:
  2881. F.2 Registration of New Character Set Values
  2882. To: IANA@isi.edu
  2883. Subject: Registration of new MIME character set value
  2884. MIME character set name:
  2885. Published specification:
  2886. (The published specification must be an Internet RFC or
  2887. RFC-to-be or an international standard.)
  2888. Person & email address to contact for further
  2889. information:
  2890. F.3 Registration of New Access-type Values for
  2891. Message/external-body
  2892. To: IANA@isi.edu
  2893. Subject: Registration of new MIME Access-type for
  2894. Message/external-body content-type
  2895. MIME access-type name:
  2896. Required parameters:
  2897. Optional parameters:
  2898. Published specification:
  2899. (The published specification must be an Internet RFC or
  2900. RFC-to-be.)
  2901. Person & email address to contact for further
  2902. information:
  2903. F.4 Registration of New Conversions Values for Application
  2904. To: IANA@isi.edu
  2905. Subject: Registration of new MIME Conversions value
  2906. for Application content-type
  2907. MIME Conversions name:
  2908. Borenstein & Freed [Page 69]
  2909. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  2910. Published specification:
  2911. (The published specification must be an Internet RFC or
  2912. RFC-to-be.)
  2913. Person & email address to contact for further
  2914. information:
  2915. Borenstein & Freed [Page 70]
  2916. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  2917. Appendix G -- Summary of the Seven Content-types
  2918. Content-type: text
  2919. Subtypes defined by this document: plain, richtext
  2920. Important Parameters: charset
  2921. Encoding notes: quoted-printable generally preferred if an
  2922. encoding is needed and the character set is mostly an
  2923. ASCII superset.
  2924. Security considerations: Rich text formats such as TeX and
  2925. Troff often contain mechanisms for executing arbitrary
  2926. commands or file system operations, and should not be
  2927. used automatically unless these security problems have
  2928. been addressed. Even plain text may contain control
  2929. characters that can be used to exploit the capabilities
  2930. of "intelligent" terminals and cause security
  2931. violations. User interfaces designed to run on such
  2932. terminals should be aware of and try to prevent such
  2933. problems.
  2934. ________________________________________________________________
  2935. Content-type: multipart
  2936. Subtypes defined by this document: mixed, alternative,
  2937. digest, parallel.
  2938. Important Parameters: boundary
  2939. Encoding notes: No content-transfer-encoding is permitted.
  2940. ________________________________________________________________
  2941. Content-type: message
  2942. Subtypes defined by this document: rfc822, partial,
  2943. external-body
  2944. Important Parameters: id, number, total
  2945. Encoding notes: No content-transfer-encoding is permitted.
  2946. ________________________________________________________________
  2947. Content-type: application
  2948. Subtypes defined by this document: octet-stream,
  2949. postscript, oda
  2950. Important Parameters: profile
  2951. Borenstein & Freed [Page 71]
  2952. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  2953. Encoding notes: base64 generally preferred for octet-stream
  2954. or other unreadable subtypes.
  2955. Security considerations: This type is intended for the
  2956. transmission of data to be interpreted by locally-installed
  2957. programs. If used, for example, to transmit executable
  2958. binary programs or programs in general-purpose interpreted
  2959. languages, such as LISP programs or shell scripts, severe
  2960. security problems could result. In general, authors of
  2961. mail-reading agents are cautioned against giving their
  2962. systems the power to execute mail-based application data
  2963. without carefully considering the security implications.
  2964. While it is certainly possible to define safe application
  2965. formats and even safe interpreters for unsafe formats, each
  2966. interpreter should be evaluated separately for possible
  2967. security problems.
  2968. ________________________________________________________________
  2969. Content-type: image
  2970. Subtypes defined by this document: jpeg, gif
  2971. Important Parameters: none
  2972. Encoding notes: base64 generally preferred
  2973. ________________________________________________________________
  2974. Content-type: audio
  2975. Subtypes defined by this document: basic
  2976. Important Parameters: none
  2977. Encoding notes: base64 generally preferred
  2978. ________________________________________________________________
  2979. Content-type: video
  2980. Subtypes defined by this document: mpeg
  2981. Important Parameters: none
  2982. Encoding notes: base64 generally preferred
  2983. Borenstein & Freed [Page 72]
  2984. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  2985. Appendix H -- Canonical Encoding Model
  2986. There was some confusion, in earlier drafts of this memo,
  2987. regarding the model for when email data was to be converted
  2988. to canonical form and encoded, and in particular how this
  2989. process would affect the treatment of CRLFs, given that the
  2990. representation of newlines varies greatly from system to
  2991. system. For this reason, a canonical model for encoding is
  2992. presented below.
  2993. The process of composing a MIME message part can be modelled
  2994. as being done in a number of steps. Note that these steps
  2995. are roughly similar to those steps used in RFC1113:
  2996. Step 1. Creation of local form.
  2997. The body part to be transmitted is created in the system's
  2998. native format. The native character set is used, and where
  2999. appropriate local end of line conventions are used as well.
  3000. This may be a UNIX-style text file, or a Sun raster image, or
  3001. a VMS indexed file, or audio data in a system-dependent
  3002. format stored only in memory, or anything else that
  3003. corresponds to the local model for the representation of
  3004. some form of information.
  3005. Step 2. Conversion to canonical form.
  3006. The entire body part, including "out-of-band" information
  3007. such as record lengths and possibly file attribute
  3008. information, is converted to a universal canonical form.
  3009. The specific content type of the body part as well as its
  3010. associated attributes dictate the nature of the canonical
  3011. form that is used. Conversion to the proper canonical form
  3012. may involve character set conversion, transformation of
  3013. audio data, compression, or various other operations
  3014. specific to the various content types.
  3015. For example, in the case of text/plain data, the text must
  3016. be converted to a supported character set and lines must be
  3017. delimited with CRLF delimiters in accordance with RFC822.
  3018. Note that the restriction on line lengths implied by RFC822
  3019. is eliminated if the next step employs either quoted-
  3020. printable or base64 encoding.
  3021. Step 3. Apply transfer encoding.
  3022. A Content-Transfer-Encoding appropriate for this body part
  3023. is applied. Note that there is no fixed relationship
  3024. between the content type and the transfer encoding. In
  3025. particular, it may be appropriate to base the choice of
  3026. base64 or quoted-printable on character frequency counts
  3027. which are specific to a given instance of body part.
  3028. Borenstein & Freed [Page 73]
  3029. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  3030. Step 4. Insertion into message.
  3031. The encoded object is inserted into a MIME message with
  3032. appropriate body part headers and boundary markers.
  3033. It is vital to note that these steps are only a model; they
  3034. are specifically NOT a blueprint for how an actual system
  3035. would be built. In particular, the model fails to account
  3036. for two common designs:
  3037. 1. In many cases the conversion to a canonical
  3038. form prior to encoding will be subsumed into the
  3039. encoder itself, which understands local formats
  3040. directly. For example, the local newline
  3041. convention for text bodyparts might be carried
  3042. through to the encoder itself along with knowledge
  3043. of what that format is.
  3044. 2. The output of the encoders may have to pass
  3045. through one or more additional steps prior to
  3046. being transmitted as a message. As such, the
  3047. output of the encoder may not be compliant with
  3048. the formats specified by RFC822. In particular,
  3049. once again it may be appropriate for the
  3050. converter's output to be expressed using local
  3051. newline conventions rather than using the standard
  3052. RFC822 CRLF delimiters.
  3053. Other implementation variations are conceivable as well.
  3054. The only important aspect of this discussion is that the
  3055. resulting messages are consistent with those produced by the
  3056. model described here.
  3057. Borenstein & Freed [Page 74]
  3058. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  3059. References
  3060. [US-ASCII] Coded Character Set--7-Bit American Standard Code
  3061. for Information Interchange, ANSI X3.4-1986.
  3062. [ATK] Borenstein, Nathaniel S., Multimedia Applications
  3063. Development with the Andrew Toolkit, Prentice-Hall, 1990.
  3064. [GIF] Graphics Interchange Format (Version 89a), Compuserve,
  3065. Inc., Columbus, Ohio, 1990.
  3066. [ISO-2022] International Standard--Information Processing--
  3067. ISO 7-bit and 8-bit coded character sets--Code extension
  3068. techniques, ISO 2022:1986.
  3069. [ISO-8859] Information Processing -- 8-bit Single-Byte Coded
  3070. Graphic Character Sets -- Part 1: Latin Alphabet No. 1, ISO
  3071. 8859-1:1987. Part 2: Latin alphabet No. 2, ISO 8859-2,
  3072. 1987. Part 3: Latin alphabet No. 3, ISO 8859-3, 1988. Part
  3073. 4: Latin alphabet No. 4, ISO 8859-4, 1988. Part 5:
  3074. Latin/Cyrillic alphabet, ISO 8859-5, 1988. Part 6:
  3075. Latin/Arabic alphabet, ISO 8859-6, 1987. Part 7:
  3076. Latin/Greek alphabet, ISO 8859-7, 1987. Part 8:
  3077. Latin/Hebrew alphabet, ISO 8859-8, 1988. Part 9: Latin
  3078. alphabet No. 5, ISO 8859-9, 1990.
  3079. [ISO-646] International Standard--Information Processing--
  3080. ISO 7-bit coded character set for information interchange,
  3081. ISO 646:1983.
  3082. [MPEG] Video Coding Draft Standard ISO 11172 CD,
  3083. ISO/IEC JTC1/SC2/WG11 (Motion Picture Experts Group), May, 1991.
  3084. [ODA] ISO 8613; Information Processing: Text and Office
  3085. System; Office Document Architecture (ODA) and Interchange
  3086. Format (ODIF), Part 1-8, 1989.
  3087. [PCM] CCITT, Fascicle III.4 - Recommendation G.711, Geneva,
  3088. 1972, "Pulse Code Modulation (PCM) of Voice Frequencies".
  3089. [POSTSCRIPT] Adobe Systems, Inc., PostScript Language
  3090. Reference Manual, Addison-Wesley, 1985.
  3091. [X400] Schicker, Pietro, "Message Handling Systems, X.400",
  3092. Message Handling Systems and Distributed Applications, E.
  3093. Stefferud, O-j. Jacobsen, and P. Schicker, eds., North-
  3094. Holland, 1989, pp. 3-41.
  3095. [RFC-783] Sollins, K.R. TFTP Protocol (revision 2). June,
  3096. 1981, MIT, RFC-783.
  3097. [RFC-821] Postel, J.B. Simple Mail Transfer Protocol.
  3098. August, 1982, USC/Information Sciences Institute, RFC-821.
  3099. Borenstein & Freed [Page 75]
  3100. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  3101. [RFC-822] Crocker, D. Standard for the format of ARPA
  3102. Internet text messages. August, 1982, UDEL, RFC-822.
  3103. [RFC-934] Rose, M.T.; Stefferud, E.A. Proposed standard
  3104. for message encapsulation. January, 1985, Delaware
  3105. and NMA, RFC-934.
  3106. [RFC-959] Postel, J.B.; Reynolds, J.K. File Transfer
  3107. Protocol. October, 1985, USC/Information Sciences
  3108. Institute, RFC-959.
  3109. [RFC-1049] Sirbu, M.A. Content-Type header field for
  3110. Internet messages. March, 1988, CMU, RFC-1049.
  3111. [RFC-1113] Linn, J. Privacy enhancement for Internet
  3112. electronic mail: Part I - message encipherment and
  3113. authentication procedures. August, 1989, IAB Privacy Task
  3114. Force, RFC-1113.
  3115. [RFC-1154] Robinson, D.; Ullmann, R. Encoding header field
  3116. for Internet messages. April, 1990, Prime Computer,
  3117. Inc., RFC-1154.
  3118. [RFC-1342] Moore, Keith, Representation of Non-Ascii Text in
  3119. Internet Message Headers. June, 1992, University of
  3120. Tennessee, RFC-1342.
  3121. Security Considerations
  3122. Security issues are discussed in Section 7.4.2 and in
  3123. Appendix G. Implementors should pay special attention to
  3124. the security implications of any mail content-types that can
  3125. cause the remote execution of any actions in the recipient's
  3126. environment. In such cases, the discussion of the
  3127. application/postscript content-type in Section 7.4.2 may
  3128. serve as a model for considering other content-types with
  3129. remote execution capabilities.
  3130. Borenstein & Freed [Page 76]
  3131. RFC 1341MIME: Multipurpose Internet Mail ExtensionsJune 1992
  3132. Authors' Addresses
  3133. For more information, the authors of this document may be
  3134. contacted via Internet mail:
  3135. Nathaniel S. Borenstein
  3136. MRE 2D-296, Bellcore
  3137. 445 South St.
  3138. Morristown, NJ 07962-1910
  3139. Phone: +1 201 829 4270
  3140. Fax: +1 201 829 7019
  3141. Email: nsb@bellcore.com
  3142. Ned Freed
  3143. Innosoft International, Inc.
  3144. 250 West First Street
  3145. Suite 240
  3146. Claremont, CA 91711
  3147. Phone: +1 714 624 7907
  3148. Fax: +1 714 621 5319
  3149. Email: ned@innosoft.com
  3150. Borenstein & Freed [Page 77]
  3151. RFC 1341 MIME: Multipurpose Internet Mail Extensions June 1992
  3152. THIS PAGE INTENTIONALLY LEFT BLANK.
  3153. Please discard this page and place the following table of
  3154. contents after the title page.
  3155. Borenstein & Freed [Page i]
  3156. Table of Contents
  3157. 1 Introduction....................................... 1
  3158. 2 Notations, Conventions, and Generic BNF Grammar.... 3
  3159. 3 The MIME-Version Header Field...................... 5
  3160. 4 The Content-Type Header Field...................... 6
  3161. 5 The Content-Transfer-Encoding Header Field......... 10
  3162. 5.1 Quoted-Printable Content-Transfer-Encoding......... 14
  3163. 5.2 Base64 Content-Transfer-Encoding................... 17
  3164. 6 Additional Optional Content- Header Fields......... 19
  3165. 6.1 Optional Content-ID Header Field................... 19
  3166. 6.2 Optional Content-Description Header Field.......... 19
  3167. 7 The Predefined Content-Type Values................. 20
  3168. 7.1 The Text Content-Type.............................. 20
  3169. 7.1.1 The charset parameter.............................. 20
  3170. 7.1.2 The Text/plain subtype............................. 23
  3171. 7.1.3 The Text/richtext subtype.......................... 23
  3172. 7.2 The Multipart Content-Type......................... 29
  3173. 7.2.1 Multipart: The common syntax...................... 30
  3174. 7.2.2 The Multipart/mixed (primary) subtype.............. 34
  3175. 7.2.3 The Multipart/alternative subtype.................. 34
  3176. 7.2.4 The Multipart/digest subtype....................... 36
  3177. 7.2.5 The Multipart/parallel subtype..................... 36
  3178. 7.3 The Message Content-Type........................... 37
  3179. 7.3.1 The Message/rfc822 (primary) subtype............... 37
  3180. 7.3.2 The Message/Partial subtype........................ 37
  3181. 7.3.3 The Message/External-Body subtype.................. 40
  3182. 7.4 The Application Content-Type....................... 46
  3183. 7.4.1 The Application/Octet-Stream (primary) subtype..... 46
  3184. 7.4.2 The Application/PostScript subtype................. 47
  3185. 7.4.3 The Application/ODA subtype........................ 50
  3186. 7.5 The Image Content-Type............................. 51
  3187. 7.6 The Audio Content-Type............................. 51
  3188. 7.7 The Video Content-Type............................. 51
  3189. 7.8 Experimental Content-Type Values................... 51
  3190. Summary............................................ 53
  3191. Acknowledgements................................... 54
  3192. Appendix A -- Minimal MIME-Conformance............. 56
  3193. Appendix B -- General Guidelines For Sending Email Data 59
  3194. Appendix C -- A Complex Multipart Example.......... 62
  3195. Appendix D -- A Simple Richtext-to-Text Translator in C 64
  3196. Appendix E -- Collected Grammar.................... 66
  3197. Appendix F -- IANA Registration Procedures......... 68
  3198. F.1 Registration of New Content-type/subtype Values.. 68
  3199. F.2 Registration of New Character Set Values...... 69
  3200. F.3 Registration of New Access-type Values for Message/external-body 69
  3201. F.4 Registration of New Conversions Values for Application 69
  3202. Appendix G -- Summary of the Seven Content-types... 71
  3203. Appendix H -- Canonical Encoding Model............. 73
  3204. References......................................... 75
  3205. Security Considerations............................ 76
  3206. Authors' Addresses................................. 77
  3207. Borenstein & Freed [Page ii]
  3208. Borenstein & Freed [Page iii]