@akashivskyy का उत्तर बहुत अच्छा है और दर्शाता है कि NSAttributedString का उपयोग करके
HTML एंटिटीज़ (entities) को कैसे डिकोड किया जाए। एक संभावित नुकसान (जैसा कि उन्होंने स्वयं कहा है) यह है कि सारा HTML मार्कअप भी हटा दिया जाता है, इसलिए
<strong> 4 &lt; 5 &amp; 3 &gt; 2</strong>
हो जाता है
4 < 5 & 3 > 2
OS X पर CFXMLCreateStringByUnescapingEntities() उपलब्ध है,
जो यह काम करता है:
// Requires `import Foundation`. The input contains named, hexadecimal and
// decimal character references that the CoreFoundation call expands.
let encoded = "<strong> 4 &lt; 5 &amp; 3 &gt; 2 .</strong> Price: 12 &#x20ac;. &#64; "
// Current Swift no longer bridges `String` to `CFString` implicitly, hence the
// explicit cast on the argument.
let decoded = CFXMLCreateStringByUnescapingEntities(nil, encoded as CFString, nil) as String
print(decoded)
// <strong> 4 < 5 & 3 > 2 .</strong> Price: 12 €. @
लेकिन यह iOS पर उपलब्ध नहीं है।
यहाँ एक शुद्ध Swift कार्यान्वयन है। यह एक शब्दकोश (dictionary) का उपयोग करके `&lt;` जैसे
नामित वर्ण एंटिटी संदर्भों को, और `&#64;` या `&#x20ac;` जैसी सभी संख्यात्मक वर्ण एंटिटीज़ को
डिकोड करता है।
(ध्यान दें कि मैंने सभी 252 HTML एंटिटीज़ को स्पष्ट रूप से सूचीबद्ध नहीं किया है।)
स्विफ्ट 4:
/// Lookup table from a complete named character entity reference to the
/// character it denotes.
///
/// Each key is the full reference text, including the leading `&` and the
/// trailing `;`, because that is the form in which entities are looked up.
/// Only a sample of the named HTML entities is listed.
///
/// From http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references
private let characterEntities : [ Substring : Character ] = [
    // XML predefined entities:
    "&quot;"    : "\"",
    "&amp;"     : "&",
    "&apos;"    : "'",
    "&lt;"      : "<",
    "&gt;"      : ">",

    // HTML character entity references:
    "&nbsp;"    : "\u{00a0}",
    // ...
    "&diams;"   : "♦",
]
extension String {

    /// A copy of the string in which every HTML character entity reference
    /// is replaced by the character it stands for.
    ///
    /// Named references (e.g. `&lt;`) are resolved through `characterEntities`;
    /// decimal (`&#64;`) and hexadecimal (`&#x20ac;`) references are converted
    /// numerically. Text that cannot be decoded -- an unknown name, an invalid
    /// number, or a stray `&` -- is copied to the result unchanged.
    var stringByDecodingHTMLEntities : String {

        // ===== Utility functions =====

        // Convert the number in the string to the corresponding
        // Unicode character, e.g.
        //    decodeNumeric("64", base: 10)   --> "@"
        //    decodeNumeric("20ac", base: 16) --> "€"
        // Returns `nil` if the text is not a number in the given base or the
        // number is not a valid Unicode scalar value.
        func decodeNumeric(_ string : Substring, base : Int) -> Character? {
            guard let code = UInt32(string, radix: base),
                let uniScalar = UnicodeScalar(code) else { return nil }
            return Character(uniScalar)
        }

        // Decode the HTML character entity to the corresponding
        // Unicode character, return `nil` for invalid input.
        //     decode("&#64;")    --> "@"
        //     decode("&#x20ac;") --> "€"
        //     decode("&lt;")     --> "<"
        //     decode("&foo;")    --> nil
        // `entity` always starts with '&' and ends with ';'.
        func decode(_ entity : Substring) -> Character? {
            if entity.hasPrefix("&#x") || entity.hasPrefix("&#X") {
                // Strip the "&#x" prefix and the trailing ';'.
                return decodeNumeric(entity.dropFirst(3).dropLast(), base: 16)
            } else if entity.hasPrefix("&#") {
                // Strip the "&#" prefix and the trailing ';'.
                return decodeNumeric(entity.dropFirst(2).dropLast(), base: 10)
            } else {
                return characterEntities[entity]
            }
        }

        // ===== Method starts here =====

        var result = ""
        var position = startIndex

        // Find the next '&' and copy the characters preceding it to `result`:
        while let ampRange = self[position...].range(of: "&") {
            result.append(contentsOf: self[position ..< ampRange.lowerBound])
            position = ampRange.lowerBound

            // Find the next ';'. Without one there cannot be any further entity.
            guard let semiRange = self[position...].range(of: ";") else {
                // No matching ';'.
                break
            }

            // An entity never contains '&', so the candidate starts at the
            // *last* '&' before the ';'. Any earlier ampersands are plain text;
            // copying them verbatim keeps a stray '&' from swallowing a valid
            // entity that follows it (e.g. "a & b &lt; c").
            if let lastAmp = self[position ..< semiRange.lowerBound].range(of: "&", options: .backwards) {
                result.append(contentsOf: self[position ..< lastAmp.lowerBound])
                position = lastAmp.lowerBound
            }

            let entity = self[position ..< semiRange.upperBound]
            position = semiRange.upperBound

            if let decoded = decode(entity) {
                // Replace by decoded character:
                result.append(decoded)
            } else {
                // Invalid entity, copy verbatim:
                result.append(contentsOf: entity)
            }
        }

        // Copy remaining characters to `result`:
        result.append(contentsOf: self[position...])
        return result
    }
}
उदाहरण:
// The input mixes markup (left untouched) with named, hexadecimal and decimal
// character references (decoded).
let encoded = "<strong> 4 &lt; 5 &amp; 3 &gt; 2 .</strong> Price: 12 &#x20ac;. &#64; "
let decoded = encoded.stringByDecodingHTMLEntities
print(decoded)
// <strong> 4 < 5 & 3 > 2 .</strong> Price: 12 €. @
स्विफ्ट 3:
/// Lookup table from a complete named character entity reference to the
/// character it denotes.
///
/// Each key is the full reference text, including the leading `&` and the
/// trailing `;`, because that is the form in which entities are looked up.
/// Only a sample of the named HTML entities is listed.
///
/// From http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references
private let characterEntities : [ String : Character ] = [
    // XML predefined entities:
    "&quot;"    : "\"",
    "&amp;"     : "&",
    "&apos;"    : "'",
    "&lt;"      : "<",
    "&gt;"      : ">",

    // HTML character entity references:
    "&nbsp;"    : "\u{00a0}",
    // ...
    "&diams;"   : "♦",
]
extension String {

    /// A copy of the string in which every HTML character entity reference
    /// is replaced by the character it stands for.
    ///
    /// Named references (e.g. `&lt;`) are resolved through `characterEntities`;
    /// decimal (`&#64;`) and hexadecimal (`&#x20ac;`) references are converted
    /// numerically. Text that cannot be decoded -- an unknown name, an invalid
    /// number, or a stray `&` -- is copied to the result unchanged.
    var stringByDecodingHTMLEntities : String {

        // ===== Utility functions =====

        // Convert the number in the string to the corresponding
        // Unicode character, e.g.
        //    decodeNumeric("64", base: 10)   --> "@"
        //    decodeNumeric("20ac", base: 16) --> "€"
        // Returns `nil` if the text is not a number in the given base or the
        // number is not a valid Unicode scalar value.
        func decodeNumeric(_ string : String, base : Int) -> Character? {
            guard let code = UInt32(string, radix: base),
                let uniScalar = UnicodeScalar(code) else { return nil }
            return Character(uniScalar)
        }

        // Decode the HTML character entity to the corresponding
        // Unicode character, return `nil` for invalid input.
        //     decode("&#64;")    --> "@"
        //     decode("&#x20ac;") --> "€"
        //     decode("&lt;")     --> "<"
        //     decode("&foo;")    --> nil
        // `entity` always starts with '&' and ends with ';'.
        func decode(_ entity : String) -> Character? {
            // Index of the trailing ';', which is never part of the number.
            let digitsEnd = entity.index(entity.endIndex, offsetBy: -1)

            if entity.hasPrefix("&#x") || entity.hasPrefix("&#X") {
                let digitsStart = entity.index(entity.startIndex, offsetBy: 3)
                return decodeNumeric(entity.substring(with: digitsStart ..< digitsEnd), base: 16)
            } else if entity.hasPrefix("&#") {
                let digitsStart = entity.index(entity.startIndex, offsetBy: 2)
                return decodeNumeric(entity.substring(with: digitsStart ..< digitsEnd), base: 10)
            } else {
                return characterEntities[entity]
            }
        }

        // ===== Method starts here =====

        var result = ""
        var position = startIndex

        // Find the next '&' and copy the characters preceding it to `result`:
        while let ampRange = self.range(of: "&", range: position ..< endIndex) {
            result.append(self[position ..< ampRange.lowerBound])
            position = ampRange.lowerBound

            // Find the next ';'. Without one there cannot be any further entity.
            guard let semiRange = self.range(of: ";", range: position ..< endIndex) else {
                // No matching ';'.
                break
            }

            // An entity never contains '&', so the candidate starts at the
            // *last* '&' before the ';'. Any earlier ampersands are plain text;
            // copying them verbatim keeps a stray '&' from swallowing a valid
            // entity that follows it (e.g. "a & b &lt; c").
            if let lastAmp = self.range(of: "&", options: .backwards, range: position ..< semiRange.lowerBound) {
                result.append(self[position ..< lastAmp.lowerBound])
                position = lastAmp.lowerBound
            }

            let entity = self[position ..< semiRange.upperBound]
            position = semiRange.upperBound

            if let decoded = decode(entity) {
                // Replace by decoded character:
                result.append(decoded)
            } else {
                // Invalid entity, copy verbatim:
                result.append(entity)
            }
        }

        // Copy remaining characters to `result`:
        result.append(self[position ..< endIndex])
        return result
    }
}
स्विफ्ट 2:
/// Lookup table from a complete named character entity reference to the
/// character it denotes.
///
/// Each key is the full reference text, including the leading `&` and the
/// trailing `;`, because that is the form in which entities are looked up.
/// Only a sample of the named HTML entities is listed.
///
/// From http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references
private let characterEntities : [ String : Character ] = [
    // XML predefined entities:
    "&quot;"    : "\"",
    "&amp;"     : "&",
    "&apos;"    : "'",
    "&lt;"      : "<",
    "&gt;"      : ">",

    // HTML character entity references:
    "&nbsp;"    : "\u{00a0}",
    // ...
    "&diams;"   : "♦",
]
extension String {

    /// A copy of the string in which every HTML character entity reference
    /// is replaced by the character it stands for.
    ///
    /// Named references (e.g. `&lt;`) are resolved through `characterEntities`;
    /// decimal (`&#64;`) and hexadecimal (`&#x20ac;`) references are converted
    /// numerically. Text that cannot be decoded -- an unknown name, an invalid
    /// number, or a stray `&` -- is copied to the result unchanged.
    var stringByDecodingHTMLEntities : String {

        // ===== Utility functions =====

        // Convert the number in the string to the corresponding
        // Unicode character, e.g.
        //    decodeNumeric("64", base: 10)   --> "@"
        //    decodeNumeric("20ac", base: 16) --> "€"
        // Returns `nil` if the text is not a number in the given base or the
        // number is not a valid Unicode scalar value.
        func decodeNumeric(string : String, base : Int32) -> Character? {
            // The failable initializer rejects empty, malformed and
            // out-of-range text, which `strtoul` would silently turn into 0
            // or a truncated value.
            guard let code = UInt32(string, radix: Int(base)) else { return nil }

            // `UnicodeScalar(_: UInt32)` traps on surrogate code points and on
            // values beyond U+10FFFF, so reject those up front.
            guard code < 0xD800 || (code > 0xDFFF && code <= 0x10FFFF) else { return nil }

            return Character(UnicodeScalar(code))
        }

        // Decode the HTML character entity to the corresponding
        // Unicode character, return `nil` for invalid input.
        //     decode("&#64;")    --> "@"
        //     decode("&#x20ac;") --> "€"
        //     decode("&lt;")     --> "<"
        //     decode("&foo;")    --> nil
        // `entity` always starts with '&' and ends with ';'.
        func decode(entity : String) -> Character? {
            // Index of the trailing ';', which is never part of the number.
            let digitsEnd = entity.endIndex.advancedBy(-1)

            if entity.hasPrefix("&#x") || entity.hasPrefix("&#X") {
                let digitsStart = entity.startIndex.advancedBy(3)
                return decodeNumeric(entity.substringWithRange(digitsStart ..< digitsEnd), base: 16)
            } else if entity.hasPrefix("&#") {
                let digitsStart = entity.startIndex.advancedBy(2)
                return decodeNumeric(entity.substringWithRange(digitsStart ..< digitsEnd), base: 10)
            } else {
                return characterEntities[entity]
            }
        }

        // ===== Method starts here =====

        var result = ""
        var position = startIndex

        // Find the next '&' and copy the characters preceding it to `result`:
        while let ampRange = self.rangeOfString("&", range: position ..< endIndex) {
            result.appendContentsOf(self[position ..< ampRange.startIndex])
            position = ampRange.startIndex

            // Find the next ';'. Without one there cannot be any further entity.
            guard let semiRange = self.rangeOfString(";", range: position ..< endIndex) else {
                // No matching ';'.
                break
            }

            // An entity never contains '&', so the candidate starts at the
            // *last* '&' before the ';'. Any earlier ampersands are plain text;
            // copying them verbatim keeps a stray '&' from swallowing a valid
            // entity that follows it (e.g. "a & b &lt; c").
            if let lastAmp = self.rangeOfString("&", options: .BackwardsSearch, range: position ..< semiRange.startIndex) {
                result.appendContentsOf(self[position ..< lastAmp.startIndex])
                position = lastAmp.startIndex
            }

            let entity = self[position ..< semiRange.endIndex]
            position = semiRange.endIndex

            if let decoded = decode(entity) {
                // Replace by decoded character:
                result.append(decoded)
            } else {
                // Invalid entity, copy verbatim:
                result.appendContentsOf(entity)
            }
        }

        // Copy remaining characters to `result`:
        result.appendContentsOf(self[position ..< endIndex])
        return result
    }
}