forked from goturak/Teaching-HEIGVD-RES-2019
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
163 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<!-- Maven build descriptor for the CharacterIODemo artifact (groupId ch.heigvd.res), packaged as a jar. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | ||
<modelVersion>4.0.0</modelVersion> | ||
<groupId>ch.heigvd.res</groupId> | ||
<artifactId>CharacterIODemo</artifactId> | ||
<version>1.0-SNAPSHOT</version> | ||
<packaging>jar</packaging> | ||
<build> | ||
<plugins> | ||
<!-- The shade plugin's "shade" goal is bound to the package phase. -->
<plugin> | ||
<groupId>org.apache.maven.plugins</groupId> | ||
<artifactId>maven-shade-plugin</artifactId> | ||
<version>2.3</version> | ||
<executions> | ||
<execution> | ||
<phase>package</phase> | ||
<goals> | ||
<goal>shade</goal> | ||
</goals> | ||
<configuration> | ||
<transformers> | ||
<!-- Declares ch.heigvd.res.io.CharacterIODemo as the main class in the jar manifest; presumably this is what makes the shaded jar runnable with "java -jar" (confirm against the shade plugin documentation). -->
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer"> | ||
<mainClass>ch.heigvd.res.io.CharacterIODemo</mainClass> | ||
</transformer> | ||
</transformers> | ||
</configuration> | ||
</execution> | ||
</executions> | ||
</plugin> | ||
</plugins> | ||
</build> | ||
|
||
<!-- Source files are declared as UTF-8 (the demo class contains accented and Japanese literals); the compiler is set to Java 1.7 source and target levels. -->
<properties> | ||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> | ||
<maven.compiler.source>1.7</maven.compiler.source> | ||
<maven.compiler.target>1.7</maven.compiler.target> | ||
</properties> | ||
<name>CharacterIODemo</name> | ||
</project> |
124 changes: 124 additions & 0 deletions
124
...es/03-CharacterIODemo/CharacterIODemo/src/main/java/ch/heigvd/res/io/CharacterIODemo.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
package ch.heigvd.res.io; | ||
|
||
import java.io.ByteArrayOutputStream; | ||
import java.io.IOException; | ||
import java.io.OutputStreamWriter; | ||
import java.util.logging.Level; | ||
import java.util.logging.Logger; | ||
|
||
/**
 * This program shows how to use character encodings in Java. It shows that while
 * Java uses Unicode once characters or Strings are created in memory, a translation
 * needs to happen when bytes are converted into characters, and the other way around.
 *
 * The program also highlights typical problems that arise if the developer does not
 * control character encodings. These problems manifest themselves as '?' or other
 * strange characters appearing in text messages.
 *
 * It is a good idea to run this program in debug mode, to be able to explore the
 * memory representations of the manipulated strings.
 *
 * @author Olivier Liechti
 */
public class CharacterIODemo {

  /**
   * Encodes the Unicode string passed in parameter into a byte array using the
   * specified encoding (different encodings produce different byte arrays,
   * possibly of different sizes), dumps these bytes on the console, and then
   * converts the byte array back into a String twice: a first time using the
   * default encoding (platform specific) and a second time using the specified
   * encoding. Both decoded results are printed on the console.
   *
   * @param message the test String we want to encode
   * @param encoding the name of the encoding we want to use for encoding/decoding message
   * @throws java.io.IOException if the encoding is not supported or if writing fails
   */
  public void encodeAndDecode(String message, String encoding) throws IOException {
    // The output stream sends the written bytes to a memory zone.
    ByteArrayOutputStream os = new ByteArrayOutputStream();

    // The OutputStreamWriter wrapped around it converts the characters we write
    // into sequences of bytes, using the encoding passed as parameter.
    //
    // The writer must be CLOSED (not merely flushed) before the bytes are
    // fetched: flush() pushes out the bytes produced so far, but only close()
    // finishes the encoder. Without it, pending encoder state (the closing shift
    // sequence of a stateful charset, the replacement byte for a trailing
    // unpaired surrogate) would be missing from the byte array.
    try (OutputStreamWriter writer = new OutputStreamWriter(os, encoding)) {
      writer.write(message);
    }

    // The characters have been translated into bytes and forwarded to the
    // wrapped ByteArrayOutputStream instance. We can now fetch these bytes.
    byte[] encodedMessage = os.toByteArray();

    System.out.println("When I encode '" + message + "' (" + message.length() + " chars) with encoding " + encoding + ", I generate " + encodedMessage.length + " bytes.");
    dumpByteArray(encodedMessage);

    // Now, let's do the work the other way around and convert the byte array
    // into a Java String. Some String constructors let us specify the encoding
    // (i.e. how bytes are translated into characters). Here, we deliberately
    // use the one that relies on the default encoding (which you can specify by
    // starting your JVM with -Dfile.encoding=XXX). We expect this to break in
    // many cases: if the test string was encoded with UTF-16 and is decoded
    // with UTF-8, the resulting string is corrupted.
    String defaultEncoding = System.getProperty("file.encoding");
    String decodedMessage = new String(encodedMessage);
    System.out.print("If I decode the result with the default encoding for this JVM (" + defaultEncoding + "), I get: ");
    System.out.println(decodedMessage);

    // Now, let's use the same encoding for converting the byte array back into
    // a String. That works better in some cases, BUT the message may still not
    // be displayed correctly, because encoding the test string into bytes MAY
    // lose information. For instance, if the test string contains Japanese
    // characters and we encode it with ASCII, the encoder cannot represent them
    // and replaces them with '?' characters. Once this is done, the original
    // string cannot be recovered from the byte array.
    decodedMessage = new String(encodedMessage, encoding);
    System.out.print("If I decode the result with the same encoding (" + encoding + ") for this JVM (" + defaultEncoding + "), I get: ");
    System.out.println(decodedMessage);
    System.out.println();
  }

  /**
   * A utility method to dump a byte array onto the console, one byte per line.
   * Each line shows the unsigned 8-bit binary value (left-padded with zeros)
   * followed by the signed decimal value (right-aligned on 5 columns).
   *
   * @param array the byte array we want to show
   */
  private void dumpByteArray(byte[] array) {
    for (byte b : array) {
      // Mask with 0xFF so that negative bytes are shown on 8 bits, not 32.
      String binary = String.format("%8s", Integer.toBinaryString(b & 0xFF)).replace(' ', '0');
      String decimal = String.format("%5s", b);
      System.out.print(binary + " " + decimal + "\n");
    }
  }

  /**
   * Runs the encode/decode procedure on a test string with several encodings.
   *
   * @param args the command line arguments (not used)
   */
  public static void main(String[] args) {
    CharacterIODemo demo = new CharacterIODemo();

    // Let's see how 1) plain latin characters, 2) characters with accents
    // and 3) japanese characters work with our test procedure
    String message = "ABC élève 広島";

    String[] encodings = {
      "US-ASCII",    // no support for accents, nor japanese
      "ISO-8859-15", // latin western european: support for accents, not japanese
      "UTF-8",
      "UTF-16"
    };

    try {
      for (String encoding : encodings) {
        demo.encodeAndDecode(message, encoding);
      }
    } catch (IOException ex) {
      Logger.getLogger(CharacterIODemo.class.getName()).log(Level.SEVERE, null, ex);
    }
  }

}